0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-03 05:11:52 +00:00

[1.3.0] Added spiffy new smoketest printDefinition.php, which lets you twiddle with the configuration settings and see how the internal rules are affected. (currently only complete for HTMLDefinition).

- HTMLPurifier -> HTML Purifier
. HTMLPurifier_Config->getBatch($namespace) added
. More lenient casting to bool from string in HTMLPurifier_ConfigSchema
. <?xml ... tags added to all smoketests

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@578 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-11-24 06:26:02 +00:00
parent 775763c583
commit 73a1e31fad
12 changed files with 483 additions and 14 deletions

4
NEWS
View File

@ -26,6 +26,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! New directive %Attr.DisableURI, which eliminates all hyperlinking ! New directive %Attr.DisableURI, which eliminates all hyperlinking
! New directive %URI.Munge, munges URI so you can use some sort of redirector ! New directive %URI.Munge, munges URI so you can use some sort of redirector
service to avoid PageRank leaks or warn users that they are exiting your site. service to avoid PageRank leaks or warn users that they are exiting your site.
! Added spiffy new smoketest printDefinition.php, which lets you twiddle with
the configuration settings and see how the internal rules are affected.
- Added missing type to ChildDef_Chameleon - Added missing type to ChildDef_Chameleon
- Remove Tidy option from demo if there is not Tidy available - Remove Tidy option from demo if there is not Tidy available
. ChildDef_Required guards against empty tags . ChildDef_Required guards against empty tags
@ -33,6 +35,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Added peace-of-mind variable initialization to Strategy_FixNesting . Added peace-of-mind variable initialization to Strategy_FixNesting
. Added HTMLPurifier->info_parent_def, parent child processing made special . Added HTMLPurifier->info_parent_def, parent child processing made special
. Added internal documents briefly summarizing future progression of HTML . Added internal documents briefly summarizing future progression of HTML
. HTMLPurifier_Config->getBatch($namespace) added
. More lenient casting to bool from string in HTMLPurifier_ConfigSchema
1.2.1, unknown release date 1.2.1, unknown release date
(bugfix/minor feature release, may be dropped if 1.2.0 is stable) (bugfix/minor feature release, may be dropped if 1.2.0 is stable)

View File

@ -26,11 +26,11 @@ if (empty($_REQUEST['strict'])) {
?> ?>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head> <head>
<title>HTMLPurifier Live Demo</title> <title>HTML Purifier Live Demo</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head> </head>
<body> <body>
<h1>HTMLPurifier Live Demo</h1> <h1>HTML Purifier Live Demo</h1>
<?php <?php
require_once '../../library/HTMLPurifier.auto.php'; require_once '../../library/HTMLPurifier.auto.php';
@ -89,7 +89,7 @@ validator</a>, copy and paste the <em>entire</em> demo page's source.</p>
} else { } else {
?> ?>
<p>Welcome to the live demo. Enter some HTML and see how HTMLPurifier <p>Welcome to the live demo. Enter some HTML and see how HTML Purifier
will filter it.</p> will filter it.</p>
<?php <?php
@ -128,7 +128,7 @@ if (isset($html)) {
</div> </div>
</fieldset> </fieldset>
</form> </form>
<p>Return to <a href="http://hp.jpsband.org/">HTMLPurifier's home page</a>. <p>Return to <a href="http://hp.jpsband.org/">HTML Purifier's home page</a>.
Try the form in <a href="demo.php?get">GET</a> and <a href="demo.php?post">POST</a> request Try the form in <a href="demo.php?get">GET</a> and <a href="demo.php?post">POST</a> request
flavors (GET is easy to validate with W3C, but POST allows larger inputs).</p> flavors (GET is easy to validate with W3C, but POST allows larger inputs).</p>
</body> </body>

View File

@ -68,6 +68,19 @@ class HTMLPurifier_Config
return $this->conf[$namespace][$key]; return $this->conf[$namespace][$key];
} }
/**
 * Retrieves all directives of one namespace as a directive => value array.
 * @param $namespace String namespace
 * @return Array of values stored for the namespace. When the namespace is
 *         not present in the schema, an E_USER_WARNING is raised and
 *         nothing (null) is returned.
 */
function getBatch($namespace) {
    if (isset($this->def->info[$namespace])) {
        return $this->conf[$namespace];
    }
    // unknown namespace: warn the caller and fall through with no value
    trigger_error('Cannot retrieve undefined namespace',
                  E_USER_WARNING);
    return;
}
/** /**
* Sets a value to configuration. * Sets a value to configuration.
* @param $namespace String namespace * @param $namespace String namespace
@ -134,6 +147,7 @@ class HTMLPurifier_Config
*/ */
function loadArray($config_array) { function loadArray($config_array) {
foreach ($config_array as $key => $value) { foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
if (strpos($key, '.') !== false) { if (strpos($key, '.') !== false) {
// condensed form // condensed form
list($namespace, $directive) = explode('.', $key); list($namespace, $directive) = explode('.', $key);

View File

@ -247,11 +247,20 @@ class HTMLPurifier_ConfigSchema {
case 'bool': case 'bool':
if (is_int($var) && ($var === 0 || $var === 1)) { if (is_int($var) && ($var === 0 || $var === 1)) {
$var = (bool) $var; $var = (bool) $var;
} elseif (is_string($var)) {
if ($var == 'on' || $var == 'true' || $var == '1') {
$var = true;
} elseif ($var == 'off' || $var == 'false' || $var == '0') {
$var = false;
} else {
break;
}
} elseif (!is_bool($var)) break; } elseif (!is_bool($var)) break;
return $var; return $var;
case 'list': case 'list':
case 'hash': case 'hash':
case 'lookup': case 'lookup':
if (is_string($var)) $var = explode(',',$var);
if (!is_array($var)) break; if (!is_array($var)) break;
$keys = array_keys($var); $keys = array_keys($var);
if ($keys === array_keys($keys)) { if ($keys === array_keys($keys)) {

View File

@ -0,0 +1,125 @@
<?php
require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Token.php';
require_once 'HTMLPurifier/Encoder.php';
/**
 * Base class for "printers": helpers that render an object (or an aspect of
 * it) as HTML. Provides small convenience methods for building tags,
 * escaping text and turning arrays into human-readable lists.
 */
class HTMLPurifier_Printer
{

    /**
     * Instance of HTMLPurifier_Generator for HTML generation convenience funcs
     */
    var $generator;

    /**
     * Instance of HTMLPurifier_Config, for easy access
     */
    var $config;

    /**
     * Initialize $generator.
     */
    function HTMLPurifier_Printer() {
        $this->generator = new HTMLPurifier_Generator();
    }

    /**
     * Main function that renders object or aspect of that object.
     * Intentionally empty here; subclasses supply the implementation.
     * @param $config Configuration object
     */
    function render($config) {}

    /**
     * Returns a start tag
     * @param $tag Tag name
     * @param $attr Attribute array; any falsy value (e.g. 0) is treated as
     *              "no attributes"
     * @return String HTML produced by the generator for the start token
     */
    function start($tag, $attr = array()) {
        return $this->generator->generateFromToken(
                    new HTMLPurifier_Token_Start($tag, $attr ? $attr : array())
               );
    }

    /**
     * Returns an end tag
     * @param $tag Tag name
     * @return String HTML produced by the generator for the end token
     */
    function end($tag) {
        return $this->generator->generateFromToken(
                    new HTMLPurifier_Token_End($tag)
               );
    }

    /**
     * Returns a complete element with content inside
     * @param $tag Tag name
     * @param $contents Element contents
     * @param $attr Tag attributes
     * @param $escape Bool whether or not to escape contents
     * @return String start tag, (optionally escaped) contents and end tag
     */
    function element($tag, $contents, $attr = array(), $escape = true) {
        return $this->start($tag, $attr) .
               ($escape ? $this->escape($contents) : $contents) .
               $this->end($tag);
    }

    /**
     * Returns a simple key/value row in a table.
     * Boolean values are displayed as 'On' / 'Off'.
     * @param $name Key, placed in a th cell
     * @param $value Value, placed in a td cell
     * @return String HTML for one tr element
     */
    function row($name, $value) {
        if (is_bool($value)) $value = $value ? 'On' : 'Off';
        return
            $this->start('tr') . "\n" .
                $this->element('th', $name) . "\n" .
                $this->element('td', $value) . "\n" .
            $this->end('tr')
        ;
    }

    /**
     * Escapes a string for HTML output.
     * @param $string String to escape
     * @return String cleaned by HTMLPurifier_Encoder::cleanUTF8() and then
     *         passed through htmlspecialchars()
     */
    function escape($string) {
        $string = HTMLPurifier_Encoder::cleanUTF8($string);
        $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
        return $string;
    }

    /**
     * Takes a list of strings and turns them into a single list
     * @param $array List of strings
     * @param $polite Bool whether or not to add an "and" before the last
     *                item, e.g. "a, b and c" instead of "a, b, c"
     * @return String joined list, or 'None' when $array is empty
     */
    function listify($array, $polite = false) {
        if (empty($array)) return 'None';
        $ret = '';
        $i = count($array); // number of items still to be appended
        foreach ($array as $value) {
            $i--;
            $ret .= $value;
            if ($i > 0) {
                // The separator in front of the final item is " and " in
                // polite mode; the leading space keeps the word from
                // running into the preceding item.
                $ret .= ($polite && $i == 1) ? ' and ' : ', ';
            }
        }
        return $ret;
    }

    /**
     * Retrieves the class of an object without prefixes
     * @param $obj Object to determine class of
     * @param $prefix Further prefix to remove (after 'HTMLPurifier_')
     * @return String class name with the prefix stripped
     */
    function getClass($obj, $prefix = '') {
        static $five = null;
        if ($five === null) $five = version_compare(PHP_VERSION, '5', '>=');
        $prefix = 'HTMLPurifier_' . $prefix;
        // before PHP 5 the prefix is matched in lowercase form
        if (!$five) $prefix = strtolower($prefix);
        return str_replace($prefix, '', get_class($obj));
    }

}
?>

View File

@ -0,0 +1,10 @@
<?php
/**
 * Placeholder printer for the CSS definition; no real rendering exists yet.
 */
class HTMLPurifier_Printer_CSSDefinition
{

    /**
     * Returns a fixed "to be implemented" notice.
     * @param $config Configuration object. Optional and currently unused;
     *                declared so the signature matches how the printer is
     *                invoked (with a config argument).
     * @return String HTML paragraph
     */
    function render($config = null) {return '<p>To be implemented.</p>';}

}
?>

View File

@ -0,0 +1,184 @@
<?php
require_once 'HTMLPurifier/Printer.php';
/**
 * Printer that renders an HTML definition (environment settings, transforms
 * and per-element rules) as HTML tables.
 */
class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
{

    /**
     * Instance of HTMLPurifier_HTMLDefinition, for easy access
     */
    var $def;

    /**
     * Renders the whole definition: a wrapper div (class
     * 'HTMLPurifier_Printer') containing an 'Environment' table followed by
     * the per-element table built by renderInfo().
     * @param $config Configuration object, taken by reference; its HTML
     *                definition is fetched and stored in $this->def
     * @return String HTML
     */
    function render(&$config) {
        $ret = '';
        $this->config =& $config;
        $this->def =& $config->getHTMLDefinition();
        $def =& $this->def;

        $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer'));
        $ret .= $this->start('table') . "\n";
        $ret .= $this->element('caption', 'Environment');

        $ret .= $this->row('Parent of fragment', $def->info_parent) . "\n";
        $ret .= $this->row('Strict mode', $def->strict) . "\n";
        if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper) . "\n";

        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Global attributes');
            // listifyAttr() returns ready-made HTML, hence no attributes (0)
            // and no escaping (0)
            $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
        $ret .= $this->end('tr');

        $ret .= $this->renderChildren($def->info_parent_def->child);

        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Tag transforms');
            $list = array();
            foreach ($def->info_tag_transform as $old => $new) {
                $new = $this->getClass($new, 'TagTransform_');
                $list[] = "<$old> with $new";
            }
            $ret .= $this->element('td', $this->listify($list));
        $ret .= $this->end('tr');

        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Pre-AttrTransform');
            $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre));
        $ret .= $this->end('tr');

        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Post-AttrTransform');
            $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post));
        $ret .= $this->end('tr');

        $ret .= $this->end('table') . "\n";

        $ret .= $this->renderInfo() . "\n";

        $ret .= $this->end('div');

        return $ret;
    }

    /**
     * Renders the 'Elements ($info)' table: the list of allowed tags,
     * followed by a group of rows for every element definition.
     * Note that $this->def->info is sorted by key in place.
     * @return String HTML table
     */
    function renderInfo() {
        $ret = '';
        $ret .= $this->start('table') . "\n";
        $ret .= $this->element('caption', 'Elements ($info)');
        ksort($this->def->info);
        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Allowed tags', array('colspan' => 2, 'class' => 'heavy'));
        $ret .= $this->end('tr');
        $ret .= $this->start('tr');
            $ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2));
        $ret .= $this->end('tr');
        foreach ($this->def->info as $name => $def) {
            $ret .= $this->start('tr');
                $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
            $ret .= $this->end('tr');
            $ret .= $this->start('tr');
                $ret .= $this->element('th', 'Type');
                $ret .= $this->element('td', ucfirst($def->type));
            $ret .= $this->end('tr');
            // the following rows are optional and only shown when non-empty
            if (!empty($def->excludes)) {
                $ret .= $this->start('tr');
                    $ret .= $this->element('th', 'Excludes');
                    $ret .= $this->element('td', $this->listifyTagLookup($def->excludes));
                $ret .= $this->end('tr');
            }
            if (!empty($def->attr_transform_pre)) {
                $ret .= $this->start('tr');
                    $ret .= $this->element('th', 'Pre-AttrTransform');
                    $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre));
                $ret .= $this->end('tr');
            }
            if (!empty($def->attr_transform_post)) {
                $ret .= $this->start('tr');
                    $ret .= $this->element('th', 'Post-AttrTransform');
                    $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post));
                $ret .= $this->end('tr');
            }
            if (!empty($def->auto_close)) {
                $ret .= $this->start('tr');
                    $ret .= $this->element('th', 'Auto closed by');
                    $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close));
                $ret .= $this->end('tr');
            }
            $ret .= $this->start('tr');
                $ret .= $this->element('th', 'Allowed attributes');
                $ret .= $this->element('td',$this->listifyAttr($def->attr),0,0);
            $ret .= $this->end('tr');

            $ret .= $this->renderChildren($def->child);
        }
        $ret .= $this->end('table');
        return $ret;
    }

    /**
     * Renders the 'Allowed children' row(s) for a child definition.
     * A 'chameleon' definition produces two rows (block and inline); every
     * other type produces a single row.
     * @param $def Child definition object; its type property and, where
     *             present, its elements / block / inline properties are read
     * @return String HTML table row(s)
     */
    function renderChildren($def) {
        $context = new HTMLPurifier_Context();
        $ret = '';
        $ret .= $this->start('tr');
            $elements = array();
            $attr = array();
            if (isset($def->elements)) {
                // NOTE(review): a strictblockquote definition is validated
                // once before its elements are read, presumably because it
                // populates them lazily - confirm against the ChildDef class.
                if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context);
                $elements = $def->elements;
            } elseif ($def->type == 'chameleon') {
                // the header cell spans the block row and the inline row
                $attr['rowspan'] = 2;
            } elseif ($def->type == 'empty') {
                $elements = array();
            } elseif ($def->type == 'table') {
                // listifyTagLookup() takes tag names from the array KEYS,
                // so the list of names has to be flipped into a lookup;
                // with a plain list every entry would be filtered out.
                $elements = array_flip(array('col', 'caption', 'colgroup',
                    'thead', 'tfoot', 'tbody', 'tr'));
            }
            $ret .= $this->element('th', 'Allowed children', $attr);

            if ($def->type == 'chameleon') {

                // cell contents contain markup (<em>), so the tag list is
                // escaped by hand and element() is told not to escape
                $ret .= $this->element('td',
                    '<em>Block</em>: ' .
                    $this->escape($this->listifyTagLookup($def->block->elements)),0,0);
                $ret .= $this->end('tr');
                $ret .= $this->start('tr');
                $ret .= $this->element('td',
                    '<em>Inline</em>: ' .
                    $this->escape($this->listifyTagLookup($def->inline->elements)),0,0);

            } else {
                $ret .= $this->element('td',
                    '<em>'.ucfirst($def->type).'</em>: ' .
                    $this->escape($this->listifyTagLookup($elements)),0,0);
            }
        $ret .= $this->end('tr');
        return $ret;
    }

    /**
     * Listifies the keys of a tag lookup array, keeping only '#PCDATA' and
     * names that exist in $this->def->info.
     * @param $array Lookup array keyed by tag name (values are ignored)
     * @return String list as produced by listify()
     */
    function listifyTagLookup($array) {
        $list = array();
        foreach ($array as $name => $discard) {
            if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
            $list[] = $name;
        }
        return $this->listify($list);
    }

    /**
     * Listifies the class names of a list of objects, with the
     * 'HTMLPurifier_AttrTransform_' prefix removed.
     * @param $array Array of objects (keys are ignored)
     * @return String list as produced by listify()
     */
    function listifyObjectList($array) {
        $list = array();
        foreach ($array as $discard => $obj) {
            $list[] = $this->getClass($obj, 'AttrTransform_');
        }
        return $this->listify($list);
    }

    /**
     * Listifies attributes as "name = <i>class</i>" pairs, skipping entries
     * whose value is exactly false. The result contains HTML markup and
     * must therefore be emitted without further escaping.
     * @param $array Array mapping attribute name to an object
     * @return String list as produced by listify()
     */
    function listifyAttr($array) {
        $list = array();
        foreach ($array as $name => $obj) {
            if ($obj === false) continue;
            $list[] = "$name&nbsp;=&nbsp;<i>" . $this->getClass($obj, 'AttrDef_') . '</i>';
        }
        return $this->listify($list);
    }

}
?>

View File

@ -2,8 +2,7 @@
header('Content-type: text/html; charset=UTF-8'); header('Content-type: text/html; charset=UTF-8');
set_include_path('../library' . PATH_SEPARATOR . get_include_path()); require_once '../library/HTMLPurifier.auto.php';
require_once 'HTMLPurifier.php';
function escapeHTML($string) { function escapeHTML($string) {
$string = HTMLPurifier_Encoder::cleanUTF8($string); $string = HTMLPurifier_Encoder::cleanUTF8($string);

View File

@ -0,0 +1,122 @@
<?php
require_once 'common.php'; // load library
require_once 'HTMLPurifier/Printer/HTMLDefinition.php';
require_once 'HTMLPurifier/Printer/CSSDefinition.php';
// Start from the default configuration; an optional local settings file
// placed next to this script may customise it further.
$config = HTMLPurifier_Config::createDefault();

if (file_exists('printDefinition.settings.php')) {
    include 'printDefinition.settings.php';
}

// Work on a copy of the query string. A non-empty "Null_<name>" parameter
// removes both itself and the matching "<name>" parameter, so that
// directive is not loaded from the request.
$get = $_GET;
foreach ($_GET as $key => $value) {
    if (strncmp($key, 'Null_', 5) !== 0 || empty($value)) {
        continue;
    }
    unset($get[substr($key, 5)], $get[$key]);
}

// errors raised while loading the request values are suppressed on purpose
@$config->loadArray($get);

$printer_html_definition = new HTMLPurifier_Printer_HTMLDefinition();
$printer_css_definition  = new HTMLPurifier_Printer_CSSDefinition();

echo '<?xml version="1.0" encoding="UTF-8" ?>';
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>HTML Purifier Printer Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<style type="text/css">
form table {margin:1em auto;}
form th {text-align:right;padding-right:1em;}
.HTMLPurifier_Printer table {border-collapse:collapse;
border:1px solid #000; width:600px;
margin:1em auto;font-family:sans-serif;font-size:75%;}
.HTMLPurifier_Printer td, .HTMLPurifier_Printer th {padding:3px;
border:1px solid #000;background:#CCC; vertical-align: baseline;}
.HTMLPurifier_Printer th {text-align:left;background:#CCF;width:20%;}
.HTMLPurifier_Printer caption {font-size:1.5em; font-weight:bold;
width:100%;}
.HTMLPurifier_Printer .heavy {background:#99C;text-align:center;}
</style>
<script type="text/javascript">
function toggleWriteability(id_of_patient, checked) {
document.getElementById(id_of_patient).disabled = checked;
}
</script>
</head>
<body>
<h1>HTML Purifier Printer Smoketest</h1>
<p>Pretty-print an object and see how it turns out.</p>
<h2>Modify configuration</h2>
<form id="edit-config" method="get" action="printDefinition.php">
<table>
<?php
$directives = $config->getBatch('HTML');
// Emit one form row per directive of the HTML namespace. Array values are
// flattened to a comma-separated string for the text input; hashes can't
// be handled.
foreach ($directives as $key => $value) {
    $directive = "HTML.$key";
    if (is_array($value)) {
        $keys = array_keys($value);
        if ($keys === array_keys($keys)) {
            // sequential integer keys mean a plain list: show its VALUES
            // (joining the keys would print the indices 0,1,2,...)
            $value = implode(',', $value);
        } else {
            // lookup array (name => true): show the names that are set
            $new_value = '';
            foreach ($value as $name => $bool) {
                if ($bool !== true) continue;
                $new_value .= "$name,";
            }
            $value = rtrim($new_value, ',');
        }
    }
    $allow_null = $config->def->info['HTML'][$key]->allow_null;
?>
<tr>
<th>%<?php echo $directive; ?></th>
<td>
<?php if (is_bool($value)) { ?>
<input type="checkbox" name="<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> />
<?php } else { ?>
<?php if($allow_null) { ?>
Null <input
type="checkbox"
value="1"
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
name="Null_<?php echo $directive; ?>"
<?php if ($value === null) { ?> checked="checked"<?php } ?>
/> or
<?php } ?>
<input
type="text"
id="<?php echo $directive; ?>"
name="<?php echo $directive; ?>"
value="<?php echo escapeHTML($value); ?>"
<?php if($value === null) {echo 'disabled="disabled"';} ?>
/>
<?php } ?>
</td>
</tr>
<?php
}
?>
<tr>
<td colspan="2" style="text-align:right;">
[<a href="printDefinition.php">Reset</a>]
<input type="submit" value="Submit" />
</td>
</tr>
</table>
</form>
<h2>HTMLDefinition</h2>
<?php echo $printer_html_definition->render($config) ?>
<h2>CSSDefinition</h2>
<?php echo $printer_css_definition->render($config) ?>
</body>
</html>

View File

@ -2,16 +2,17 @@
require_once 'common.php'; require_once 'common.php';
echo '<?xml version="1.0" encoding="UTF-8" ?>';
?><!DOCTYPE html ?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html> <html>
<head> <head>
<title>HTMLPurifier UTF-8 Smoketest</title> <title>HTML Purifier UTF-8 Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head> </head>
<body> <body>
<h1>HTMLPurifier UTF-8 Smoketest</h1> <h1>HTML Purifier UTF-8 Smoketest</h1>
<?php <?php
$purifier = new HTMLPurifier(); $purifier = new HTMLPurifier();

View File

@ -2,16 +2,17 @@
require_once 'common.php'; require_once 'common.php';
echo '<?xml version="1.0" encoding="UTF-8" ?>';
?><!DOCTYPE html ?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html> <html>
<head> <head>
<title>HTMLPurifier Variable Width Attack Smoketest</title> <title>HTML Purifier Variable Width Attack Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head> </head>
<body> <body>
<h1>HTMLPurifier Variable Width Attack Smoketest</h1> <h1>HTML Purifier Variable Width Attack Smoketest</h1>
<p>For more information, see <p>For more information, see
<a href="http://applesoup.googlepages.com/bypass_filter.txt">Cheng Peng Su's <a href="http://applesoup.googlepages.com/bypass_filter.txt">Cheng Peng Su's
original advisory.</a> This particular exploit code appears only to work original advisory.</a> This particular exploit code appears only to work

View File

@ -20,7 +20,7 @@ function formatCode($string) {
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html> <html>
<head> <head>
<title>HTMLPurifier XSS Attacks Smoketest</title> <title>HTML Purifier XSS Attacks Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<style type="text/css"> <style type="text/css">
.scroll {overflow:auto; width:100%;} .scroll {overflow:auto; width:100%;}
@ -31,7 +31,7 @@ function formatCode($string) {
</style> </style>
</head> </head>
<body> <body>
<h1>HTMLPurifier XSS Attacks Smoketest</h1> <h1>HTML Purifier XSS Attacks Smoketest</h1>
<p>XSS attacks are from <p>XSS attacks are from
<a href="http://ha.ckers.org/xss.html">http://ha.ckers.org/xss.html</a>.</p> <a href="http://ha.ckers.org/xss.html">http://ha.ckers.org/xss.html</a>.</p>
<p><strong>Caveats:</strong> <p><strong>Caveats:</strong>