0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

[1.3.0] Added spiffy new smoketest printDefinition.php, which lets you twiddle with the configuration settings and see how the internal rules are affected. (currently only complete for HTMLDefinition).

- HTMLPurifier -> HTML Purifier
. HTMLPurifier_Config->getBatch($namespace) added
. More lenient casting to bool from string in HTMLPurifier_ConfigSchema
. <?xml ... tags added to all smoketests

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@578 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-11-24 06:26:02 +00:00
parent 775763c583
commit 73a1e31fad
12 changed files with 483 additions and 14 deletions

4
NEWS
View File

@ -26,6 +26,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! New directive %Attr.DisableURI, which eliminates all hyperlinking
! New directive %URI.Munge, munges URI so you can use some sort of redirector
service to avoid PageRank leaks or warn users that they are exiting your site.
! Added spiffy new smoketest printDefinition.php, which lets you twiddle with
the configuration settings and see how the internal rules are affected.
- Added missing type to ChildDef_Chameleon
- Remove Tidy option from demo if there is not Tidy available
. ChildDef_Required guards against empty tags
@ -33,6 +35,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Added peace-of-mind variable initialization to Strategy_FixNesting
. Added HTMLPurifier->info_parent_def, parent child processing made special
. Added internal documents briefly summarizing future progression of HTML
. HTMLPurifier_Config->getBatch($namespace) added
. More lenient casting to bool from string in HTMLPurifier_ConfigSchema
1.2.1, unknown release date
(bugfix/minor feature release, may be dropped if 1.2.0 is stable)

View File

@ -26,11 +26,11 @@ if (empty($_REQUEST['strict'])) {
?>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>HTMLPurifier Live Demo</title>
<title>HTML Purifier Live Demo</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<h1>HTMLPurifier Live Demo</h1>
<h1>HTML Purifier Live Demo</h1>
<?php
require_once '../../library/HTMLPurifier.auto.php';
@ -89,7 +89,7 @@ validator</a>, copy and paste the <em>entire</em> demo page's source.</p>
} else {
?>
<p>Welcome to the live demo. Enter some HTML and see how HTMLPurifier
<p>Welcome to the live demo. Enter some HTML and see how HTML Purifier
will filter it.</p>
<?php
@ -128,7 +128,7 @@ if (isset($html)) {
</div>
</fieldset>
</form>
<p>Return to <a href="http://hp.jpsband.org/">HTMLPurifier's home page</a>.
<p>Return to <a href="http://hp.jpsband.org/">HTML Purifier's home page</a>.
Try the form in <a href="demo.php?get">GET</a> and <a href="demo.php?post">POST</a> request
flavors (GET is easy to validate with W3C, but POST allows larger inputs).</p>
</body>

View File

@ -68,6 +68,19 @@ class HTMLPurifier_Config
return $this->conf[$namespace][$key];
}
/**
* Retreives an array of directives to values from a given namespace
* @param $namespace String namespace
*/
function getBatch($namespace) {
if (!isset($this->def->info[$namespace])) {
trigger_error('Cannot retrieve undefined namespace',
E_USER_WARNING);
return;
}
return $this->conf[$namespace];
}
/**
* Sets a value to configuration.
* @param $namespace String namespace
@ -134,6 +147,7 @@ class HTMLPurifier_Config
*/
function loadArray($config_array) {
foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
if (strpos($key, '.') !== false) {
// condensed form
list($namespace, $directive) = explode('.', $key);

View File

@ -247,11 +247,20 @@ class HTMLPurifier_ConfigSchema {
case 'bool':
if (is_int($var) && ($var === 0 || $var === 1)) {
$var = (bool) $var;
} elseif (is_string($var)) {
if ($var == 'on' || $var == 'true' || $var == '1') {
$var = true;
} elseif ($var == 'off' || $var == 'false' || $var == '0') {
$var = false;
} else {
break;
}
} elseif (!is_bool($var)) break;
return $var;
case 'list':
case 'hash':
case 'lookup':
if (is_string($var)) $var = explode(',',$var);
if (!is_array($var)) break;
$keys = array_keys($var);
if ($keys === array_keys($keys)) {

View File

@ -0,0 +1,125 @@
<?php
require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Token.php';
require_once 'HTMLPurifier/Encoder.php';
class HTMLPurifier_Printer
{
/**
* Instance of HTMLPurifier_Generator for HTML generation convenience funcs
*/
var $generator;
/**
* Instance of HTMLPurifier_Config, for easy access
*/
var $config;
/**
* Initialize $generator.
*/
function HTMLPurifier_Printer() {
$this->generator = new HTMLPurifier_Generator();
}
/**
* Main function that renders object or aspect of that object
* @param $config Configuration object
*/
function render($config) {}
/**
* Returns a start tag
* @param $tag Tag name
* @param $attr Attribute array
*/
function start($tag, $attr = array()) {
return $this->generator->generateFromToken(
new HTMLPurifier_Token_Start($tag, $attr ? $attr : array())
);
}
/**
* Returns an end teg
* @param $tag Tag name
*/
function end($tag) {
return $this->generator->generateFromToken(
new HTMLPurifier_Token_End($tag)
);
}
/**
* Prints a complete element with content inside
* @param $tag Tag name
* @param $contents Element contents
* @param $attr Tag attributes
* @param $escape Bool whether or not to escape contents
*/
function element($tag, $contents, $attr = array(), $escape = true) {
return $this->start($tag, $attr) .
($escape ? $this->escape($contents) : $contents) .
$this->end($tag);
}
/**
* Prints a simple key/value row in a table.
* @param $name Key
* @param $value Value
*/
function row($name, $value) {
if (is_bool($value)) $value = $value ? 'On' : 'Off';
return
$this->start('tr') . "\n" .
$this->element('th', $name) . "\n" .
$this->element('td', $value) . "\n" .
$this->end('tr')
;
}
/**
* Escapes a string for HTML output.
* @param $string String to escape
*/
function escape($string) {
$string = HTMLPurifier_Encoder::cleanUTF8($string);
$string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
return $string;
}
/**
* Takes a list of strings and turns them into a single list
* @param $array List of strings
* @param $polite Bool whether or not to add an end before the last
*/
function listify($array, $polite = false) {
if (empty($array)) return 'None';
$ret = '';
$i = count($array);
foreach ($array as $value) {
$i--;
$ret .= $value;
if ($i > 0 && !($polite && $i == 1)) $ret .= ', ';
if ($polite && $i == 1) $ret .= 'and ';
}
return $ret;
}
/**
* Retrieves the class of an object without prefixes
* @param $obj Object to determine class of
* @param $prefix Further prefix to remove
*/
function getClass($obj, $prefix = '') {
static $five = null;
if ($five === null) $five = version_compare(PHP_VERSION, '5', '>=');
$prefix = 'HTMLPurifier_' . $prefix;
if (!$five) $prefix = strtolower($prefix);
return str_replace($prefix, '', get_class($obj));
}
}
?>

View File

@ -0,0 +1,10 @@
<?php
class HTMLPurifier_Printer_CSSDefinition
{
function render() {return '<p>To be implemented.</p>';}
}
?>

View File

@ -0,0 +1,184 @@
<?php
require_once 'HTMLPurifier/Printer.php';
class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
{
/**
* Instance of HTMLPurifier_HTMLDefinition, for easy access
*/
var $def;
function render(&$config) {
$ret = '';
$this->config =& $config;
$this->def =& $config->getHTMLDefinition();
$def =& $this->def;
$ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer'));
$ret .= $this->start('table') . "\n";
$ret .= $this->element('caption', 'Environment');
$ret .= $this->row('Parent of fragment', $def->info_parent) . "\n";
$ret .= $this->row('Strict mode', $def->strict) . "\n";
if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper) . "\n";
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Global attributes');
$ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
$ret .= $this->end('tr');
$ret .= $this->renderChildren($def->info_parent_def->child);
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Tag transforms');
$list = array();
foreach ($def->info_tag_transform as $old => $new) {
$new = $this->getClass($new, 'TagTransform_');
$list[] = "<$old> with $new";
}
$ret .= $this->element('td', $this->listify($list));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Pre-AttrTransform');
$ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Post-AttrTransform');
$ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post));
$ret .= $this->end('tr');
$ret .= $this->end('table') . "\n";
$ret .= $this->renderInfo() . "\n";
$ret .= $this->end('div');
return $ret;
}
function renderInfo() {
$ret = '';
$ret .= $this->start('table') . "\n";
$ret .= $this->element('caption', 'Elements ($info)');
ksort($this->def->info);
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Allowed tags', array('colspan' => 2, 'class' => 'heavy'));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
$ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2));
$ret .= $this->end('tr');
foreach ($this->def->info as $name => $def) {
$ret .= $this->start('tr');
$ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Type');
$ret .= $this->element('td', ucfirst($def->type));
$ret .= $this->end('tr');
if (!empty($def->excludes)) {
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Excludes');
$ret .= $this->element('td', $this->listifyTagLookup($def->excludes));
$ret .= $this->end('tr');
}
if (!empty($def->attr_transform_pre)) {
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Pre-AttrTransform');
$ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre));
$ret .= $this->end('tr');
}
if (!empty($def->attr_transform_post)) {
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Post-AttrTransform');
$ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post));
$ret .= $this->end('tr');
}
if (!empty($def->auto_close)) {
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Auto closed by');
$ret .= $this->element('td', $this->listifyTagLookup($def->auto_close));
$ret .= $this->end('tr');
}
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Allowed attributes');
$ret .= $this->element('td',$this->listifyAttr($def->attr),0,0);
$ret .= $this->end('tr');
$ret .= $this->renderChildren($def->child);
}
$ret .= $this->end('table');
return $ret;
}
function renderChildren($def) {
$context = new HTMLPurifier_Context();
$ret = '';
$ret .= $this->start('tr');
$elements = array();
$attr = array();
if (isset($def->elements)) {
if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context);
$elements = $def->elements;
} elseif ($def->type == 'chameleon') {
$attr['rowspan'] = 2;
} elseif ($def->type == 'empty') {
$elements = array();
} elseif ($def->type == 'table') {
$elements = array('col', 'caption', 'colgroup', 'thead',
'tfoot', 'tbody', 'tr');
}
$ret .= $this->element('th', 'Allowed children', $attr);
if ($def->type == 'chameleon') {
$ret .= $this->element('td',
'<em>Block</em>: ' .
$this->escape($this->listifyTagLookup($def->block->elements)),0,0);
$ret .= $this->end('tr');
$ret .= $this->start('tr');
$ret .= $this->element('td',
'<em>Inline</em>: ' .
$this->escape($this->listifyTagLookup($def->inline->elements)),0,0);
} else {
$ret .= $this->element('td',
'<em>'.ucfirst($def->type).'</em>: ' .
$this->escape($this->listifyTagLookup($elements)),0,0);
}
$ret .= $this->end('tr');
return $ret;
}
function listifyTagLookup($array) {
$list = array();
foreach ($array as $name => $discard) {
if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
$list[] = $name;
}
return $this->listify($list);
}
function listifyObjectList($array) {
$list = array();
foreach ($array as $discard => $obj) {
$list[] = $this->getClass($obj, 'AttrTransform_');
}
return $this->listify($list);
}
function listifyAttr($array) {
$list = array();
foreach ($array as $name => $obj) {
if ($obj === false) continue;
$list[] = "$name&nbsp;=&nbsp;<i>" . $this->getClass($obj, 'AttrDef_') . '</i>';
}
return $this->listify($list);
}
}
?>

View File

@ -2,8 +2,7 @@
header('Content-type: text/html; charset=UTF-8');
set_include_path('../library' . PATH_SEPARATOR . get_include_path());
require_once 'HTMLPurifier.php';
require_once '../library/HTMLPurifier.auto.php';
function escapeHTML($string) {
$string = HTMLPurifier_Encoder::cleanUTF8($string);

View File

@ -0,0 +1,122 @@
<?php
require_once 'common.php'; // load library
require_once 'HTMLPurifier/Printer/HTMLDefinition.php';
require_once 'HTMLPurifier/Printer/CSSDefinition.php';
$config = HTMLPurifier_Config::createDefault();
// you can do custom configuration!
if (file_exists('printDefinition.settings.php')) {
include 'printDefinition.settings.php';
}
$get = $_GET;
foreach ($_GET as $key => $value) {
if (!strncmp($key, 'Null_', 5) && !empty($value)) {
unset($get[substr($key, 5)]);
unset($get[$key]);
}
}
@$config->loadArray($get);
$printer_html_definition = new HTMLPurifier_Printer_HTMLDefinition();
$printer_css_definition = new HTMLPurifier_Printer_CSSDefinition();
echo '<?xml version="1.0" encoding="UTF-8" ?>';
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>HTML Purifier Printer Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<style type="text/css">
form table {margin:1em auto;}
form th {text-align:right;padding-right:1em;}
.HTMLPurifier_Printer table {border-collapse:collapse;
border:1px solid #000; width:600px;
margin:1em auto;font-family:sans-serif;font-size:75%;}
.HTMLPurifier_Printer td, .HTMLPurifier_Printer th {padding:3px;
border:1px solid #000;background:#CCC; vertical-align: baseline;}
.HTMLPurifier_Printer th {text-align:left;background:#CCF;width:20%;}
.HTMLPurifier_Printer caption {font-size:1.5em; font-weight:bold;
width:100%;}
.HTMLPurifier_Printer .heavy {background:#99C;text-align:center;}
</style>
<script type="text/javascript">
function toggleWriteability(id_of_patient, checked) {
document.getElementById(id_of_patient).disabled = checked;
}
</script>
</head>
<body>
<h1>HTML Purifier Printer Smoketest</h1>
<p>Pretty-print an object and see how it turns out.</p>
<h2>Modify configuration</h2>
<form id="edit-config" method="get" action="printDefinition.php">
<table>
<?php
$directives = $config->getBatch('HTML');
// can't handle hashes
foreach ($directives as $key => $value) {
$directive = "HTML.$key";
if (is_array($value)) {
$keys = array_keys($value);
if ($keys === array_keys($keys)) {
$value = implode(',', $keys);
} else {
$new_value = '';
foreach ($value as $name => $bool) {
if ($bool !== true) continue;
$new_value .= "$name,";
}
$value = rtrim($new_value, ',');
}
}
$allow_null = $config->def->info['HTML'][$key]->allow_null;
?>
<tr>
<th>%<?php echo $directive; ?></th>
<td>
<?php if (is_bool($value)) { ?>
<input type="checkbox" name="<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> />
<?php } else { ?>
<?php if($allow_null) { ?>
Null <input
type="checkbox"
value="1"
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
name="Null_<?php echo $directive; ?>"
<?php if ($value === null) { ?> checked="checked"<?php } ?>
/> or
<?php } ?>
<input
type="text"
id="<?php echo $directive; ?>"
name="<?php echo $directive; ?>"
value="<?php echo escapeHTML($value); ?>"
<?php if($value === null) {echo 'disabled="disabled"';} ?>
/>
<?php } ?>
</td>
</tr>
<?php
}
?>
<tr>
<td colspan="2" style="text-align:right;">
[<a href="printDefinition.php">Reset</a>]
<input type="submit" value="Submit" />
</td>
</tr>
</table>
</form>
<h2>HTMLDefinition</h2>
<?php echo $printer_html_definition->render($config) ?>
<h2>CSSDefinition</h2>
<?php echo $printer_css_definition->render($config) ?>
</body>
</html>

View File

@ -2,16 +2,17 @@
require_once 'common.php';
echo '<?xml version="1.0" encoding="UTF-8" ?>';
?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<title>HTMLPurifier UTF-8 Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>HTML Purifier UTF-8 Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<h1>HTMLPurifier UTF-8 Smoketest</h1>
<h1>HTML Purifier UTF-8 Smoketest</h1>
<?php
$purifier = new HTMLPurifier();

View File

@ -2,16 +2,17 @@
require_once 'common.php';
echo '<?xml version="1.0" encoding="UTF-8" ?>';
?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<title>HTMLPurifier Variable Width Attack Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>HTML Purifier Variable Width Attack Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<h1>HTMLPurifier Variable Width Attack Smoketest</h1>
<h1>HTML Purifier Variable Width Attack Smoketest</h1>
<p>For more information, see
<a href="http://applesoup.googlepages.com/bypass_filter.txt">Cheng Peng Su's
original advisory.</a> This particular exploit code appears only to work

View File

@ -20,7 +20,7 @@ function formatCode($string) {
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<title>HTMLPurifier XSS Attacks Smoketest</title>
<title>HTML Purifier XSS Attacks Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<style type="text/css">
.scroll {overflow:auto; width:100%;}
@ -31,7 +31,7 @@ function formatCode($string) {
</style>
</head>
<body>
<h1>HTMLPurifier XSS Attacks Smoketest</h1>
<h1>HTML Purifier XSS Attacks Smoketest</h1>
<p>XSS attacks are from
<a href="http://ha.ckers.org/xss.html">http://ha.ckers.org/xss.html</a>.</p>
<p><strong>Caveats:</strong>