mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-03-11 17:18:44 +00:00
Split out code in Definition.php .
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@73 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
f00a80c561
commit
025b648c99
603
AttrDef.php
603
AttrDef.php
@ -1,608 +1,5 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_Definition
|
||||
{
|
||||
|
||||
var $generator;
|
||||
var $info = array();
|
||||
var $info_closes_p = array(
|
||||
// these are all block elements: blocks aren't allowed in P
|
||||
'address' => true,
|
||||
'blockquote' => true,
|
||||
'dd' => true,
|
||||
'dir' => true,
|
||||
'div' => true,
|
||||
'dl' => true,
|
||||
'dt' => true,
|
||||
'h1' => true,
|
||||
'h2' => true,
|
||||
'h3' => true,
|
||||
'h4' => true,
|
||||
'h5' => true,
|
||||
'h6' => true,
|
||||
'hr' => true,
|
||||
'ol' => true,
|
||||
'p' => true,
|
||||
'pre' => true,
|
||||
'table' => true,
|
||||
'ul' => true
|
||||
);
|
||||
|
||||
// Constructor (PHP 4 style: the method carries the class name).
// Creates the generator that the purification passes use to render
// rejected tag tokens back into their textual form.
function HTMLPurifier_Definition() {
    $generator = new HTMLPurifier_Generator();
    $this->generator = $generator;
}
|
||||
|
||||
// Fills $this->info with one HTMLDTD_Element per supported element, keyed
// by tag name, emulating the structure of the DTD.
//
// DTD entities are held as '|'-separated strings in variables prefixed
// with e_ (with _ replacing .). Strings are used instead of arrays because
// they interpolate into one another directly; arrays would need some
// hideous manipulation with array_merge().
//
// The lists are condensed, with bad stuff taken out.
// Transforms still to come: font, menu, dir, center.
//
// DON'T MONKEY AROUND THIS unless you know what you are doing and also
// know the assumptions the code makes about what this contains for
// optimization purposes (see fixNesting).
function loadData() {

    // ---- entities -------------------------------------------------------

    $e_special_extra = 'img';
    $e_special_basic = 'br | span | bdo';
    $e_special = "$e_special_basic | $e_special_extra";

    $e_fontstyle_extra = 'big | small';
    $e_fontstyle_basic = 'tt | i | b | u | s | strike';
    $e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra";

    $e_phrase_extra = 'sub | sup';
    $e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
                      ' | cite | abbr | acronym';
    $e_phrase = "$e_phrase_basic | $e_phrase_extra";

    $e_inline_forms = ''; // humor the dtd
    $e_misc_inline = 'ins | del';
    $e_misc = "$e_misc_inline";
    $e_inline = "a | $e_special | $e_fontstyle | $e_phrase".
                " | $e_inline_forms";

    // note the casing: $e_Inline is a content model, $e_inline a name list
    $e_Inline = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_inline".
                " | $e_misc_inline");

    $e_heading = 'h1|h2|h3|h4|h5|h6';
    $e_lists = 'ul | ol | dl';
    $e_blocktext = 'pre | hr | blockquote | address';
    $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";

    $e_Flow = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block".
              " | $e_inline | $e_misc");
    $e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_special".
                   " | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline");
    $e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
                     " | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
                     " | $e_inline_forms | $e_misc_inline");
    $e_form_content = new HTMLPurifier_ChildDef_Optional(''); //unused
    $e_form_button_content = new HTMLPurifier_ChildDef_Optional(''); // unused

    // ---- elements -------------------------------------------------------
    // Each group shares one definition. Names are listed in the order the
    // keys end up being stored in $this->info, so iteration order of the
    // array is the same as with a right-to-left chained assignment.

    // elements taking flow content
    $flow_def = new HTMLDTD_Element($e_Flow);
    $flow_names = array('div', 'li', 'dd', 'blockquote', 'del', 'ins');
    foreach ($flow_names as $name) {
        $this->info[$name] = $flow_def;
    }

    // elements taking inline content
    $inline_def = new HTMLDTD_Element($e_Inline);
    $inline_names = array(
        'h6', 'h5', 'h4', 'h3', 'h2', 'h1',
        'p', 'dt', 'span', 'bdo',
        'strike', 's', 'u', 'small', 'big', 'b', 'i',
        'sup', 'tt', 'sub', 'q',
        'acronym', 'abbr', 'cite',
        'var', 'kbd', 'samp', 'code',
        'dfn', 'strong', 'em'
    );
    foreach ($inline_names as $name) {
        $this->info[$name] = $inline_def;
    }

    // lists: must contain at least one item
    $list_def = new HTMLDTD_Element(
        new HTMLPurifier_ChildDef_Required('li')
    );
    $this->info['ul'] = $list_def;
    $this->info['ol'] = $list_def;

    $this->info['dl'] = new HTMLDTD_Element(
        new HTMLPurifier_ChildDef_Required('dt|dd')
    );

    $this->info['address'] = new HTMLDTD_Element(
        new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
        " | $e_misc_inline")
    );

    // elements that never have content
    $empty_def = new HTMLDTD_Element(new HTMLPurifier_ChildDef_Empty());
    $this->info['hr'] = $empty_def;
    $this->info['br'] = $empty_def;
    $this->info['img'] = $empty_def;

    $this->info['pre'] = new HTMLDTD_Element($e_pre_content);

    $this->info['a'] = new HTMLDTD_Element($e_a_content);

}
|
||||
|
||||
// Runs the complete purification pipeline over a list of tokens.
// The passes are applied in a fixed order, each one receiving the output
// of the previous one; the result of the last pass is returned.
function purifyTokens($tokens) {
    if (empty($this->info)) $this->loadData();

    $passes = array(
        'removeForeignElements',
        'makeWellFormed',
        'fixNesting',
        'validateAttributes'
    );
    foreach ($passes as $pass) {
        $tokens = $this->$pass($tokens);
    }

    return $tokens;
}
|
||||
|
||||
// First pass: filters the token list down to known tags and text.
//  - tag tokens whose name has no entry in $this->info are replaced by a
//    text token holding the HTML the generator produces for them
//  - comment tokens are stripped
//  - text tokens are kept
//  - every other kind of token is dropped
// Returns the filtered list.
function removeForeignElements($tokens) {
    if (empty($this->info)) $this->loadData();

    $kept = array();
    foreach ($tokens as $token) {

        if (empty($token->is_tag)) {
            // strip comments
            if ($token->type == 'comment') continue;
            // anything that is neither a tag nor text is discarded
            if ($token->type != 'text') continue;
            $kept[] = $token;
            continue;
        }

        if (!isset($this->info[$token->name])) {
            // invalid tag, generate HTML and insert it as text instead
            $html = $this->generator->generateFromToken($token);
            $token = new HTMLPurifier_Token_Text($html);
        }
        $kept[] = $token;
    }

    return $kept;
}
|
||||
|
||||
// Second pass: makes the tag structure well formed.
//  - start tags of elements with an 'empty' content model become empty tags
//  - empty tags of elements that do take content become a start/end pair
//  - a block-level start tag directly inside <p>, or an <li> directly
//    inside <li>, first closes that parent
//  - an end tag closes every element opened since its matching start tag;
//    an end tag with no matching open element is turned into text
//  - elements still open at the end of input are closed
// Returns the repaired token list.
function makeWellFormed($tokens) {
    if (empty($this->info)) $this->loadData();

    $result = array();
    $open   = array(); // stack of start tokens that are not closed yet

    foreach ($tokens as $token) {

        // text and other non-tag tokens pass straight through
        if (empty($token->is_tag)) {
            $result[] = $token;
            continue;
        }

        // assumes the tag name is known (purifyTokens() runs
        // removeForeignElements() before this pass)
        $info = $this->info[$token->name];
        $is_empty_element = ($info->child_def->type == 'empty');

        // claims to be a start tag but the element is always empty
        if ($is_empty_element && $token->type == 'start') {
            $result[] = new HTMLPurifier_Token_Empty(
                $token->name, $token->attributes
            );
            continue;
        }

        // claims to be empty but the element really takes content
        if (!$is_empty_element && $token->type == 'empty') {
            $result[] = new HTMLPurifier_Token_Start(
                $token->name, $token->attributes
            );
            $result[] = new HTMLPurifier_Token_End($token->name);
            continue;
        }

        // genuine empty tags are inserted as they are
        if ($token->type == 'empty') {
            $result[] = $token;
            continue;
        }

        // start tags take precedence and are always accepted; the only
        // special cases are the implied end tags below
        if ($token->type == 'start') {

            if (!empty($open)) {
                $parent = $open[count($open) - 1];

                $closes_p  = $parent->name == 'p' &&
                             isset($this->info_closes_p[$token->name]);
                $closes_li = $parent->name == 'li' &&
                             $token->name == 'li';

                if ($closes_p || $closes_li) {
                    array_pop($open);
                    $result[] = new HTMLPurifier_Token_End(
                        $closes_p ? 'p' : 'li'
                    );
                }

                // this is more TIDY stuff
                // we should also get some TABLE related code
                // mismatched h#
            }

            $result[] = $token;
            $open[]   = $token;
            continue;
        }

        // sanity check: only end tags are left to handle
        if ($token->type != 'end') continue;

        // nothing is open, so the end tag can only be shown as text
        if (empty($open)) {
            $result[] = new HTMLPurifier_Token_Text(
                $this->generator->generateFromToken($token)
            );
            continue;
        }

        // simplest case: the end tag closes the innermost open element
        $top = count($open) - 1;
        if ($open[$top]->name == $token->name) {
            array_pop($open);
            $result[] = $token;
            continue;
        }

        // wrong tag is being closed: scroll back through the rest of the
        // stack looking for the element it belongs to
        // (a feature could be to limit how far back we go)
        $match = -1;
        for ($k = $top - 1; $k >= 0; $k--) {
            if ($open[$k]->name == $token->name) {
                $match = $k;
                break;
            }
        }

        // still not found, so translate the end tag to text
        if ($match < 0) {
            $result[] = new HTMLPurifier_Token_Text(
                $this->generator->generateFromToken($token)
            );
            continue;
        }

        // found it: take the matching element and everything opened after
        // it off the stack and close them all, innermost first
        $unwound = array_splice($open, $match);
        foreach (array_reverse($unwound) as $element) {
            $result[] = new HTMLPurifier_Token_End($element->name);
        }

    }

    // end of input: close whatever is still open, innermost first
    foreach (array_reverse($open) as $element) {
        $result[] = new HTMLPurifier_Token_End($element->name);
    }

    return $result;
}
|
||||
|
||||
// Third pass: checks the children of every element against the child
// definition registered for it in $this->info.
//
// The list is wrapped in an implicit <div> for the duration of the pass
// so that top-level tokens are validated as children too. Each start tag
// is visited in document order; validateChildren() on its definition
// answers with
//   true   - leave the node as it is
//   false  - remove the node together with all of its children
//   array  - replace the node's children with these tokens
// Returns the corrected token list without the implicit <div>.
function fixNesting($tokens) {
    if (empty($this->info)) $this->loadData();

    // insert implicit "parent" node, will be removed at end
    array_unshift($tokens, new HTMLPurifier_Token_Start('div'));
    $tokens[] = new HTMLPurifier_Token_End('div');

    $size = count($tokens);
    $i = 0;
    while ($i < $size) {

        // scroll to the end of this node, collecting everything strictly
        // between its start tag (index $i) and its end tag (index $j)
        $children = array();
        $depth = 0;
        for ($j = $i; ; $j++) {
            $type = $tokens[$j]->type;
            if ($type == 'start') {
                // depth 1 is the node's own start tag: not a child
                if (++$depth == 1) continue;
            } elseif ($type == 'end') {
                // depth 0 is the node's own end tag: stop here
                if (--$depth == 0) break;
            }
            $children[] = $tokens[$j];
        }

        // have the DTD child def validate the children
        $definition = $this->info[$tokens[$i]->name];
        $verdict = $definition->child_def->validateChildren($children);

        if ($verdict === false) {

            // WARNING WARNING WARNING!!!
            // While for the original DTD, there will never be cascading
            // removal, more complex ones may have such a problem.
            //
            // If you modify the info array such that an element that
            // requires children may contain a child that requires
            // children, you need to also scroll back and re-check that
            // element's parent node.

            // remove the entire node, start and end tag included
            $removed = $j - $i + 1;
            array_splice($tokens, $i, $removed);
            $size -= $removed;

            // step back so the increment below lands on the token that
            // now occupies the removed node's position
            $i--;

        } elseif ($verdict !== true) {

            // replace the children (not the tags) with the new tokens
            $replaced = $j - $i - 1;
            array_splice($tokens, $i + 1, $replaced, $verdict);
            $size += count($verdict) - $replaced;

        }
        // $verdict === true: leave the nodes as is

        // scroll to the next start tag
        $i++;
        while ($i < $size and $tokens[$i]->type != 'start') $i++;

    }

    // remove implicit divs
    array_shift($tokens);
    array_pop($tokens);

    return $tokens;

}
|
||||
|
||||
// Fourth pass: attribute validation.
// No attribute rules are implemented yet, so this is a placeholder that
// hands the token list back unchanged.
//
// $tokens   list of tokens to validate
// returns   the same list of tokens
function validateAttributes($tokens) {
    if (empty($this->info)) $this->loadData();

    // purifyTokens() returns the result of this pass; without an explicit
    // return the whole pipeline would produce null
    return $tokens;
}
|
||||
|
||||
}
|
||||
|
||||
// Definition of one element: the rule for its children plus its attribute
// definitions.
class HTMLDTD_Element
{

    // child definition object for this element
    var $child_def;

    // attribute definitions; empty array when none are given
    var $attr_def = array();

    // Constructor (PHP 4 style): stores both definitions as given.
    function HTMLDTD_Element($child_def, $attr_def = array()) {
        $this->attr_def  = $attr_def;
        $this->child_def = $child_def;
    }

}
|
||||
|
||||
// HTMLPurifier_ChildDef and inheritance have three types of output:
|
||||
// true = leave nodes as is
|
||||
// false = delete parent node and all children
|
||||
// array(...) = replace children nodes with these
|
||||
|
||||
// this is the hardest one to implement. We'll use fancy regexp tricks
|
||||
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
||||
// to fix the tree)
|
||||
|
||||
// we may end up writing custom code for each HTML case
|
||||
// in order to make it self correcting
|
||||
// Child definition driven by a DTD-style content model expression.
// The expression is compiled into a PCRE pattern once, and
// validateChildren() matches the comma-separated names of an element's
// direct children against it. This variant only answers true or false;
// it never attempts to repair the children.
class HTMLPurifier_ChildDef
{
    // kind of definition; subclasses override this
    var $type = 'custom';

    // content model expression as passed to the constructor
    var $dtd_regex;

    // PCRE pattern body compiled from $dtd_regex (no delimiters/anchors)
    var $_pcre_regex;

    // Constructor (PHP 4 style): stores the expression and compiles it.
    function HTMLPurifier_ChildDef($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    // Translates $this->dtd_regex into $this->_pcre_regex:
    // spaces are removed, the expression is wrapped in parentheses unless
    // it already starts with one, every ',' becomes optional, and every
    // name is wrapped in a group that accepts an optional leading comma.
    function _compileRegex() {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // substr() instead of the $raw{0} offset syntax: curly-brace
        // offsets no longer parse on current PHP, and substr() is also
        // safe when the expression is empty
        if (substr($raw, 0, 1) !== '(') {
            $raw = "($raw)";
        }
        $reg = str_replace(',', ',?', $raw);
        $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
        $this->_pcre_regex = $reg;
    }

    // Checks the direct children found in $tokens_of_children against the
    // compiled pattern.
    //
    // $tokens_of_children   every token between an element's start and
    //                       end tag, nested descendants included
    // returns               true when the children match, false otherwise
    function validateChildren($tokens_of_children) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) continue;

            // a token is a direct child when it is seen at depth 0,
            // i.e. before its own start tag deepens the nest
            $is_child = ($nesting == 0);

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                $list_of_children .= $token->name . ',';
            }
        }
        $list_of_children = rtrim($list_of_children, ',');

        $okay =
            preg_match(
                '/^'.$this->_pcre_regex.'$/',
                $list_of_children
            );

        return (bool) $okay;
    }
}
|
||||
// Base for child definitions that are a plain set of allowed element
// names rather than a full content model expression. It only builds the
// lookup table; validateChildren() must be supplied by a subclass.
class HTMLPurifier_ChildDef_Simple extends HTMLPurifier_ChildDef
{
    // lookup table of allowed names: array(name => true)
    var $elements = array();

    // generator used by subclasses to render rejected tokens as text;
    // declared here because the constructor assigns it
    var $gen;

    // Constructor (PHP 4 style).
    // $elements   either an array of names or a '|'-separated string of
    //             names (spaces are ignored)
    function HTMLPurifier_ChildDef_Simple($elements) {
        if (is_string($elements)) {
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
        // turn the list of names into name => true for isset() lookups
        $elements = array_flip($elements);
        foreach ($elements as $i => $x) $elements[$i] = true;
        $this->elements = $elements;
        $this->gen = new HTMLPurifier_Generator();
    }

    // Abstract: always raises an E_USER_ERROR.
    // The parameter is accepted (and ignored) so the signature stays
    // compatible with HTMLPurifier_ChildDef::validateChildren() above it
    // and with the subclasses that override it.
    function validateChildren($tokens_of_children) {
        trigger_error('Cannot call abstract function!', E_USER_ERROR);
    }
}
|
||||
// Child definition for elements that must contain at least one allowed
// child. Direct children that are not allowed are removed together with
// their descendants; if nothing meaningful remains the parent itself is
// to be deleted.
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef_Simple
{
    var $type = 'required';

    // $tokens_of_children   every token between the parent's start and
    //                       end tag, nested descendants included
    // returns   false   delete the parent node (no children, nothing
    //                   left after filtering, or only whitespace)
    //           true    the children are fine as they are
    //           array   the children to use instead
    function validateChildren($tokens_of_children) {
        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // whether or not parsed character data is allowed; this controls
        // whether a rejected token is silently dropped or kept as text
        // generated from it
        $pcdata_allowed = isset($this->elements['#PCDATA']);

        $filtered   = array(); // the new set of children
        $depth      = 0;       // current depth into the nest
        $rejecting  = false;   // inside a direct child that was rejected
        $only_space = true;    // no non-whitespace token seen so far

        foreach ($tokens_of_children as $token) {

            // whitespace is always carried over untouched
            if (!empty($token->is_whitespace)) {
                $filtered[] = $token;
                continue;
            }
            $only_space = false;

            // a token seen at depth 0 is a direct child
            $is_direct_child = ($depth == 0);

            if ($token->type == 'start') {
                $depth++;
            } elseif ($token->type == 'end') {
                $depth--;
            }

            // each direct child decides afresh whether we are rejecting;
            // its descendants and end tag inherit that decision
            if ($is_direct_child) {
                $rejecting = !isset($this->elements[$token->name]);
            }

            if (!$rejecting) {
                $filtered[] = $token;
            } elseif ($pcdata_allowed) {
                $filtered[] = new HTMLPurifier_Token_Text(
                    $this->gen->generateFromToken($token)
                );
            }
            // otherwise: drop silently
        }

        if (empty($filtered) || $only_space) return false;
        if ($tokens_of_children == $filtered) return true;
        return $filtered;
    }
}
|
||||
|
||||
// only altered behavior is that it returns an empty array
|
||||
// instead of a false (to delete the node)
|
||||
// Same filtering as HTMLPurifier_ChildDef_Required, except that the
// parent node is never deleted: where the required variant answers false,
// this one answers with an empty array (i.e. "no children").
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    var $type = 'optional';

    // returns true, or the array of children to use (possibly empty)
    function validateChildren($tokens_of_children) {
        $verdict = parent::validateChildren($tokens_of_children);
        return ($verdict === false) ? array() : $verdict;
    }
}
|
||||
|
||||
// placeholder
|
||||
// Placeholder child definition for elements that never have content.
// Code elsewhere recognises these elements through $type == 'empty'.
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    var $type = 'empty';

    // Constructor (PHP 4 style): intentionally empty, there is no
    // content model expression to store or compile.
    function HTMLPurifier_ChildDef_Empty() {}

    // Always answers false.
    // The parameter is optional and ignored; it is declared so the
    // signature stays compatible with the parent class and with callers
    // that pass the child tokens.
    function validateChildren($tokens_of_children = array()) {
        return false;
    }
}
|
||||
|
||||
class HTMLPurifier_AttrDef
|
||||
{
|
||||
var $def;
|
||||
|
445
ChildDef.php
445
ChildDef.php
@ -1,442 +1,5 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_Definition
|
||||
{
|
||||
|
||||
var $generator;
|
||||
var $info = array();
|
||||
var $info_closes_p = array(
|
||||
// these are all block elements: blocks aren't allowed in P
|
||||
'address' => true,
|
||||
'blockquote' => true,
|
||||
'dd' => true,
|
||||
'dir' => true,
|
||||
'div' => true,
|
||||
'dl' => true,
|
||||
'dt' => true,
|
||||
'h1' => true,
|
||||
'h2' => true,
|
||||
'h3' => true,
|
||||
'h4' => true,
|
||||
'h5' => true,
|
||||
'h6' => true,
|
||||
'hr' => true,
|
||||
'ol' => true,
|
||||
'p' => true,
|
||||
'pre' => true,
|
||||
'table' => true,
|
||||
'ul' => true
|
||||
);
|
||||
|
||||
// Constructor (PHP 4 style: the method carries the class name).
// Creates the generator that the purification passes use to render
// rejected tag tokens back into their textual form.
function HTMLPurifier_Definition() {
    $generator = new HTMLPurifier_Generator();
    $this->generator = $generator;
}
|
||||
|
||||
// Fills $this->info with one HTMLDTD_Element per supported element, keyed
// by tag name, emulating the structure of the DTD.
//
// DTD entities are held as '|'-separated strings in variables prefixed
// with e_ (with _ replacing .). Strings are used instead of arrays because
// they interpolate into one another directly; arrays would need some
// hideous manipulation with array_merge().
//
// The lists are condensed, with bad stuff taken out.
// Transforms still to come: font, menu, dir, center.
//
// DON'T MONKEY AROUND THIS unless you know what you are doing and also
// know the assumptions the code makes about what this contains for
// optimization purposes (see fixNesting).
function loadData() {

    // ---- entities -------------------------------------------------------

    $e_special_extra = 'img';
    $e_special_basic = 'br | span | bdo';
    $e_special = "$e_special_basic | $e_special_extra";

    $e_fontstyle_extra = 'big | small';
    $e_fontstyle_basic = 'tt | i | b | u | s | strike';
    $e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra";

    $e_phrase_extra = 'sub | sup';
    $e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
                      ' | cite | abbr | acronym';
    $e_phrase = "$e_phrase_basic | $e_phrase_extra";

    $e_inline_forms = ''; // humor the dtd
    $e_misc_inline = 'ins | del';
    $e_misc = "$e_misc_inline";
    $e_inline = "a | $e_special | $e_fontstyle | $e_phrase".
                " | $e_inline_forms";

    // note the casing: $e_Inline is a content model, $e_inline a name list
    $e_Inline = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_inline".
                " | $e_misc_inline");

    $e_heading = 'h1|h2|h3|h4|h5|h6';
    $e_lists = 'ul | ol | dl';
    $e_blocktext = 'pre | hr | blockquote | address';
    $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";

    $e_Flow = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block".
              " | $e_inline | $e_misc");
    $e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_special".
                   " | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline");
    $e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
                     " | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
                     " | $e_inline_forms | $e_misc_inline");
    $e_form_content = new HTMLPurifier_ChildDef_Optional(''); //unused
    $e_form_button_content = new HTMLPurifier_ChildDef_Optional(''); // unused

    // ---- elements -------------------------------------------------------
    // Each group shares one definition. Names are listed in the order the
    // keys end up being stored in $this->info, so iteration order of the
    // array is the same as with a right-to-left chained assignment.

    // elements taking flow content
    $flow_def = new HTMLDTD_Element($e_Flow);
    $flow_names = array('div', 'li', 'dd', 'blockquote', 'del', 'ins');
    foreach ($flow_names as $name) {
        $this->info[$name] = $flow_def;
    }

    // elements taking inline content
    $inline_def = new HTMLDTD_Element($e_Inline);
    $inline_names = array(
        'h6', 'h5', 'h4', 'h3', 'h2', 'h1',
        'p', 'dt', 'span', 'bdo',
        'strike', 's', 'u', 'small', 'big', 'b', 'i',
        'sup', 'tt', 'sub', 'q',
        'acronym', 'abbr', 'cite',
        'var', 'kbd', 'samp', 'code',
        'dfn', 'strong', 'em'
    );
    foreach ($inline_names as $name) {
        $this->info[$name] = $inline_def;
    }

    // lists: must contain at least one item
    $list_def = new HTMLDTD_Element(
        new HTMLPurifier_ChildDef_Required('li')
    );
    $this->info['ul'] = $list_def;
    $this->info['ol'] = $list_def;

    $this->info['dl'] = new HTMLDTD_Element(
        new HTMLPurifier_ChildDef_Required('dt|dd')
    );

    $this->info['address'] = new HTMLDTD_Element(
        new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
        " | $e_misc_inline")
    );

    // elements that never have content
    $empty_def = new HTMLDTD_Element(new HTMLPurifier_ChildDef_Empty());
    $this->info['hr'] = $empty_def;
    $this->info['br'] = $empty_def;
    $this->info['img'] = $empty_def;

    $this->info['pre'] = new HTMLDTD_Element($e_pre_content);

    $this->info['a'] = new HTMLDTD_Element($e_a_content);

}
|
||||
|
||||
// Runs the complete purification pipeline over a list of tokens.
// The passes are applied in a fixed order, each one receiving the output
// of the previous one; the result of the last pass is returned.
function purifyTokens($tokens) {
    if (empty($this->info)) $this->loadData();

    $passes = array(
        'removeForeignElements',
        'makeWellFormed',
        'fixNesting',
        'validateAttributes'
    );
    foreach ($passes as $pass) {
        $tokens = $this->$pass($tokens);
    }

    return $tokens;
}
|
||||
|
||||
// First pass: filters the token list down to known tags and text.
//  - tag tokens whose name has no entry in $this->info are replaced by a
//    text token holding the HTML the generator produces for them
//  - comment tokens are stripped
//  - text tokens are kept
//  - every other kind of token is dropped
// Returns the filtered list.
function removeForeignElements($tokens) {
    if (empty($this->info)) $this->loadData();

    $kept = array();
    foreach ($tokens as $token) {

        if (empty($token->is_tag)) {
            // strip comments
            if ($token->type == 'comment') continue;
            // anything that is neither a tag nor text is discarded
            if ($token->type != 'text') continue;
            $kept[] = $token;
            continue;
        }

        if (!isset($this->info[$token->name])) {
            // invalid tag, generate HTML and insert it as text instead
            $html = $this->generator->generateFromToken($token);
            $token = new HTMLPurifier_Token_Text($html);
        }
        $kept[] = $token;
    }

    return $kept;
}
|
||||
|
||||
// Second pass: makes the tag structure well formed.
//  - start tags of elements with an 'empty' content model become empty tags
//  - empty tags of elements that do take content become a start/end pair
//  - a block-level start tag directly inside <p>, or an <li> directly
//    inside <li>, first closes that parent
//  - an end tag closes every element opened since its matching start tag;
//    an end tag with no matching open element is turned into text
//  - elements still open at the end of input are closed
// Returns the repaired token list.
function makeWellFormed($tokens) {
    if (empty($this->info)) $this->loadData();

    $result = array();
    $open   = array(); // stack of start tokens that are not closed yet

    foreach ($tokens as $token) {

        // text and other non-tag tokens pass straight through
        if (empty($token->is_tag)) {
            $result[] = $token;
            continue;
        }

        // assumes the tag name is known (purifyTokens() runs
        // removeForeignElements() before this pass)
        $info = $this->info[$token->name];
        $is_empty_element = ($info->child_def->type == 'empty');

        // claims to be a start tag but the element is always empty
        if ($is_empty_element && $token->type == 'start') {
            $result[] = new HTMLPurifier_Token_Empty(
                $token->name, $token->attributes
            );
            continue;
        }

        // claims to be empty but the element really takes content
        if (!$is_empty_element && $token->type == 'empty') {
            $result[] = new HTMLPurifier_Token_Start(
                $token->name, $token->attributes
            );
            $result[] = new HTMLPurifier_Token_End($token->name);
            continue;
        }

        // genuine empty tags are inserted as they are
        if ($token->type == 'empty') {
            $result[] = $token;
            continue;
        }

        // start tags take precedence and are always accepted; the only
        // special cases are the implied end tags below
        if ($token->type == 'start') {

            if (!empty($open)) {
                $parent = $open[count($open) - 1];

                $closes_p  = $parent->name == 'p' &&
                             isset($this->info_closes_p[$token->name]);
                $closes_li = $parent->name == 'li' &&
                             $token->name == 'li';

                if ($closes_p || $closes_li) {
                    array_pop($open);
                    $result[] = new HTMLPurifier_Token_End(
                        $closes_p ? 'p' : 'li'
                    );
                }

                // this is more TIDY stuff
                // we should also get some TABLE related code
                // mismatched h#
            }

            $result[] = $token;
            $open[]   = $token;
            continue;
        }

        // sanity check: only end tags are left to handle
        if ($token->type != 'end') continue;

        // nothing is open, so the end tag can only be shown as text
        if (empty($open)) {
            $result[] = new HTMLPurifier_Token_Text(
                $this->generator->generateFromToken($token)
            );
            continue;
        }

        // simplest case: the end tag closes the innermost open element
        $top = count($open) - 1;
        if ($open[$top]->name == $token->name) {
            array_pop($open);
            $result[] = $token;
            continue;
        }

        // wrong tag is being closed: scroll back through the rest of the
        // stack looking for the element it belongs to
        // (a feature could be to limit how far back we go)
        $match = -1;
        for ($k = $top - 1; $k >= 0; $k--) {
            if ($open[$k]->name == $token->name) {
                $match = $k;
                break;
            }
        }

        // still not found, so translate the end tag to text
        if ($match < 0) {
            $result[] = new HTMLPurifier_Token_Text(
                $this->generator->generateFromToken($token)
            );
            continue;
        }

        // found it: take the matching element and everything opened after
        // it off the stack and close them all, innermost first
        $unwound = array_splice($open, $match);
        foreach (array_reverse($unwound) as $element) {
            $result[] = new HTMLPurifier_Token_End($element->name);
        }

    }

    // end of input: close whatever is still open, innermost first
    foreach (array_reverse($open) as $element) {
        $result[] = new HTMLPurifier_Token_End($element->name);
    }

    return $result;
}
|
||||
|
||||
// Third pass: checks the children of every element against the child
// definition registered for it in $this->info.
//
// The list is wrapped in an implicit <div> for the duration of the pass
// so that top-level tokens are validated as children too. Each start tag
// is visited in document order; validateChildren() on its definition
// answers with
//   true   - leave the node as it is
//   false  - remove the node together with all of its children
//   array  - replace the node's children with these tokens
// Returns the corrected token list without the implicit <div>.
function fixNesting($tokens) {
    if (empty($this->info)) $this->loadData();

    // insert implicit "parent" node, will be removed at end
    array_unshift($tokens, new HTMLPurifier_Token_Start('div'));
    $tokens[] = new HTMLPurifier_Token_End('div');

    $size = count($tokens);
    $i = 0;
    while ($i < $size) {

        // scroll to the end of this node, collecting everything strictly
        // between its start tag (index $i) and its end tag (index $j)
        $children = array();
        $depth = 0;
        for ($j = $i; ; $j++) {
            $type = $tokens[$j]->type;
            if ($type == 'start') {
                // depth 1 is the node's own start tag: not a child
                if (++$depth == 1) continue;
            } elseif ($type == 'end') {
                // depth 0 is the node's own end tag: stop here
                if (--$depth == 0) break;
            }
            $children[] = $tokens[$j];
        }

        // have the DTD child def validate the children
        $definition = $this->info[$tokens[$i]->name];
        $verdict = $definition->child_def->validateChildren($children);

        if ($verdict === false) {

            // WARNING WARNING WARNING!!!
            // While for the original DTD, there will never be cascading
            // removal, more complex ones may have such a problem.
            //
            // If you modify the info array such that an element that
            // requires children may contain a child that requires
            // children, you need to also scroll back and re-check that
            // element's parent node.

            // remove the entire node, start and end tag included
            $removed = $j - $i + 1;
            array_splice($tokens, $i, $removed);
            $size -= $removed;

            // step back so the increment below lands on the token that
            // now occupies the removed node's position
            $i--;

        } elseif ($verdict !== true) {

            // replace the children (not the tags) with the new tokens
            $replaced = $j - $i - 1;
            array_splice($tokens, $i + 1, $replaced, $verdict);
            $size += count($verdict) - $replaced;

        }
        // $verdict === true: leave the nodes as is

        // scroll to the next start tag
        $i++;
        while ($i < $size and $tokens[$i]->type != 'start') $i++;

    }

    // remove implicit divs
    array_shift($tokens);
    array_pop($tokens);

    return $tokens;

}
|
||||
|
||||
// Intended (per its name) to validate token attributes; at present it
// is a stub that only makes sure the definition data is loaded.
// $tokens is accepted but not used yet, and nothing is returned.
function validateAttributes($tokens) {
    // populate $this->info on first use
    if (empty($this->info)) {
        $this->loadData();
    }
}
|
||||
|
||||
}
|
||||
|
||||
// Definition of a single element: bundles the object that validates the
// element's children with the element's attribute definitions.
class HTMLDTD_Element
{

    // child definition; fixNesting() calls validateChildren() on it
    var $child_def;

    // attribute definitions, empty array when none were supplied
    var $attr_def = array();

    // Stores both definitions as given.
    //   $child_def - child definition for this element
    //   $attr_def  - optional array of attribute definitions
    function HTMLDTD_Element($child_def, $attr_def = array()) {
        $this->attr_def  = $attr_def;
        $this->child_def = $child_def;
    }

}
|
||||
|
||||
// HTMLPurifier_ChildDef and inheritance have three types of output:
|
||||
// true = leave nodes as is
|
||||
// false = delete parent node and all children
|
||||
@ -603,12 +166,4 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
// Minimal attribute definition: a holder for one opaque definition value.
class HTMLPurifier_AttrDef
{

    // the definition exactly as handed to the constructor
    var $def;

    // Remembers $def without interpreting it.
    function HTMLPurifier_AttrDef($def) {
        $this->def = $def;
    }

}
|
||||
|
||||
?>
|
194
Definition.php
194
Definition.php
@ -85,7 +85,7 @@ class HTMLPurifier_Definition
|
||||
$this->info['blockquote'] =
|
||||
$this->info['dd'] =
|
||||
$this->info['li'] =
|
||||
$this->info['div'] = new HTMLDTD_Element($e_Flow);
|
||||
$this->info['div'] = new HTMLPurifier_ElementDef($e_Flow);
|
||||
|
||||
$this->info['em'] =
|
||||
$this->info['strong'] =
|
||||
@ -121,31 +121,31 @@ class HTMLPurifier_Definition
|
||||
$this->info['h3'] =
|
||||
$this->info['h4'] =
|
||||
$this->info['h5'] =
|
||||
$this->info['h6'] = new HTMLDTD_Element($e_Inline);
|
||||
$this->info['h6'] = new HTMLPurifier_ElementDef($e_Inline);
|
||||
|
||||
$this->info['ol'] =
|
||||
$this->info['ul'] =
|
||||
new HTMLDTD_Element(
|
||||
new HTMLPurifier_ElementDef(
|
||||
new HTMLPurifier_ChildDef_Required('li')
|
||||
);
|
||||
|
||||
$this->info['dl'] =
|
||||
new HTMLDTD_Element(
|
||||
new HTMLPurifier_ElementDef(
|
||||
new HTMLPurifier_ChildDef_Required('dt|dd')
|
||||
);
|
||||
$this->info['address'] =
|
||||
new HTMLDTD_Element(
|
||||
new HTMLPurifier_ElementDef(
|
||||
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
||||
" | $e_misc_inline")
|
||||
);
|
||||
|
||||
$this->info['img'] =
|
||||
$this->info['br'] =
|
||||
$this->info['hr'] = new HTMLDTD_Element(new HTMLPurifier_ChildDef_Empty());
|
||||
$this->info['hr'] = new HTMLPurifier_ElementDef(new HTMLPurifier_ChildDef_Empty());
|
||||
|
||||
$this->info['pre'] = new HTMLDTD_Element($e_pre_content);
|
||||
$this->info['pre'] = new HTMLPurifier_ElementDef($e_pre_content);
|
||||
|
||||
$this->info['a'] = new HTMLDTD_Element($e_a_content);
|
||||
$this->info['a'] = new HTMLPurifier_ElementDef($e_a_content);
|
||||
|
||||
}
|
||||
|
||||
@ -424,191 +424,17 @@ class HTMLPurifier_Definition
|
||||
|
||||
}
|
||||
|
||||
class HTMLDTD_Element
|
||||
class HTMLPurifier_ElementDef
|
||||
{
|
||||
|
||||
var $child_def;
|
||||
var $attr_def = array();
|
||||
|
||||
function HTMLDTD_Element($child_def, $attr_def = array()) {
|
||||
function HTMLPurifier_ElementDef($child_def, $attr_def = array()) {
|
||||
$this->child_def = $child_def;
|
||||
$this->attr_def = $attr_def;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// HTMLPurifier_ChildDef and inheritance have three types of output:
|
||||
// true = leave nodes as is
|
||||
// false = delete parent node and all children
|
||||
// array(...) = replace children nodes with these
|
||||
|
||||
// this is the hardest one to implement. We'll use fancy regexp tricks
|
||||
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
||||
// to fix the tree)
|
||||
|
||||
// we may end up writing custom code for each HTML case
|
||||
// in order to make it self correcting
|
||||
// Generic child definition driven by a DTD content-model expression.
// The expression is compiled into a PCRE pattern once, at construction;
// validateChildren() then matches the comma-separated list of direct
// child names against it. This implementation only answers true/false,
// it never returns a corrected token list.
class HTMLPurifier_ChildDef
{

    // kind of definition; subclasses override this
    var $type = 'custom';

    // content model as written in the DTD, e.g. '(caption?, (tbody+|tr+))'
    var $dtd_regex;

    // PCRE translation of $dtd_regex (without delimiters or anchors)
    var $_pcre_regex;

    // $dtd_regex - DTD content-model expression to validate against
    function HTMLPurifier_ChildDef($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    // Translates $this->dtd_regex into $this->_pcre_regex.
    function _compileRegex() {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Wrap bare expressions in a group. strncmp() is used instead of
        // the $raw{0} offset syntax: curly-brace offsets are rejected by
        // current PHP versions and reading offset 0 of an empty string
        // raises a notice.
        if (strncmp($raw, '(', 1) !== 0) {
            $raw = "($raw)";
        }
        // separators between names become optional...
        $reg = str_replace(',', ',?', $raw);
        // ...and every name may be preceded by the comma that joins the
        // child list built in validateChildren()
        $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
        $this->_pcre_regex = $reg;
    }

    // Checks the direct children found in $tokens_of_children against the
    // compiled content model.
    //   $tokens_of_children - array of token objects; each is read for
    //                         ->type, ->name and (optionally) ->is_whitespace
    // Returns true when the children match, false otherwise.
    function validateChildren($tokens_of_children) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            // whitespace never counts as a child
            if (!empty($token->is_whitespace)) continue;

            // a token is a direct child when seen at depth zero, i.e.
            // before its own start tag deepens the nest
            $is_child = ($nesting == 0);

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                $list_of_children .= $token->name . ',';
            }
        }
        $list_of_children = rtrim($list_of_children, ',');

        $okay =
            preg_match(
                '/^'.$this->_pcre_regex.'$/',
                $list_of_children
            );

        // preg_match() yields 1, 0 or false; callers expect a strict bool
        return (bool) $okay;
    }

}
|
||||
// Base for child definitions that are a plain set of allowed names.
// Meant to be used as an abstract class: it normalises the element list
// and provides a generator, while validateChildren() must be supplied by
// a subclass.
class HTMLPurifier_ChildDef_Simple extends HTMLPurifier_ChildDef
{

    // lookup table of allowed names: name => true
    var $elements = array();

    // HTMLPurifier_Generator instance created in the constructor; declared
    // here so it is not created as an undeclared property
    var $gen;

    // $elements - either an array of names or a '|'-separated string
    //             such as 'dt | dd' (spaces are ignored)
    function HTMLPurifier_ChildDef_Simple($elements) {
        if (is_string($elements)) {
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
        // turn the list into a name => true map for isset() lookups
        $elements = array_flip($elements);
        foreach ($elements as $i => $x) $elements[$i] = true;
        $this->elements = $elements;
        $this->gen = new HTMLPurifier_Generator();
    }

    // Abstract placeholder: always raises E_USER_ERROR.
    // The parameter is accepted (and optional) so that the signature
    // stays compatible with HTMLPurifier_ChildDef::validateChildren().
    function validateChildren($tokens_of_children = array()) {
        trigger_error('Cannot call abstract function!', E_USER_ERROR);
    }

}
|
||||
// Child definition for elements that must contain at least one of the
// allowed children. Children that are not allowed are removed, or turned
// into text when #PCDATA is among the allowed names.
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef_Simple
{

    var $type = 'required';

    // $tokens_of_children - array of the parent's child tokens
    // Returns:
    //   true  - children are fine as they are
    //   false - parent node has to be deleted
    //   array - replacement list of child tokens
    function validateChildren($tokens_of_children) {

        // nothing inside at all: the parent cannot stay
        if (empty($tokens_of_children)) {
            return false;
        }

        $kept = array();           // children that survive
        $depth = 0;                // how deep we are inside a child
        $dropping = false;         // true while inside a rejected child
        $only_whitespace = true;   // flips once real content shows up

        // when text is permitted, rejected tags are rendered as text
        // instead of vanishing
        $text_ok = isset($this->elements['#PCDATA']);

        foreach ($tokens_of_children as $tok) {

            // whitespace is always carried over untouched
            if (!empty($tok->is_whitespace)) {
                $kept[] = $tok;
                continue;
            }
            $only_whitespace = false;

            // remember the depth before this token changes it
            $at_top = ($depth == 0);
            if ($tok->type == 'start') {
                $depth++;
            } elseif ($tok->type == 'end') {
                $depth--;
            }

            // each direct child decides afresh whether we are dropping
            if ($at_top) {
                $dropping = !isset($this->elements[$tok->name]);
            }

            if (!$dropping) {
                $kept[] = $tok;
            } elseif ($text_ok) {
                $kept[] = new HTMLPurifier_Token_Text(
                    $this->gen->generateFromToken($tok)
                );
            }
            // otherwise the token is silently discarded
        }

        if (empty($kept) || $only_whitespace) {
            return false;
        }
        if ($tokens_of_children == $kept) {
            return true;
        }
        return $kept;
    }

}
|
||||
|
||||
// only altered behavior is that it returns an empty array
|
||||
// instead of a false (to delete the node)
|
||||
// Same filtering as HTMLPurifier_ChildDef_Required, except that a parent
// is never deleted: where the required variant answers false, an empty
// child list is returned instead.
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{

    var $type = 'optional';

    // Returns true or the replacement token array; never false.
    function validateChildren($tokens_of_children) {
        $outcome = parent::validateChildren($tokens_of_children);
        return ($outcome === false) ? array() : $outcome;
    }

}
|
||||
|
||||
// placeholder
|
||||
// Placeholder child definition for elements declared with an empty
// content model.
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{

    var $type = 'empty';

    // deliberately empty: there is no DTD expression to compile, so the
    // parent constructor is not invoked
    function HTMLPurifier_ChildDef_Empty() {}

    // Always returns false. The parameter is accepted (and optional) so
    // that the signature stays compatible with
    // HTMLPurifier_ChildDef::validateChildren().
    function validateChildren($tokens_of_children = array()) {
        return false;
    }

}
|
||||
|
||||
// Minimal attribute definition: a holder for one opaque definition value.
class HTMLPurifier_AttrDef
{

    // the definition exactly as handed to the constructor
    var $def;

    // Remembers $def without interpreting it.
    function HTMLPurifier_AttrDef($def) {
        $this->def = $def;
    }

}
|
||||
|
||||
?>
|
@ -125,265 +125,4 @@ class Test_HTMLPurifier_ChildDef extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
// Exercises the token-list passes of HTMLPurifier_Definition:
// removeForeignElements(), makeWellFormed() and fixNesting().
// Every test builds parallel $inputs / $expect arrays and compares the
// pass's output for each input with the expectation at the same index.
class Test_HTMLPurifier_Definition extends UnitTestCase
{

    var $def;
    var $lex;

    function Test_HTMLPurifier_Definition() {
        $this->UnitTestCase();
        $this->def = new HTMLPurifier_Definition();
        $this->def->loadData();
        $this->lex = new HTMLPurifier_Lexer();
    }

    function test_removeForeignElements() {

        $inputs = array();
        $expect = array();

        // empty input stays empty
        $inputs[0] = array();
        $expect[0] = $inputs[0];

        // known elements pass through untouched
        $inputs[1] = array(
            new HTMLPurifier_Token_Text('This is '),
            new HTMLPurifier_Token_Start('b', array()),
            new HTMLPurifier_Token_Text('bold'),
            new HTMLPurifier_Token_End('b'),
            new HTMLPurifier_Token_Text(' text')
        );
        $expect[1] = $inputs[1];

        // unknown elements are turned into text tokens
        $inputs[2] = array(
            new HTMLPurifier_Token_Start('asdf'),
            new HTMLPurifier_Token_End('asdf'),
            new HTMLPurifier_Token_Start('d', array('href' => 'bang!')),
            new HTMLPurifier_Token_End('d'),
            new HTMLPurifier_Token_Start('pooloka'),
            new HTMLPurifier_Token_Start('poolasdf'),
            new HTMLPurifier_Token_Start('ds', array('moogle' => '&')),
            new HTMLPurifier_Token_End('asdf'),
            new HTMLPurifier_Token_End('asdf')
        );
        $expect[2] = array(
            new HTMLPurifier_Token_Text('<asdf>'),
            new HTMLPurifier_Token_Text('</asdf>'),
            new HTMLPurifier_Token_Text('<d href="bang!">'),
            new HTMLPurifier_Token_Text('</d>'),
            new HTMLPurifier_Token_Text('<pooloka>'),
            new HTMLPurifier_Token_Text('<poolasdf>'),
            new HTMLPurifier_Token_Text('<ds moogle="&">'),
            new HTMLPurifier_Token_Text('</asdf>'),
            new HTMLPurifier_Token_Text('</asdf>')
        );

        foreach ($inputs as $idx => $tokens) {
            $actual = $this->def->removeForeignElements($tokens);
            $this->assertEqual($expect[$idx], $actual);
            paintIf($actual, $actual != $expect[$idx]);
        }

    }

    function test_makeWellFormed() {

        $inputs = array();
        $expect = array();

        // empty input stays empty
        $inputs[0] = array();
        $expect[0] = $inputs[0];

        // already well-formed
        $inputs[1] = array(
            new HTMLPurifier_Token_Text('This is '),
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('bold'),
            new HTMLPurifier_Token_End('b'),
            new HTMLPurifier_Token_Text(' text'),
            new HTMLPurifier_Token_Empty('br')
        );
        $expect[1] = $inputs[1];

        // a missing end tag is appended
        $inputs[2] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('Unclosed tag, gasp!')
        );
        $expect[2] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('Unclosed tag, gasp!'),
            new HTMLPurifier_Token_End('b')
        );

        // an inner unclosed tag is closed before its parent
        $inputs[3] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Start('i'),
            new HTMLPurifier_Token_Text('The b is closed, but the i is not'),
            new HTMLPurifier_Token_End('b')
        );
        $expect[3] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Start('i'),
            new HTMLPurifier_Token_Text('The b is closed, but the i is not'),
            new HTMLPurifier_Token_End('i'),
            new HTMLPurifier_Token_End('b')
        );

        // a stray end tag becomes text
        $inputs[4] = array(
            new HTMLPurifier_Token_Text('Hey, recycle unused end tags!'),
            new HTMLPurifier_Token_End('b')
        );
        $expect[4] = array(
            new HTMLPurifier_Token_Text('Hey, recycle unused end tags!'),
            new HTMLPurifier_Token_Text('</b>')
        );

        // start tag of an empty element becomes an empty token
        $inputs[5] = array(
            new HTMLPurifier_Token_Start('br', array('style' => 'clear:both;'))
        );
        $expect[5] = array(
            new HTMLPurifier_Token_Empty('br', array('style' => 'clear:both;'))
        );

        // empty token of a non-empty element becomes a start/end pair
        $inputs[6] = array(
            new HTMLPurifier_Token_Empty('div', array('style' => 'clear:both;'))
        );
        $expect[6] = array(
            new HTMLPurifier_Token_Start('div', array('style' => 'clear:both;')),
            new HTMLPurifier_Token_End('div')
        );

        // automatic paragraph closing

        $inputs[7] = array(
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 1'),
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 2')
        );
        $expect[7] = array(
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 1'),
            new HTMLPurifier_Token_End('p'),
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 2'),
            new HTMLPurifier_Token_End('p')
        );

        $inputs[8] = array(
            new HTMLPurifier_Token_Start('div'),
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 1 in a div'),
            new HTMLPurifier_Token_End('div')
        );
        $expect[8] = array(
            new HTMLPurifier_Token_Start('div'),
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 1 in a div'),
            new HTMLPurifier_Token_End('p'),
            new HTMLPurifier_Token_End('div')
        );

        // automatic list closing

        $inputs[9] = array(
            new HTMLPurifier_Token_Start('ol'),

            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Item 1'),

            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Item 2'),

            new HTMLPurifier_Token_End('ol')
        );
        $expect[9] = array(
            new HTMLPurifier_Token_Start('ol'),

            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Item 1'),
            new HTMLPurifier_Token_End('li'),

            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Item 2'),
            new HTMLPurifier_Token_End('li'),

            new HTMLPurifier_Token_End('ol')
        );

        foreach ($inputs as $idx => $tokens) {
            $actual = $this->def->makeWellFormed($tokens);
            $this->assertEqual($expect[$idx], $actual);
            paintIf($actual, $actual != $expect[$idx]);
        }

    }

    function test_fixNesting() {

        $inputs = array();
        $expect = array();

        // next id = 5

        // legal inline nesting
        $inputs[0] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('Bold text'),
            new HTMLPurifier_Token_End('b')
        );
        $expect[0] = $inputs[0];

        // legal inline and block
        // as the parent element is considered FLOW
        $inputs[1] = array(
            new HTMLPurifier_Token_Start('a', array('href' => 'http://www.example.com/')),
            new HTMLPurifier_Token_Text('Linky'),
            new HTMLPurifier_Token_End('a'),
            new HTMLPurifier_Token_Start('div'),
            new HTMLPurifier_Token_Text('Block element'),
            new HTMLPurifier_Token_End('div')
        );
        $expect[1] = $inputs[1];

        // illegal block in inline, element -> text
        $inputs[2] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Start('div'),
            new HTMLPurifier_Token_Text('Illegal Div'),
            new HTMLPurifier_Token_End('div'),
            new HTMLPurifier_Token_End('b')
        );
        $expect[2] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('<div>'),
            new HTMLPurifier_Token_Text('Illegal Div'),
            new HTMLPurifier_Token_Text('</div>'),
            new HTMLPurifier_Token_End('b')
        );

        // test of empty set that's required, resulting in removal of node
        $inputs[3] = array(
            new HTMLPurifier_Token_Start('ul'),
            new HTMLPurifier_Token_End('ul')
        );
        $expect[3] = array();

        // test illegal text which gets removed
        $inputs[4] = array(
            new HTMLPurifier_Token_Start('ul'),
            new HTMLPurifier_Token_Text('Illegal Text'),
            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Legal item'),
            new HTMLPurifier_Token_End('li'),
            new HTMLPurifier_Token_End('ul')
        );
        $expect[4] = array(
            new HTMLPurifier_Token_Start('ul'),
            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Legal item'),
            new HTMLPurifier_Token_End('li'),
            new HTMLPurifier_Token_End('ul')
        );

        foreach ($inputs as $idx => $tokens) {
            $actual = $this->def->fixNesting($tokens);
            $this->assertEqual($expect[$idx], $actual);
            paintIf($actual, $actual != $expect[$idx]);
        }
    }

}
|
||||
|
||||
?>
|
@ -1,130 +1,5 @@
|
||||
<?php
|
||||
|
||||
// Unit tests for the HTMLPurifier_ChildDef family.
// Each test feeds HTML snippets through assertSeries(), which tokenizes
// them, runs validateChildren() and compares against the expectation.
class Test_HTMLPurifier_ChildDef extends UnitTestCase
{

    var $lex;
    var $gen;

    function Test_HTMLPurifier_ChildDef() {
        $this->lex = new HTMLPurifier_Lexer();
        $this->gen = new HTMLPurifier_Generator();
        parent::UnitTestCase();
    }

    // Runs $def->validateChildren() over every input.
    //   $inputs - HTML strings, indexed like $expect
    //   $expect - true/false for an identical boolean result, or the
    //             HTML string the returned tokens must generate
    //   $def    - child definition under test
    function assertSeries($inputs, $expect, $def) {
        foreach ($inputs as $i => $input) {
            $tokens = $this->lex->tokenizeHTML($input);
            $result = $def->validateChildren($tokens);
            if (is_bool($expect[$i])) {
                $this->assertIdentical($expect[$i], $result);
            } else {
                $result_html = $this->gen->generateFromTokens($result);
                $this->assertEqual($expect[$i], $result_html);
                paintIf($result_html, $result_html != $expect[$i]);
            }
        }
    }

    function test_complex() {

        $inputs = array();
        $expect = array();

        // the table definition
        $def = new HTMLPurifier_ChildDef(
            '(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');

        $inputs[0] = '';
        $expect[0] = false;

        // we really don't care what's inside, because if it turns out
        // this tr is illegal, we'll end up re-evaluating the parent node
        // anyway.
        $inputs[1] = '<tr></tr>';
        $expect[1] = true;

        $inputs[2] = '<caption></caption><col></col><thead></thead>' .
                     '<tfoot></tfoot><tbody></tbody>';
        $expect[2] = true;

        $inputs[3] = '<col></col><col></col><col></col><tr></tr>';
        $expect[3] = true;

        $this->assertSeries($inputs, $expect, $def);

    }

    function test_simple() {

        // simple is actually an abstract class
        // but we're unit testing some of the conv. functions it gives

        $def = new HTMLPurifier_ChildDef_Simple('foobar | bang |gizmo');
        $this->assertEqual($def->elements,
          array(
            'foobar' => true
           ,'bang'   => true
           ,'gizmo'  => true
          ));

        $def = new HTMLPurifier_ChildDef_Simple(array('href', 'src'));
        $this->assertEqual($def->elements,
          array(
            'href' => true
           ,'src'  => true
          ));
    }

    function test_required_pcdata_forbidden() {

        $inputs = array();
        $expect = array();

        $def = new HTMLPurifier_ChildDef_Required('dt | dd');

        // assertSeries() tokenizes HTML strings, so "no children" has to
        // be the empty string rather than an empty array
        $inputs[0] = '';
        $expect[0] = false;

        $inputs[1] = '<dt>Term</dt>Text in an illegal location'.
                     '<dd>Definition</dd><b>Illegal tag</b>';

        $expect[1] = '<dt>Term</dt><dd>Definition</dd>';

        $inputs[2] = 'How do you do!';
        $expect[2] = false;

        // whitespace shouldn't trigger it
        $inputs[3] = "\n<dd>Definition</dd> ";
        $expect[3] = true;

        $inputs[4] ='<dd>Definition</dd> <b></b> ';
        $expect[4] = '<dd>Definition</dd> ';

        $inputs[5] = "\t ";
        $expect[5] = false;

        $this->assertSeries($inputs, $expect, $def);

    }

    function test_required_pcdata_allowed() {

        $inputs = array();
        $expect = array();

        $def = new HTMLPurifier_ChildDef_Required('#PCDATA | b');

        $inputs[0] = '<b>Bold text</b><img />';
        $expect[0] = '<b>Bold text</b><img />';

        $this->assertSeries($inputs, $expect, $def);
    }

    function test_optional() {

        $inputs = array();
        $expect = array();

        $def = new HTMLPurifier_ChildDef_Optional('b | i');

        $inputs[0] = '<b>Bold text</b><img />';
        $expect[0] = '<b>Bold text</b>';

        $inputs[1] = 'Not allowed text';
        $expect[1] = '';

        $this->assertSeries($inputs, $expect, $def);
    }

}
|
||||
|
||||
class Test_HTMLPurifier_Definition extends UnitTestCase
|
||||
{
|
||||
|
||||
|
@ -12,6 +12,8 @@ require_once 'HTMLPurifier/HTMLPurifier.php';
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
require_once 'HTMLPurifier/Definition.php';
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
|
||||
$test = new GroupTest('HTMLPurifier');
|
||||
@ -20,6 +22,7 @@ $test->addTestFile('HTMLPurifier.php');
|
||||
$test->addTestFile('Lexer.php');
|
||||
//$test->addTestFile('Token.php');
|
||||
$test->addTestFile('Definition.php');
|
||||
$test->addTestFile('ChildDef.php');
|
||||
$test->addTestFile('Generator.php');
|
||||
|
||||
$test->run( new HtmlReporter() );
|
||||
|
Loading…
x
Reference in New Issue
Block a user