From 3b26e5dc5b92bec39ce5651470dde29b6cbf3c64 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Wed, 22 Nov 2006 18:55:15 +0000 Subject: [PATCH] [1.3.0] Refactored ChildDef classes into their own files git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@558 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 1 + library/HTMLPurifier/ChildDef.php | 387 ------------------ library/HTMLPurifier/ChildDef/Chameleon.php | 55 +++ library/HTMLPurifier/ChildDef/Custom.php | 75 ++++ library/HTMLPurifier/ChildDef/Empty.php | 22 + library/HTMLPurifier/ChildDef/Optional.php | 23 ++ library/HTMLPurifier/ChildDef/Required.php | 100 +++++ library/HTMLPurifier/ChildDef/Table.php | 142 +++++++ library/HTMLPurifier/HTMLDefinition.php | 5 + tests/HTMLPurifier/ChildDef/ChameleonTest.php | 35 ++ tests/HTMLPurifier/ChildDef/CustomTest.php | 24 ++ tests/HTMLPurifier/ChildDef/OptionalTest.php | 20 + tests/HTMLPurifier/ChildDef/RequiredTest.php | 69 ++++ tests/HTMLPurifier/ChildDef/TableTest.php | 51 +++ tests/HTMLPurifier/ChildDefHarness.php | 18 + tests/HTMLPurifier/ChildDefTest.php | 168 -------- tests/index.php | 6 +- 17 files changed, 645 insertions(+), 556 deletions(-) create mode 100644 library/HTMLPurifier/ChildDef/Chameleon.php create mode 100644 library/HTMLPurifier/ChildDef/Custom.php create mode 100644 library/HTMLPurifier/ChildDef/Empty.php create mode 100644 library/HTMLPurifier/ChildDef/Optional.php create mode 100644 library/HTMLPurifier/ChildDef/Required.php create mode 100644 library/HTMLPurifier/ChildDef/Table.php create mode 100644 tests/HTMLPurifier/ChildDef/ChameleonTest.php create mode 100644 tests/HTMLPurifier/ChildDef/CustomTest.php create mode 100644 tests/HTMLPurifier/ChildDef/OptionalTest.php create mode 100644 tests/HTMLPurifier/ChildDef/RequiredTest.php create mode 100644 tests/HTMLPurifier/ChildDef/TableTest.php create mode 100644 tests/HTMLPurifier/ChildDefHarness.php delete mode 100644 tests/HTMLPurifier/ChildDefTest.php diff --git a/NEWS b/NEWS index 635255f9..8e05466a 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 1.2.1, unknown release date (bugfix/minor feature release, may be dropped if 1.2.0 is stable) +. Refactored ChildDef classes into their own files 1.2.0, released 2006-11-19 # ID attributes now disabled by default. New directives: diff --git a/library/HTMLPurifier/ChildDef.php b/library/HTMLPurifier/ChildDef.php index 19409b0d..7a0cc4c9 100644 --- a/library/HTMLPurifier/ChildDef.php +++ b/library/HTMLPurifier/ChildDef.php @@ -50,391 +50,4 @@ class HTMLPurifier_ChildDef } } -/** - * Custom validation class, accepts DTD child definitions - * - * @warning Currently this class is an all or nothing proposition, that is, - * it will only give a bool return value. - * @note This class is currently not used by any code, although it is unit - * tested. - */ -class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef -{ - var $type = 'custom'; - var $allow_empty = false; - /** - * Allowed child pattern as defined by the DTD - */ - var $dtd_regex; - /** - * PCRE regex derived from $dtd_regex - * @private - */ - var $_pcre_regex; - /** - * @param $dtd_regex Allowed child pattern from the DTD - */ - function HTMLPurifier_ChildDef_Custom($dtd_regex) { - $this->dtd_regex = $dtd_regex; - $this->_compileRegex(); - } - /** - * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) - */ - function _compileRegex() { - $raw = str_replace(' ', '', $this->dtd_regex); - if ($raw{0} != '(') { - $raw = "($raw)"; - } - $reg = str_replace(',', ',?', $raw); - $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg); - $this->_pcre_regex = $reg; - } - function validateChildren($tokens_of_children, $config, &$context) { - $list_of_children = ''; - $nesting = 0; // depth into the nest - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) continue; - - $is_child = ($nesting == 0); // direct - - if ($token->type == 'start') { - $nesting++; - } elseif ($token->type == 'end') { - $nesting--; - } - - if ($is_child) { - $list_of_children .= $token->name . ','; - } - } - $list_of_children = rtrim($list_of_children, ','); - - $okay = - preg_match( - '/^'.$this->_pcre_regex.'$/', - $list_of_children - ); - - return (bool) $okay; - } -} - -/** - * Definition that allows a set of elements, but disallows empty children. - */ -class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef -{ - /** - * Lookup table of allowed elements. - */ - var $elements = array(); - /** - * @param $elements List of allowed element names (lowercase). - */ - function HTMLPurifier_ChildDef_Required($elements) { - if (is_string($elements)) { - $elements = str_replace(' ', '', $elements); - $elements = explode('|', $elements); - } - $elements = array_flip($elements); - foreach ($elements as $i => $x) $elements[$i] = true; - $this->elements = $elements; - $this->gen = new HTMLPurifier_Generator(); - } - var $allow_empty = false; - var $type = 'required'; - function validateChildren($tokens_of_children, $config, &$context) { - // if there are no tokens, delete parent node - if (empty($tokens_of_children)) return false; - - // the new set of children - $result = array(); - - // current depth into the nest - $nesting = 0; - - // whether or not we're deleting a node - $is_deleting = false; - - // whether or not parsed character data is allowed - // this controls whether or not we silently drop a tag - // or generate escaped HTML from it - $pcdata_allowed = isset($this->elements['#PCDATA']); - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - // some configuration - $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren'); - - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) { - $result[] = $token; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - $is_child = ($nesting == 0); - - if ($token->type == 'start') { - $nesting++; - } elseif ($token->type == 'end') { - $nesting--; - } - - if ($is_child) { - $is_deleting = false; - if (!isset($this->elements[$token->name])) { - $is_deleting = true; - if ($pcdata_allowed && $token->type == 'text') { - $result[] = $token; - } elseif ($pcdata_allowed && $escape_invalid_children) { - $result[] = new HTMLPurifier_Token_Text( - $this->gen->generateFromToken($token, $config) - ); - } - continue; - } - } - if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) { - $result[] = $token; - } elseif ($pcdata_allowed && $escape_invalid_children) { - $result[] = - new HTMLPurifier_Token_Text( - $this->gen->generateFromToken( $token, $config ) - ); - } else { - // drop silently - } - } - if (empty($result)) return false; - if ($all_whitespace) return false; - if ($tokens_of_children == $result) return true; - return $result; - } -} - -/** - * Definition that allows a set of elements, and allows no children. - * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required, - * really, one shouldn't inherit from the other. Only altered behavior - * is to overload a returned false with an array. Thus, it will never - * return false. - */ -class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required -{ - var $allow_empty = true; - var $type = 'optional'; - function validateChildren($tokens_of_children, $config, &$context) { - $result = parent::validateChildren($tokens_of_children, $config, $context); - if ($result === false) return array(); - return $result; - } -} - -/** - * Definition that disallows all elements. - * @warning validateChildren() in this class is actually never called, because - * empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed - * before child definitions are parsed in earnest by - * HTMLPurifier_Strategy_FixNesting. - */ -class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef -{ - var $allow_empty = true; - var $type = 'empty'; - function HTMLPurifier_ChildDef_Empty() {} - function validateChildren($tokens_of_children, $config, &$context) { - return array(); - } -} - -/** - * Definition that uses different definitions depending on context. - * - * The del and ins tags are notable because they allow different types of - * elements depending on whether or not they're in a block or inline context. - * Chameleon allows this behavior to happen by using two different - * definitions depending on context. While this somewhat generalized, - * it is specifically intended for those two tags. - */ -class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef -{ - - /** - * Instance of the definition object to use when inline. Usually stricter. - */ - var $inline; - /** - * Instance of the definition object to use when block. - */ - var $block; - - /** - * @param $inline List of elements to allow when inline. - * @param $block List of elements to allow when block. - */ - function HTMLPurifier_ChildDef_Chameleon($inline, $block) { - $this->inline = new HTMLPurifier_ChildDef_Optional($inline); - $this->block = new HTMLPurifier_ChildDef_Optional($block); - } - - function validateChildren($tokens_of_children, $config, &$context) { - $parent_type = $context->get('ParentType'); - switch ($parent_type) { - case 'unknown': - case 'inline': - $result = $this->inline->validateChildren( - $tokens_of_children, $config, $context); - break; - case 'block': - $result = $this->block->validateChildren( - $tokens_of_children, $config, $context); - break; - default: - trigger_error('Invalid context', E_USER_ERROR); - return false; - } - return $result; - } -} - -/** - * Definition for tables - */ -class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef -{ - var $allow_empty = false; - var $type = 'table'; - function HTMLPurifier_ChildDef_Table() {} - function validateChildren($tokens_of_children, $config, &$context) { - if (empty($tokens_of_children)) return false; - - // this ensures that the loop gets run one last time before closing - // up. It's a little bit of a hack, but it works! Just make sure you - // get rid of the token later. - $tokens_of_children[] = false; - - // only one of these elements is allowed in a table - $caption = false; - $thead = false; - $tfoot = false; - - // as many of these as you want - $cols = array(); - $content = array(); - - $nesting = 0; // current depth so we can determine nodes - $is_collecting = false; // are we globbing together tokens to package - // into one of the collectors? - $collection = array(); // collected nodes - $tag_index = 0; // the first node might be whitespace, - // so this tells us where the start tag is - - foreach ($tokens_of_children as $token) { - $is_child = ($nesting == 0); - - if ($token === false) { - // terminating sequence started - } elseif ($token->type == 'start') { - $nesting++; - } elseif ($token->type == 'end') { - $nesting--; - } - - // handle node collection - if ($is_collecting) { - if ($is_child) { - // okay, let's stash the tokens away - // first token tells us the type of the collection - switch ($collection[$tag_index]->name) { - case 'tr': - case 'tbody': - $content[] = $collection; - break; - case 'caption': - if ($caption !== false) break; - $caption = $collection; - break; - case 'thead': - case 'tfoot': - // access the appropriate variable, $thead or $tfoot - $var = $collection[$tag_index]->name; - if ($$var === false) { - $$var = $collection; - } else { - // transmutate the first and less entries into - // tbody tags, and then put into content - $collection[$tag_index]->name = 'tbody'; - $collection[count($collection)-1]->name = 'tbody'; - $content[] = $collection; - } - break; - case 'colgroup': - $cols[] = $collection; - break; - } - $collection = array(); - $is_collecting = false; - $tag_index = 0; - } else { - // add the node to the collection - $collection[] = $token; - } - } - - // terminate - if ($token === false) break; - - if ($is_child) { - // determine what we're dealing with - if ($token->name == 'col') { - // the only empty tag in the possie, we can handle it - // immediately - $cols[] = array_merge($collection, array($token)); - $collection = array(); - $tag_index = 0; - continue; - } - switch($token->name) { - case 'caption': - case 'colgroup': - case 'thead': - case 'tfoot': - case 'tbody': - case 'tr': - $is_collecting = true; - $collection[] = $token; - continue; - default: - if ($token->type == 'text' && $token->is_whitespace) { - $collection[] = $token; - $tag_index++; - } - continue; - } - } - } - - if (empty($content)) return false; - - $ret = array(); - if ($caption !== false) $ret = array_merge($ret, $caption); - if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); - if ($thead !== false) $ret = array_merge($ret, $thead); - if ($tfoot !== false) $ret = array_merge($ret, $tfoot); - foreach ($content as $token_array) $ret = array_merge($ret, $token_array); - if (!empty($collection) && $is_collecting == false){ - // grab the trailing space - $ret = array_merge($ret, $collection); - } - - array_pop($tokens_of_children); // remove phantom token - - return ($ret === $tokens_of_children) ? true : $ret; - - } -} - ?> diff --git a/library/HTMLPurifier/ChildDef/Chameleon.php b/library/HTMLPurifier/ChildDef/Chameleon.php new file mode 100644 index 00000000..22724646 --- /dev/null +++ b/library/HTMLPurifier/ChildDef/Chameleon.php @@ -0,0 +1,55 @@ +inline = new HTMLPurifier_ChildDef_Optional($inline); + $this->block = new HTMLPurifier_ChildDef_Optional($block); + } + + function validateChildren($tokens_of_children, $config, &$context) { + $parent_type = $context->get('ParentType'); + switch ($parent_type) { + case 'unknown': + case 'inline': + $result = $this->inline->validateChildren( + $tokens_of_children, $config, $context); + break; + case 'block': + $result = $this->block->validateChildren( + $tokens_of_children, $config, $context); + break; + default: + trigger_error('Invalid context', E_USER_ERROR); + return false; + } + return $result; + } +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/ChildDef/Custom.php b/library/HTMLPurifier/ChildDef/Custom.php new file mode 100644 index 00000000..de18cd70 --- /dev/null +++ b/library/HTMLPurifier/ChildDef/Custom.php @@ -0,0 +1,75 @@ +dtd_regex = $dtd_regex; + $this->_compileRegex(); + } + /** + * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) + */ + function _compileRegex() { + $raw = str_replace(' ', '', $this->dtd_regex); + if ($raw{0} != '(') { + $raw = "($raw)"; + } + $reg = str_replace(',', ',?', $raw); + $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg); + $this->_pcre_regex = $reg; + } + function validateChildren($tokens_of_children, $config, &$context) { + $list_of_children = ''; + $nesting = 0; // depth into the nest + foreach ($tokens_of_children as $token) { + if (!empty($token->is_whitespace)) continue; + + $is_child = ($nesting == 0); // direct + + if ($token->type == 'start') { + $nesting++; + } elseif ($token->type == 'end') { + $nesting--; + } + + if ($is_child) { + $list_of_children .= $token->name . ','; + } + } + $list_of_children = rtrim($list_of_children, ','); + + $okay = + preg_match( + '/^'.$this->_pcre_regex.'$/', + $list_of_children + ); + + return (bool) $okay; + } +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/ChildDef/Empty.php b/library/HTMLPurifier/ChildDef/Empty.php new file mode 100644 index 00000000..1ab4fdd6 --- /dev/null +++ b/library/HTMLPurifier/ChildDef/Empty.php @@ -0,0 +1,22 @@ + \ No newline at end of file diff --git a/library/HTMLPurifier/ChildDef/Optional.php b/library/HTMLPurifier/ChildDef/Optional.php new file mode 100644 index 00000000..cc888326 --- /dev/null +++ b/library/HTMLPurifier/ChildDef/Optional.php @@ -0,0 +1,23 @@ + \ No newline at end of file diff --git a/library/HTMLPurifier/ChildDef/Required.php b/library/HTMLPurifier/ChildDef/Required.php new file mode 100644 index 00000000..0d80fbf8 --- /dev/null +++ b/library/HTMLPurifier/ChildDef/Required.php @@ -0,0 +1,100 @@ + $x) $elements[$i] = true; + $this->elements = $elements; + $this->gen = new HTMLPurifier_Generator(); + } + var $allow_empty = false; + var $type = 'required'; + function validateChildren($tokens_of_children, $config, &$context) { + // if there are no tokens, delete parent node + if (empty($tokens_of_children)) return false; + + // the new set of children + $result = array(); + + // current depth into the nest + $nesting = 0; + + // whether or not we're deleting a node + $is_deleting = false; + + // whether or not parsed character data is allowed + // this controls whether or not we silently drop a tag + // or generate escaped HTML from it + $pcdata_allowed = isset($this->elements['#PCDATA']); + + // a little sanity check to make sure it's not ALL whitespace + $all_whitespace = true; + + // some configuration + $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren'); + + foreach ($tokens_of_children as $token) { + if (!empty($token->is_whitespace)) { + $result[] = $token; + continue; + } + $all_whitespace = false; // phew, we're not talking about whitespace + + $is_child = ($nesting == 0); + + if ($token->type == 'start') { + $nesting++; + } elseif ($token->type == 'end') { + $nesting--; + } + + if ($is_child) { + $is_deleting = false; + if (!isset($this->elements[$token->name])) { + $is_deleting = true; + if ($pcdata_allowed && $token->type == 'text') { + $result[] = $token; + } elseif ($pcdata_allowed && $escape_invalid_children) { + $result[] = new HTMLPurifier_Token_Text( + $this->gen->generateFromToken($token, $config) + ); + } + continue; + } + } + if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) { + $result[] = $token; + } elseif ($pcdata_allowed && $escape_invalid_children) { + $result[] = + new HTMLPurifier_Token_Text( + $this->gen->generateFromToken( $token, $config ) + ); + } else { + // drop silently + } + } + if (empty($result)) return false; + if ($all_whitespace) return false; + if ($tokens_of_children == $result) return true; + return $result; + } +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/ChildDef/Table.php b/library/HTMLPurifier/ChildDef/Table.php new file mode 100644 index 00000000..3534cdd0 --- /dev/null +++ b/library/HTMLPurifier/ChildDef/Table.php @@ -0,0 +1,142 @@ +type == 'start') { + $nesting++; + } elseif ($token->type == 'end') { + $nesting--; + } + + // handle node collection + if ($is_collecting) { + if ($is_child) { + // okay, let's stash the tokens away + // first token tells us the type of the collection + switch ($collection[$tag_index]->name) { + case 'tr': + case 'tbody': + $content[] = $collection; + break; + case 'caption': + if ($caption !== false) break; + $caption = $collection; + break; + case 'thead': + case 'tfoot': + // access the appropriate variable, $thead or $tfoot + $var = $collection[$tag_index]->name; + if ($$var === false) { + $$var = $collection; + } else { + // transmutate the first and less entries into + // tbody tags, and then put into content + $collection[$tag_index]->name = 'tbody'; + $collection[count($collection)-1]->name = 'tbody'; + $content[] = $collection; + } + break; + case 'colgroup': + $cols[] = $collection; + break; + } + $collection = array(); + $is_collecting = false; + $tag_index = 0; + } else { + // add the node to the collection + $collection[] = $token; + } + } + + // terminate + if ($token === false) break; + + if ($is_child) { + // determine what we're dealing with + if ($token->name == 'col') { + // the only empty tag in the possie, we can handle it + // immediately + $cols[] = array_merge($collection, array($token)); + $collection = array(); + $tag_index = 0; + continue; + } + switch($token->name) { + case 'caption': + case 'colgroup': + case 'thead': + case 'tfoot': + case 'tbody': + case 'tr': + $is_collecting = true; + $collection[] = $token; + continue; + default: + if ($token->type == 'text' && $token->is_whitespace) { + $collection[] = $token; + $tag_index++; + } + continue; + } + } + } + + if (empty($content)) return false; + + $ret = array(); + if ($caption !== false) $ret = array_merge($ret, $caption); + if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); + if ($thead !== false) $ret = array_merge($ret, $thead); + if ($tfoot !== false) $ret = array_merge($ret, $tfoot); + foreach ($content as $token_array) $ret = array_merge($ret, $token_array); + if (!empty($collection) && $is_collecting == false){ + // grab the trailing space + $ret = array_merge($ret, $collection); + } + + array_pop($tokens_of_children); // remove phantom token + + return ($ret === $tokens_of_children) ? true : $ret; + + } +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 52e2654f..28776349 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -18,6 +18,11 @@ require_once 'HTMLPurifier/AttrTransform.php'; require_once 'HTMLPurifier/AttrTransform/BdoDir.php'; require_once 'HTMLPurifier/AttrTransform/ImgRequired.php'; require_once 'HTMLPurifier/ChildDef.php'; + require_once 'HTMLPurifier/ChildDef/Chameleon.php'; + require_once 'HTMLPurifier/ChildDef/Empty.php'; + require_once 'HTMLPurifier/ChildDef/Required.php'; + require_once 'HTMLPurifier/ChildDef/Optional.php'; + require_once 'HTMLPurifier/ChildDef/Table.php'; require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Token.php'; require_once 'HTMLPurifier/TagTransform.php'; diff --git a/tests/HTMLPurifier/ChildDef/ChameleonTest.php b/tests/HTMLPurifier/ChildDef/ChameleonTest.php new file mode 100644 index 00000000..b4181196 --- /dev/null +++ b/tests/HTMLPurifier/ChildDef/ChameleonTest.php @@ -0,0 +1,35 @@ +obj = new HTMLPurifier_ChildDef_Chameleon( + 'b | i', // allowed only when in inline context + 'b | i | div' // allowed only when in block context + ); + + $this->assertResult( + 'Allowed.', true, + array(), array('ParentType' => 'inline') + ); + + $this->assertResult( + '
Not allowed.
', '', + array(), array('ParentType' => 'inline') + ); + + $this->assertResult( + '
Allowed.
', true, + array(), array('ParentType' => 'block') + ); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/ChildDef/CustomTest.php b/tests/HTMLPurifier/ChildDef/CustomTest.php new file mode 100644 index 00000000..905c9e02 --- /dev/null +++ b/tests/HTMLPurifier/ChildDef/CustomTest.php @@ -0,0 +1,24 @@ +obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)'); + + $this->assertResult('', false); + $this->assertResult('', false); + + $this->assertResult(''); + $this->assertResult('Dobfoo'. + 'foo'); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/ChildDef/OptionalTest.php b/tests/HTMLPurifier/ChildDef/OptionalTest.php new file mode 100644 index 00000000..4e44e8b7 --- /dev/null +++ b/tests/HTMLPurifier/ChildDef/OptionalTest.php @@ -0,0 +1,20 @@ +obj = new HTMLPurifier_ChildDef_Optional('b | i'); + + $this->assertResult('Bold text', 'Bold text'); + $this->assertResult('Not allowed text', ''); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/ChildDef/RequiredTest.php b/tests/HTMLPurifier/ChildDef/RequiredTest.php new file mode 100644 index 00000000..d19eef9b --- /dev/null +++ b/tests/HTMLPurifier/ChildDef/RequiredTest.php @@ -0,0 +1,69 @@ +assertEqual($def->elements, + array( + 'foobar' => true + ,'bang' => true + ,'gizmo' => true + )); + + $def = new HTMLPurifier_ChildDef_Required(array('href', 'src')); + $this->assertEqual($def->elements, + array( + 'href' => true + ,'src' => true + )); + + } + + function testPCDATAForbidden() { + + $this->obj = new HTMLPurifier_ChildDef_Required('dt | dd'); + + $this->assertResult('', false); + $this->assertResult( + '
Term
Text in an illegal location'. + '
Definition
Illegal tag', + '
Term
Definition
'); + $this->assertResult('How do you do!', false); + + // whitespace shouldn't trigger it + $this->assertResult("\n
Definition
"); + + $this->assertResult( + '
Definition
', + '
Definition
' + ); + $this->assertResult("\t ", false); + + } + + function testPCDATAAllowed() { + + $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b'); + + $this->assertResult('Bold text', 'Bold text'); + + // with child escaping on + $this->assertResult( + 'Bold text', + 'Bold text<img />', + array( + 'Core.EscapeInvalidChildren' => true + ) + ); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/ChildDef/TableTest.php b/tests/HTMLPurifier/ChildDef/TableTest.php new file mode 100644 index 00000000..d5780a63 --- /dev/null +++ b/tests/HTMLPurifier/ChildDef/TableTest.php @@ -0,0 +1,51 @@ +obj = new HTMLPurifier_ChildDef_Table(); + + $this->assertResult('', false); + + // we're using empty tags to compact the tests: under real circumstances + // there would be contents in them + + $this->assertResult(''); + $this->assertResult(''. + 'asdf'); + $this->assertResult(''); + + // mixed up order + $this->assertResult( + '1', + '1'); + + // duplicates of singles + // - first caption serves + // - trailing tfoots/theads get turned into tbodys + $this->assertResult( + '11', + '11' + ); + + // errant text dropped (until bubbling is implemented) + $this->assertResult('foo', false); + + // whitespace sticks to the previous element, last whitespace is + // stationary + $this->assertResult("\n \n \n "); + $this->assertResult( + "\n\t\n\t\t\n\t\t\t", + "\n\t\t\n\t\n\t\t\t" + ); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/ChildDefHarness.php b/tests/HTMLPurifier/ChildDefHarness.php new file mode 100644 index 00000000..37c3f406 --- /dev/null +++ b/tests/HTMLPurifier/ChildDefHarness.php @@ -0,0 +1,18 @@ +obj = null; + $this->func = 'validateChildren'; + $this->to_tokens = true; + $this->to_html = true; + } + +} + +?> diff --git a/tests/HTMLPurifier/ChildDefTest.php b/tests/HTMLPurifier/ChildDefTest.php deleted file mode 100644 index fed88ecf..00000000 --- a/tests/HTMLPurifier/ChildDefTest.php +++ /dev/null @@ -1,168 +0,0 @@ -obj = null; - $this->func = 'validateChildren'; - $this->to_tokens = true; - $this->to_html = true; - } - - function test_custom() { - - $this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)'); - - $this->assertResult('', false); - $this->assertResult('
', false); - - $this->assertResult(''); - $this->assertResult('Dobfoo'. - 'foo'); - - } - - function test_table() { - - // the table definition - $this->obj = new HTMLPurifier_ChildDef_Table(); - - $inputs = $expect = $config = array(); - - $this->assertResult('', false); - - // we're using empty tags to compact the tests: under real circumstances - // there would be contents in them - - $this->assertResult(''); - $this->assertResult(''. - 'asdf'); - $this->assertResult(''); - - // mixed up order - $this->assertResult( - '1', - '1'); - - // duplicates of singles - // - first caption serves - // - trailing tfoots/theads get turned into tbodys - $this->assertResult( - '11', - '11' - ); - - // errant text dropped (until bubbling is implemented) - $this->assertResult('foo', false); - - // whitespace sticks to the previous element, last whitespace is - // stationary - $this->assertResult("\n \n \n "); - $this->assertResult( - "\n\t\n\t\t\n\t\t\t", - "\n\t\t\n\t\n\t\t\t" - ); - - } - - function testParsing() { - - $def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo'); - $this->assertEqual($def->elements, - array( - 'foobar' => true - ,'bang' => true - ,'gizmo' => true - )); - - $def = new HTMLPurifier_ChildDef_Required(array('href', 'src')); - $this->assertEqual($def->elements, - array( - 'href' => true - ,'src' => true - )); - - } - - function test_required_pcdata_forbidden() { - - $this->obj = new HTMLPurifier_ChildDef_Required('dt | dd'); - - $this->assertResult('', false); - $this->assertResult( - '
Term
Text in an illegal location'. - '
Definition
Illegal tag', - '
Term
Definition
'); - $this->assertResult('How do you do!', false); - - // whitespace shouldn't trigger it - $this->assertResult("\n
Definition
"); - - $this->assertResult( - '
Definition
', - '
Definition
' - ); - $this->assertResult("\t ", false); - - } - - function test_required_pcdata_allowed() { - - $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b'); - - $this->assertResult('Bold text', 'Bold text'); - - // with child escaping on - $this->assertResult( - 'Bold text', - 'Bold text<img />', - array( - 'Core.EscapeInvalidChildren' => true - ) - ); - - } - - function test_optional() { - - $this->obj = new HTMLPurifier_ChildDef_Optional('b | i'); - - $this->assertResult('Bold text', 'Bold text'); - $this->assertResult('Not allowed text', ''); - - } - - function test_chameleon() { - - $this->obj = new HTMLPurifier_ChildDef_Chameleon( - 'b | i', // allowed only when in inline context - 'b | i | div' // allowed only when in block context - ); - - $this->assertResult( - 'Allowed.', true, - array(), array('ParentType' => 'inline') - ); - - $this->assertResult( - '
Not allowed.
', '', - array(), array('ParentType' => 'inline') - ); - - $this->assertResult( - '
Allowed.
', true, - array(), array('ParentType' => 'block') - ); - - } - -} - -?> diff --git a/tests/index.php b/tests/index.php index 33ad68e6..36b58433 100644 --- a/tests/index.php +++ b/tests/index.php @@ -44,7 +44,11 @@ $test_files[] = 'ConfigSchemaTest.php'; $test_files[] = 'LexerTest.php'; $test_files[] = 'Lexer/DirectLexTest.php'; $test_files[] = 'TokenTest.php'; -$test_files[] = 'ChildDefTest.php'; +$test_files[] = 'ChildDef/RequiredTest.php'; +$test_files[] = 'ChildDef/OptionalTest.php'; +$test_files[] = 'ChildDef/ChameleonTest.php'; +$test_files[] = 'ChildDef/CustomTest.php'; +$test_files[] = 'ChildDef/TableTest.php'; $test_files[] = 'GeneratorTest.php'; $test_files[] = 'EntityLookupTest.php'; $test_files[] = 'Strategy/RemoveForeignElementsTest.php';