0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

[1.3.0] Refactored ChildDef classes into their own files

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@558 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-11-22 18:55:15 +00:00
parent c5ea987069
commit 3b26e5dc5b
17 changed files with 645 additions and 556 deletions

1
NEWS
View File

@ -14,6 +14,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
1.2.1, unknown release date
(bugfix/minor feature release, may be dropped if 1.2.0 is stable)
. Refactored ChildDef classes into their own files
1.2.0, released 2006-11-19
# ID attributes now disabled by default. New directives:

View File

@ -50,391 +50,4 @@ class HTMLPurifier_ChildDef
}
}
/**
* Custom validation class, accepts DTD child definitions
*
* @warning Currently this class is an all or nothing proposition, that is,
* it will only give a bool return value.
* @note This class is currently not used by any code, although it is unit
* tested.
*/
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
var $type = 'custom';
var $allow_empty = false;
/**
* Allowed child pattern as defined by the DTD
*/
var $dtd_regex;
/**
* PCRE regex derived from $dtd_regex
* @private
*/
var $_pcre_regex;
/**
* @param $dtd_regex Allowed child pattern from the DTD
*/
function HTMLPurifier_ChildDef_Custom($dtd_regex) {
$this->dtd_regex = $dtd_regex;
$this->_compileRegex();
}
/**
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
*/
function _compileRegex() {
$raw = str_replace(' ', '', $this->dtd_regex);
if ($raw{0} != '(') {
$raw = "($raw)";
}
$reg = str_replace(',', ',?', $raw);
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
$this->_pcre_regex = $reg;
}
function validateChildren($tokens_of_children, $config, &$context) {
$list_of_children = '';
$nesting = 0; // depth into the nest
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) continue;
$is_child = ($nesting == 0); // direct
if ($token->type == 'start') {
$nesting++;
} elseif ($token->type == 'end') {
$nesting--;
}
if ($is_child) {
$list_of_children .= $token->name . ',';
}
}
$list_of_children = rtrim($list_of_children, ',');
$okay =
preg_match(
'/^'.$this->_pcre_regex.'$/',
$list_of_children
);
return (bool) $okay;
}
}
/**
* Definition that allows a set of elements, but disallows empty children.
*/
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
/**
* Lookup table of allowed elements.
*/
var $elements = array();
/**
* @param $elements List of allowed element names (lowercase).
*/
function HTMLPurifier_ChildDef_Required($elements) {
if (is_string($elements)) {
$elements = str_replace(' ', '', $elements);
$elements = explode('|', $elements);
}
$elements = array_flip($elements);
foreach ($elements as $i => $x) $elements[$i] = true;
$this->elements = $elements;
$this->gen = new HTMLPurifier_Generator();
}
var $allow_empty = false;
var $type = 'required';
function validateChildren($tokens_of_children, $config, &$context) {
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
// the new set of children
$result = array();
// current depth into the nest
$nesting = 0;
// whether or not we're deleting a node
$is_deleting = false;
// whether or not parsed character data is allowed
// this controls whether or not we silently drop a tag
// or generate escaped HTML from it
$pcdata_allowed = isset($this->elements['#PCDATA']);
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
// some configuration
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
continue;
}
$all_whitespace = false; // phew, we're not talking about whitespace
$is_child = ($nesting == 0);
if ($token->type == 'start') {
$nesting++;
} elseif ($token->type == 'end') {
$nesting--;
}
if ($is_child) {
$is_deleting = false;
if (!isset($this->elements[$token->name])) {
$is_deleting = true;
if ($pcdata_allowed && $token->type == 'text') {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$this->gen->generateFromToken($token, $config)
);
}
continue;
}
}
if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$this->gen->generateFromToken( $token, $config )
);
} else {
// drop silently
}
}
if (empty($result)) return false;
if ($all_whitespace) return false;
if ($tokens_of_children == $result) return true;
return $result;
}
}
/**
* Definition that allows a set of elements, and allows no children.
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
* really, one shouldn't inherit from the other. Only altered behavior
* is to overload a returned false with an array. Thus, it will never
* return false.
*/
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
var $allow_empty = true;
var $type = 'optional';
function validateChildren($tokens_of_children, $config, &$context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
if ($result === false) return array();
return $result;
}
}
/**
* Definition that disallows all elements.
* @warning validateChildren() in this class is actually never called, because
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
* before child definitions are parsed in earnest by
* HTMLPurifier_Strategy_FixNesting.
*/
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
var $allow_empty = true;
var $type = 'empty';
function HTMLPurifier_ChildDef_Empty() {}
function validateChildren($tokens_of_children, $config, &$context) {
return array();
}
}
/**
* Definition that uses different definitions depending on context.
*
* The del and ins tags are notable because they allow different types of
* elements depending on whether or not they're in a block or inline context.
* Chameleon allows this behavior to happen by using two different
* definitions depending on context. While this somewhat generalized,
* it is specifically intended for those two tags.
*/
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{
/**
* Instance of the definition object to use when inline. Usually stricter.
*/
var $inline;
/**
* Instance of the definition object to use when block.
*/
var $block;
/**
* @param $inline List of elements to allow when inline.
* @param $block List of elements to allow when block.
*/
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
$this->block = new HTMLPurifier_ChildDef_Optional($block);
}
function validateChildren($tokens_of_children, $config, &$context) {
$parent_type = $context->get('ParentType');
switch ($parent_type) {
case 'unknown':
case 'inline':
$result = $this->inline->validateChildren(
$tokens_of_children, $config, $context);
break;
case 'block':
$result = $this->block->validateChildren(
$tokens_of_children, $config, $context);
break;
default:
trigger_error('Invalid context', E_USER_ERROR);
return false;
}
return $result;
}
}
/**
* Definition for tables
*/
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
var $allow_empty = false;
var $type = 'table';
function HTMLPurifier_ChildDef_Table() {}
function validateChildren($tokens_of_children, $config, &$context) {
if (empty($tokens_of_children)) return false;
// this ensures that the loop gets run one last time before closing
// up. It's a little bit of a hack, but it works! Just make sure you
// get rid of the token later.
$tokens_of_children[] = false;
// only one of these elements is allowed in a table
$caption = false;
$thead = false;
$tfoot = false;
// as many of these as you want
$cols = array();
$content = array();
$nesting = 0; // current depth so we can determine nodes
$is_collecting = false; // are we globbing together tokens to package
// into one of the collectors?
$collection = array(); // collected nodes
$tag_index = 0; // the first node might be whitespace,
// so this tells us where the start tag is
foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0);
if ($token === false) {
// terminating sequence started
} elseif ($token->type == 'start') {
$nesting++;
} elseif ($token->type == 'end') {
$nesting--;
}
// handle node collection
if ($is_collecting) {
if ($is_child) {
// okay, let's stash the tokens away
// first token tells us the type of the collection
switch ($collection[$tag_index]->name) {
case 'tr':
case 'tbody':
$content[] = $collection;
break;
case 'caption':
if ($caption !== false) break;
$caption = $collection;
break;
case 'thead':
case 'tfoot':
// access the appropriate variable, $thead or $tfoot
$var = $collection[$tag_index]->name;
if ($$var === false) {
$$var = $collection;
} else {
// transmutate the first and less entries into
// tbody tags, and then put into content
$collection[$tag_index]->name = 'tbody';
$collection[count($collection)-1]->name = 'tbody';
$content[] = $collection;
}
break;
case 'colgroup':
$cols[] = $collection;
break;
}
$collection = array();
$is_collecting = false;
$tag_index = 0;
} else {
// add the node to the collection
$collection[] = $token;
}
}
// terminate
if ($token === false) break;
if ($is_child) {
// determine what we're dealing with
if ($token->name == 'col') {
// the only empty tag in the possie, we can handle it
// immediately
$cols[] = array_merge($collection, array($token));
$collection = array();
$tag_index = 0;
continue;
}
switch($token->name) {
case 'caption':
case 'colgroup':
case 'thead':
case 'tfoot':
case 'tbody':
case 'tr':
$is_collecting = true;
$collection[] = $token;
continue;
default:
if ($token->type == 'text' && $token->is_whitespace) {
$collection[] = $token;
$tag_index++;
}
continue;
}
}
}
if (empty($content)) return false;
$ret = array();
if ($caption !== false) $ret = array_merge($ret, $caption);
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
if ($thead !== false) $ret = array_merge($ret, $thead);
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
if (!empty($collection) && $is_collecting == false){
// grab the trailing space
$ret = array_merge($ret, $collection);
}
array_pop($tokens_of_children); // remove phantom token
return ($ret === $tokens_of_children) ? true : $ret;
}
}
?>

View File

@ -0,0 +1,55 @@
<?php
require_once 'HTMLPurifier/ChildDef.php';
/**
* Definition that uses different definitions depending on context.
*
* The del and ins tags are notable because they allow different types of
* elements depending on whether or not they're in a block or inline context.
* Chameleon allows this behavior to happen by using two different
* definitions depending on context. While this somewhat generalized,
* it is specifically intended for those two tags.
*/
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{
/**
* Instance of the definition object to use when inline. Usually stricter.
*/
var $inline;
/**
* Instance of the definition object to use when block.
*/
var $block;
/**
* @param $inline List of elements to allow when inline.
* @param $block List of elements to allow when block.
*/
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
$this->block = new HTMLPurifier_ChildDef_Optional($block);
}
function validateChildren($tokens_of_children, $config, &$context) {
$parent_type = $context->get('ParentType');
switch ($parent_type) {
case 'unknown':
case 'inline':
$result = $this->inline->validateChildren(
$tokens_of_children, $config, $context);
break;
case 'block':
$result = $this->block->validateChildren(
$tokens_of_children, $config, $context);
break;
default:
trigger_error('Invalid context', E_USER_ERROR);
return false;
}
return $result;
}
}
?>

View File

@ -0,0 +1,75 @@
<?php
require_once 'HTMLPurifier/ChildDef.php';
/**
* Custom validation class, accepts DTD child definitions
*
* @warning Currently this class is an all or nothing proposition, that is,
* it will only give a bool return value.
* @note This class is currently not used by any code, although it is unit
* tested.
*/
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
var $type = 'custom';
var $allow_empty = false;
/**
* Allowed child pattern as defined by the DTD
*/
var $dtd_regex;
/**
* PCRE regex derived from $dtd_regex
* @private
*/
var $_pcre_regex;
/**
* @param $dtd_regex Allowed child pattern from the DTD
*/
function HTMLPurifier_ChildDef_Custom($dtd_regex) {
$this->dtd_regex = $dtd_regex;
$this->_compileRegex();
}
/**
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
*/
function _compileRegex() {
$raw = str_replace(' ', '', $this->dtd_regex);
if ($raw{0} != '(') {
$raw = "($raw)";
}
$reg = str_replace(',', ',?', $raw);
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
$this->_pcre_regex = $reg;
}
function validateChildren($tokens_of_children, $config, &$context) {
$list_of_children = '';
$nesting = 0; // depth into the nest
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) continue;
$is_child = ($nesting == 0); // direct
if ($token->type == 'start') {
$nesting++;
} elseif ($token->type == 'end') {
$nesting--;
}
if ($is_child) {
$list_of_children .= $token->name . ',';
}
}
$list_of_children = rtrim($list_of_children, ',');
$okay =
preg_match(
'/^'.$this->_pcre_regex.'$/',
$list_of_children
);
return (bool) $okay;
}
}
?>

View File

@ -0,0 +1,22 @@
<?php
require_once 'HTMLPurifier/ChildDef.php';
/**
* Definition that disallows all elements.
* @warning validateChildren() in this class is actually never called, because
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
* before child definitions are parsed in earnest by
* HTMLPurifier_Strategy_FixNesting.
*/
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
var $allow_empty = true;
var $type = 'empty';
function HTMLPurifier_ChildDef_Empty() {}
function validateChildren($tokens_of_children, $config, &$context) {
return array();
}
}
?>

View File

@ -0,0 +1,23 @@
<?php
require_once 'HTMLPurifier/ChildDef/Required.php';
/**
* Definition that allows a set of elements, and allows no children.
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
* really, one shouldn't inherit from the other. Only altered behavior
* is to overload a returned false with an array. Thus, it will never
* return false.
*/
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
var $allow_empty = true;
var $type = 'optional';
function validateChildren($tokens_of_children, $config, &$context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
if ($result === false) return array();
return $result;
}
}
?>

View File

@ -0,0 +1,100 @@
<?php
require_once 'HTMLPurifier/ChildDef.php';
/**
* Definition that allows a set of elements, but disallows empty children.
*/
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
/**
* Lookup table of allowed elements.
*/
var $elements = array();
/**
* @param $elements List of allowed element names (lowercase).
*/
function HTMLPurifier_ChildDef_Required($elements) {
if (is_string($elements)) {
$elements = str_replace(' ', '', $elements);
$elements = explode('|', $elements);
}
$elements = array_flip($elements);
foreach ($elements as $i => $x) $elements[$i] = true;
$this->elements = $elements;
$this->gen = new HTMLPurifier_Generator();
}
var $allow_empty = false;
var $type = 'required';
function validateChildren($tokens_of_children, $config, &$context) {
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
// the new set of children
$result = array();
// current depth into the nest
$nesting = 0;
// whether or not we're deleting a node
$is_deleting = false;
// whether or not parsed character data is allowed
// this controls whether or not we silently drop a tag
// or generate escaped HTML from it
$pcdata_allowed = isset($this->elements['#PCDATA']);
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
// some configuration
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
continue;
}
$all_whitespace = false; // phew, we're not talking about whitespace
$is_child = ($nesting == 0);
if ($token->type == 'start') {
$nesting++;
} elseif ($token->type == 'end') {
$nesting--;
}
if ($is_child) {
$is_deleting = false;
if (!isset($this->elements[$token->name])) {
$is_deleting = true;
if ($pcdata_allowed && $token->type == 'text') {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$this->gen->generateFromToken($token, $config)
);
}
continue;
}
}
if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$this->gen->generateFromToken( $token, $config )
);
} else {
// drop silently
}
}
if (empty($result)) return false;
if ($all_whitespace) return false;
if ($tokens_of_children == $result) return true;
return $result;
}
}
?>

View File

@ -0,0 +1,142 @@
<?php
require_once 'HTMLPurifier/ChildDef.php';
/**
* Definition for tables
*/
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
var $allow_empty = false;
var $type = 'table';
function HTMLPurifier_ChildDef_Table() {}
function validateChildren($tokens_of_children, $config, &$context) {
if (empty($tokens_of_children)) return false;
// this ensures that the loop gets run one last time before closing
// up. It's a little bit of a hack, but it works! Just make sure you
// get rid of the token later.
$tokens_of_children[] = false;
// only one of these elements is allowed in a table
$caption = false;
$thead = false;
$tfoot = false;
// as many of these as you want
$cols = array();
$content = array();
$nesting = 0; // current depth so we can determine nodes
$is_collecting = false; // are we globbing together tokens to package
// into one of the collectors?
$collection = array(); // collected nodes
$tag_index = 0; // the first node might be whitespace,
// so this tells us where the start tag is
foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0);
if ($token === false) {
// terminating sequence started
} elseif ($token->type == 'start') {
$nesting++;
} elseif ($token->type == 'end') {
$nesting--;
}
// handle node collection
if ($is_collecting) {
if ($is_child) {
// okay, let's stash the tokens away
// first token tells us the type of the collection
switch ($collection[$tag_index]->name) {
case 'tr':
case 'tbody':
$content[] = $collection;
break;
case 'caption':
if ($caption !== false) break;
$caption = $collection;
break;
case 'thead':
case 'tfoot':
// access the appropriate variable, $thead or $tfoot
$var = $collection[$tag_index]->name;
if ($$var === false) {
$$var = $collection;
} else {
// transmutate the first and less entries into
// tbody tags, and then put into content
$collection[$tag_index]->name = 'tbody';
$collection[count($collection)-1]->name = 'tbody';
$content[] = $collection;
}
break;
case 'colgroup':
$cols[] = $collection;
break;
}
$collection = array();
$is_collecting = false;
$tag_index = 0;
} else {
// add the node to the collection
$collection[] = $token;
}
}
// terminate
if ($token === false) break;
if ($is_child) {
// determine what we're dealing with
if ($token->name == 'col') {
// the only empty tag in the possie, we can handle it
// immediately
$cols[] = array_merge($collection, array($token));
$collection = array();
$tag_index = 0;
continue;
}
switch($token->name) {
case 'caption':
case 'colgroup':
case 'thead':
case 'tfoot':
case 'tbody':
case 'tr':
$is_collecting = true;
$collection[] = $token;
continue;
default:
if ($token->type == 'text' && $token->is_whitespace) {
$collection[] = $token;
$tag_index++;
}
continue;
}
}
}
if (empty($content)) return false;
$ret = array();
if ($caption !== false) $ret = array_merge($ret, $caption);
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
if ($thead !== false) $ret = array_merge($ret, $thead);
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
if (!empty($collection) && $is_collecting == false){
// grab the trailing space
$ret = array_merge($ret, $collection);
}
array_pop($tokens_of_children); // remove phantom token
return ($ret === $tokens_of_children) ? true : $ret;
}
}
?>

View File

@ -18,6 +18,11 @@ require_once 'HTMLPurifier/AttrTransform.php';
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
require_once 'HTMLPurifier/ChildDef.php';
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
require_once 'HTMLPurifier/ChildDef/Empty.php';
require_once 'HTMLPurifier/ChildDef/Required.php';
require_once 'HTMLPurifier/ChildDef/Optional.php';
require_once 'HTMLPurifier/ChildDef/Table.php';
require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Token.php';
require_once 'HTMLPurifier/TagTransform.php';

View File

@ -0,0 +1,35 @@
<?php
require_once 'HTMLPurifier/ChildDefHarness.php';
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
class HTMLPurifier_ChildDef_ChameleonTest extends HTMLPurifier_ChildDefHarness
{
function test() {
$this->obj = new HTMLPurifier_ChildDef_Chameleon(
'b | i', // allowed only when in inline context
'b | i | div' // allowed only when in block context
);
$this->assertResult(
'<b>Allowed.</b>', true,
array(), array('ParentType' => 'inline')
);
$this->assertResult(
'<div>Not allowed.</div>', '',
array(), array('ParentType' => 'inline')
);
$this->assertResult(
'<div>Allowed.</div>', true,
array(), array('ParentType' => 'block')
);
}
}
?>

View File

@ -0,0 +1,24 @@
<?php
require_once 'HTMLPurifier/ChildDefHarness.php';
require_once 'HTMLPurifier/ChildDef/Custom.php';
class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
{
function test() {
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
$this->assertResult('', false);
$this->assertResult('<a /><a />', false);
$this->assertResult('<a /><b /><c /><d /><a /><b />');
$this->assertResult('<a /><d>Dob</d><a /><b>foo</b>'.
'<a href="moo" /><b>foo</b>');
}
}
?>

View File

@ -0,0 +1,20 @@
<?php
require_once 'HTMLPurifier/ChildDefHarness.php';
require_once 'HTMLPurifier/ChildDef/Optional.php';
class HTMLPurifier_ChildDef_OptionalTest extends HTMLPurifier_ChildDefHarness
{
function test() {
$this->obj = new HTMLPurifier_ChildDef_Optional('b | i');
$this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
$this->assertResult('Not allowed text', '');
}
}
?>

View File

@ -0,0 +1,69 @@
<?php
require_once 'HTMLPurifier/ChildDefHarness.php';
require_once 'HTMLPurifier/ChildDef/Required.php';
class HTMLPurifier_ChildDef_RequiredTest extends HTMLPurifier_ChildDefHarness
{
function testParsing() {
$def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
$this->assertEqual($def->elements,
array(
'foobar' => true
,'bang' => true
,'gizmo' => true
));
$def = new HTMLPurifier_ChildDef_Required(array('href', 'src'));
$this->assertEqual($def->elements,
array(
'href' => true
,'src' => true
));
}
function testPCDATAForbidden() {
$this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');
$this->assertResult('', false);
$this->assertResult(
'<dt>Term</dt>Text in an illegal location'.
'<dd>Definition</dd><b>Illegal tag</b>',
'<dt>Term</dt><dd>Definition</dd>');
$this->assertResult('How do you do!', false);
// whitespace shouldn't trigger it
$this->assertResult("\n<dd>Definition</dd> ");
$this->assertResult(
'<dd>Definition</dd> <b></b> ',
'<dd>Definition</dd> '
);
$this->assertResult("\t ", false);
}
function testPCDATAAllowed() {
$this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
$this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
// with child escaping on
$this->assertResult(
'<b>Bold text</b><img />',
'<b>Bold text</b>&lt;img /&gt;',
array(
'Core.EscapeInvalidChildren' => true
)
);
}
}
?>

View File

@ -0,0 +1,51 @@
<?php
require_once 'HTMLPurifier/ChildDefHarness.php';
require_once 'HTMLPurifier/ChildDef/Table.php';
class HTMLPurifier_ChildDef_TableTest extends HTMLPurifier_ChildDefHarness
{
function test() {
$this->obj = new HTMLPurifier_ChildDef_Table();
$this->assertResult('', false);
// we're using empty tags to compact the tests: under real circumstances
// there would be contents in them
$this->assertResult('<tr />');
$this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
'<tr><td>asdf</td></tr></tbody>');
$this->assertResult('<col /><col /><col /><tr />');
// mixed up order
$this->assertResult(
'<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
'<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');
// duplicates of singles
// - first caption serves
// - trailing tfoots/theads get turned into tbodys
$this->assertResult(
'<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
'<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
);
// errant text dropped (until bubbling is implemented)
$this->assertResult('foo', false);
// whitespace sticks to the previous element, last whitespace is
// stationary
$this->assertResult("\n <tr />\n <tr />\n ");
$this->assertResult(
"\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
"\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
);
}
}
?>

View File

@ -0,0 +1,18 @@
<?php
require_once 'HTMLPurifier/Harness.php';
require_once 'HTMLPurifier/ChildDef.php';
class HTMLPurifier_ChildDefHarness extends HTMLPurifier_Harness
{
function setUp() {
$this->obj = null;
$this->func = 'validateChildren';
$this->to_tokens = true;
$this->to_html = true;
}
}
?>

View File

@ -1,168 +0,0 @@
<?php
require_once 'HTMLPurifier/Harness.php';
require_once 'HTMLPurifier/ChildDef.php';
require_once 'HTMLPurifier/Lexer/DirectLex.php';
require_once 'HTMLPurifier/Generator.php';
class HTMLPurifier_ChildDefTest extends HTMLPurifier_Harness
{
function setUp() {
$this->obj = null;
$this->func = 'validateChildren';
$this->to_tokens = true;
$this->to_html = true;
}
function test_custom() {
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
$this->assertResult('', false);
$this->assertResult('<a /><a />', false);
$this->assertResult('<a /><b /><c /><d /><a /><b />');
$this->assertResult('<a /><d>Dob</d><a /><b>foo</b>'.
'<a href="moo" /><b>foo</b>');
}
function test_table() {
// the table definition
$this->obj = new HTMLPurifier_ChildDef_Table();
$inputs = $expect = $config = array();
$this->assertResult('', false);
// we're using empty tags to compact the tests: under real circumstances
// there would be contents in them
$this->assertResult('<tr />');
$this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
'<tr><td>asdf</td></tr></tbody>');
$this->assertResult('<col /><col /><col /><tr />');
// mixed up order
$this->assertResult(
'<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
'<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');
// duplicates of singles
// - first caption serves
// - trailing tfoots/theads get turned into tbodys
$this->assertResult(
'<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
'<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
);
// errant text dropped (until bubbling is implemented)
$this->assertResult('foo', false);
// whitespace sticks to the previous element, last whitespace is
// stationary
$this->assertResult("\n <tr />\n <tr />\n ");
$this->assertResult(
"\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
"\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
);
}
function testParsing() {
$def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
$this->assertEqual($def->elements,
array(
'foobar' => true
,'bang' => true
,'gizmo' => true
));
$def = new HTMLPurifier_ChildDef_Required(array('href', 'src'));
$this->assertEqual($def->elements,
array(
'href' => true
,'src' => true
));
}
function test_required_pcdata_forbidden() {
$this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');
$this->assertResult('', false);
$this->assertResult(
'<dt>Term</dt>Text in an illegal location'.
'<dd>Definition</dd><b>Illegal tag</b>',
'<dt>Term</dt><dd>Definition</dd>');
$this->assertResult('How do you do!', false);
// whitespace shouldn't trigger it
$this->assertResult("\n<dd>Definition</dd> ");
$this->assertResult(
'<dd>Definition</dd> <b></b> ',
'<dd>Definition</dd> '
);
$this->assertResult("\t ", false);
}
function test_required_pcdata_allowed() {
$this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
$this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
// with child escaping on
$this->assertResult(
'<b>Bold text</b><img />',
'<b>Bold text</b>&lt;img /&gt;',
array(
'Core.EscapeInvalidChildren' => true
)
);
}
function test_optional() {
$this->obj = new HTMLPurifier_ChildDef_Optional('b | i');
$this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
$this->assertResult('Not allowed text', '');
}
function test_chameleon() {
$this->obj = new HTMLPurifier_ChildDef_Chameleon(
'b | i', // allowed only when in inline context
'b | i | div' // allowed only when in block context
);
$this->assertResult(
'<b>Allowed.</b>', true,
array(), array('ParentType' => 'inline')
);
$this->assertResult(
'<div>Not allowed.</div>', '',
array(), array('ParentType' => 'inline')
);
$this->assertResult(
'<div>Allowed.</div>', true,
array(), array('ParentType' => 'block')
);
}
}
?>

View File

@ -44,7 +44,11 @@ $test_files[] = 'ConfigSchemaTest.php';
$test_files[] = 'LexerTest.php';
$test_files[] = 'Lexer/DirectLexTest.php';
$test_files[] = 'TokenTest.php';
$test_files[] = 'ChildDefTest.php';
$test_files[] = 'ChildDef/RequiredTest.php';
$test_files[] = 'ChildDef/OptionalTest.php';
$test_files[] = 'ChildDef/ChameleonTest.php';
$test_files[] = 'ChildDef/CustomTest.php';
$test_files[] = 'ChildDef/TableTest.php';
$test_files[] = 'GeneratorTest.php';
$test_files[] = 'EntityLookupTest.php';
$test_files[] = 'Strategy/RemoveForeignElementsTest.php';