From aa249be067492f55e7b16747f8d41e627a2e84ef Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Thu, 3 Aug 2006 01:03:23 +0000 Subject: [PATCH] Fix chameleon behavior with ins and del. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@145 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/ChildDef.php | 28 +++++++++++++++ library/HTMLPurifier/Definition.php | 26 +++++++++++--- library/HTMLPurifier/Strategy/FixNesting.php | 21 ++++++++--- tests/HTMLPurifier/ChildDefTest.php | 35 +++++++++++++++++-- .../HTMLPurifier/Strategy/FixNestingTest.php | 4 +++ 5 files changed, 101 insertions(+), 13 deletions(-) diff --git a/library/HTMLPurifier/ChildDef.php b/library/HTMLPurifier/ChildDef.php index 6555ea6e..080059a4 100644 --- a/library/HTMLPurifier/ChildDef.php +++ b/library/HTMLPurifier/ChildDef.php @@ -175,4 +175,32 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef } } +class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef +{ + + var $inline; + var $block; + + function HTMLPurifier_ChildDef_Chameleon($inline, $block) { + $this->inline = new HTMLPurifier_ChildDef_Optional($inline); + $this->block = new HTMLPurifier_ChildDef_Optional($block); + } + + function validateChildren($tokens_of_children, $context) { + switch ($context) { + case 'unknown': + case 'inline': + $result = $this->inline->validateChildren($tokens_of_children); + break; + case 'block': + $result = $this->block->validateChildren($tokens_of_children); + break; + default: + trigger_error('Invalid context', E_USER_ERROR); + return false; + } + return $result; + } +} + ?> \ No newline at end of file diff --git a/library/HTMLPurifier/Definition.php b/library/HTMLPurifier/Definition.php index ae1b390d..07ae2ef5 100644 --- a/library/HTMLPurifier/Definition.php +++ b/library/HTMLPurifier/Definition.php @@ -105,15 +105,15 @@ class HTMLPurifier_Definition $e_misc = "$e_misc_inline"; $e_inline = "a | $e_special | $e_fontstyle | $e_phrase". " | $e_inline_forms"; + $e__inline = "#PCDATA | $e_inline | $e_misc_inline"; // note the casing - $e_Inline = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_inline". - " | $e_misc_inline"); + $e_Inline = new HTMLPurifier_ChildDef_Optional($e__inline); $e_heading = 'h1|h2|h3|h4|h5|h6'; $e_lists = 'ul | ol | dl'; $e_blocktext = 'pre | hr | blockquote | address'; $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table"; - $e_Flow = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block". - " | $e_inline | $e_misc"); + $e__flow = "#PCDATA | $e_block | $e_inline | $e_misc"; + $e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow); $e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_special". " | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline"); $e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a". @@ -123,7 +123,8 @@ class HTMLPurifier_Definition $e_form_button_content = new HTMLPurifier_ChildDef_Optional(''); // unused $this->info['ins']->child = - $this->info['del']->child = + $this->info['del']->child = new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow); + $this->info['blockquote']->child= $this->info['dd']->child = $this->info['li']->child = @@ -193,6 +194,20 @@ class HTMLPurifier_Definition $this->info['th']->child = $e_Flow; $this->info['td']->child = $e_Flow; + ////////////////////////////////////////////////////////////////////// + // info[]->type : defines the type of the element (block or inline) + + // reuses $e_Inline and $e_block + + foreach ($e_Inline->elements as $name) { + $this->info[$name]->type = 'inline'; + } + + $e_Block = new HTMLPurifier_ChildDef_Optional($e_block); + foreach ($e_Block->elements as $name) { + $this->info[$name]->type = 'block'; + } + ////////////////////////////////////////////////////////////////////// // info[]->attr : defines allowed attributes for elements @@ -249,6 +264,7 @@ class HTMLPurifier_ElementDef var $attr = array(); var $auto_close = array(); var $child; + var $type = 'unknown'; } diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php index 6f41d308..bd7ecf92 100644 --- a/library/HTMLPurifier/Strategy/FixNesting.php +++ b/library/HTMLPurifier/Strategy/FixNesting.php @@ -47,12 +47,27 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // $i is index of start token // $j is index of end token + // calculate parent information + if ($count = count($stack)) { + $parent_index = $stack[$count-1]; + $parent_name = $tokens[$parent_index]->name; + $parent_def = $this->definition->info[$parent_name]; + } else { + $parent_index = $parent_name = $parent_def = null; + } + + // calculate context + if (isset($parent_def)) { + $context = $parent_def->type; + } else { + $context = 'unknown'; + } // DEFINITION CALL $child_def = $this->definition->info[$tokens[$i]->name]->child; // have DTD child def validate children - $result = $child_def->validateChildren($child_tokens); + $result = $child_def->validateChildren($child_tokens, $context); // process result if ($result === true) { @@ -79,10 +94,6 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // current node is now the next possible start node // unless it turns out that we need to do a double-check - $parent_index = $stack[count($stack)-1]; - $parent_name = $tokens[$parent_index]->name; - $parent_def = $this->definition->info[$parent_name]; - if (!$parent_def->child->allow_empty) { // we need to do a double-check $i = $parent_index; diff --git a/tests/HTMLPurifier/ChildDefTest.php b/tests/HTMLPurifier/ChildDefTest.php index e86d2b4c..4e2f651a 100644 --- a/tests/HTMLPurifier/ChildDefTest.php +++ b/tests/HTMLPurifier/ChildDefTest.php @@ -16,15 +16,21 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase parent::UnitTestCase(); } - function assertSeries($inputs, $expect, $def) { + function assertSeries($inputs, $expect, $def, $context = array()) { foreach ($inputs as $i => $input) { $tokens = $this->lex->tokenizeHTML($input); - $result = $def->validateChildren($tokens); + + if (isset($context[$i])) { + $result = $def->validateChildren($tokens, $context[$i]); + } else { + $result = $def->validateChildren($tokens); + } + if (is_bool($expect[$i])) { $this->assertIdentical($expect[$i], $result); } else { $result_html = $this->gen->generateFromTokens($result); - $this->assertEqual($expect[$i], $result_html); + $this->assertEqual($expect[$i], $result_html, "Test $i: %s"); paintIf($result_html, $result_html != $expect[$i]); } } @@ -124,6 +130,29 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase $this->assertSeries($inputs, $expect, $def); } + function test_chameleon() { + + $def = new HTMLPurifier_ChildDef_Chameleon( + 'b | i', // allowed only when in inline context + 'b | i | div' // allowed only when in block context + ); + + $inputs[0] = 'Allowed.'; + $expect[0] = true; + $context[0] = 'inline'; + + $inputs[1] = '
Not allowed.
'; + $expect[1] = ''; + $context[1] = 'inline'; + + $inputs[2] = '
Allowed.
'; + $expect[2] = true; + $context[2] = 'block'; + + $this->assertSeries($inputs, $expect, $def, $context); + + } + } ?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php index 7b4f5bc0..064b0f8f 100644 --- a/tests/HTMLPurifier/Strategy/FixNestingTest.php +++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php @@ -61,6 +61,10 @@ class HTMLPurifier_Strategy_FixNestingTest $inputs[10] = '
'; $expect[10] = ''; + // block in inline ins not allowed + $inputs[11] = '
Not allowed!
'; + $expect[11] = '<div>Not allowed!</div>'; + $this->assertStrategyWorks($strategy, $inputs, $expect); }