From e013bc9126a97aff2feb8d4786a77e4c280ef666 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 1 Aug 2008 20:52:06 -0400 Subject: [PATCH] Fix bug involving autoclose and inline elements in strict <blockquote>
. The newest autoclose code uses the elements property to determine whether or not an element should be closed by a particular tag. The heuristic is simple: if the element doesn't allow that tag as a child, the tag closes the parent container. This doesn't work, however, with <blockquote>
, which while not allowing inline styles under Strict doctypes, requires them to be passed through MakeWellFormed. The fix was to transition MakeWellFormed to call a method to retrieve the elements, and then have StrictBlockquote implement a special version of this method. Future versions of HTML Purifier may be more flexible in this regard--further study of the HTML5 specification is required. Signed-off-by: Edward Z. Yang --- NEWS | 1 + library/HTMLPurifier/ChildDef.php | 8 +++++ .../ChildDef/StrictBlockquote.php | 31 ++++++++++++++----- .../HTMLPurifier/Strategy/MakeWellFormed.php | 2 +- .../HTMLT/strict-blockquote-with-inline.htmlt | 6 ++++ .../Strategy/MakeWellFormedTest.php | 9 ++++++ 6 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 tests/HTMLPurifier/HTMLT/strict-blockquote-with-inline.htmlt diff --git a/NEWS b/NEWS index 7aacdc20..4f1c09e5 100644 --- a/NEWS +++ b/NEWS @@ -37,6 +37,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier Requested by Chris. - Fix error in documentation regarding %Filter.ExtractStyleBlocks - Prevent ]]> from triggering %Core.ConvertDocumentToFragment +- Fix bug with inline elements in blockquotes conflicting with strict doctype . Strategy_MakeWellFormed now operates in-place, saving memory and allowing for more interesting filter-backtracking . New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind diff --git a/library/HTMLPurifier/ChildDef.php b/library/HTMLPurifier/ChildDef.php index 0cc34567..38f3692f 100644 --- a/library/HTMLPurifier/ChildDef.php +++ b/library/HTMLPurifier/ChildDef.php @@ -24,6 +24,14 @@ abstract class HTMLPurifier_ChildDef */ public $elements = array(); + /** + * Get lookup of tag names that should not close this element automatically. + * All other elements will do so. + */ + public function getNonAutoCloseElements($config) { + return $this->elements; + } + /** * Validates nodes according to definition and returns modification. 
* diff --git a/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/library/HTMLPurifier/ChildDef/StrictBlockquote.php index ecdb17ff..ec0890d9 100644 --- a/library/HTMLPurifier/ChildDef/StrictBlockquote.php +++ b/library/HTMLPurifier/ChildDef/StrictBlockquote.php @@ -10,16 +10,19 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi public $allow_empty = true; public $type = 'strictblockquote'; protected $init = false; + + /** + * @note We don't want MakeWellFormed to auto-close inline elements since + * they might be allowed. + */ + public function getNonAutoCloseElements($config) { + $this->init($config); + return $this->fake_elements; + } + public function validateChildren($tokens_of_children, $config, $context) { - $def = $config->getHTMLDefinition(); - if (!$this->init) { - // allow all inline elements - $this->real_elements = $this->elements; - $this->fake_elements = $def->info_content_sets['Flow']; - $this->fake_elements['#PCDATA'] = true; - $this->init = true; - } + $this->init($config); // trick the parent class into thinking it allows more $this->elements = $this->fake_elements; @@ -29,6 +32,7 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi if ($result === false) return array(); if ($result === true) $result = $tokens_of_children; + $def = $config->getHTMLDefinition(); $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper); $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper); $is_inline = false; @@ -68,5 +72,16 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi if ($is_inline) $ret[] = $block_wrap_end; return $ret; } + + private function init($config) { + if (!$this->init) { + $def = $config->getHTMLDefinition(); + // allow all inline elements + $this->real_elements = $this->elements; + $this->fake_elements = $def->info_content_sets['Flow']; + $this->fake_elements['#PCDATA'] = true; + $this->init = true; + } + } } 
diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index c0e9d3a2..cf68f6d8 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -140,7 +140,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $parent = array_pop($this->currentNesting); if (isset($definition->info[$parent->name])) { - $elements = $definition->info[$parent->name]->child->elements; + $elements = $definition->info[$parent->name]->child->getNonAutoCloseElements($config); $autoclose = !isset($elements[$token->name]); } else { $autoclose = false; diff --git a/tests/HTMLPurifier/HTMLT/strict-blockquote-with-inline.htmlt b/tests/HTMLPurifier/HTMLT/strict-blockquote-with-inline.htmlt new file mode 100644 index 00000000..d1825b95 --- /dev/null +++ b/tests/HTMLPurifier/HTMLT/strict-blockquote-with-inline.htmlt @@ -0,0 +1,6 @@ +--INI-- +HTML.Doctype = "XHTML 1.0 Strict" +--HTML-- +
Illegal contents
+--EXPECT-- +

Illegal contents

diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php index f1399b23..816bdbca 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php @@ -93,5 +93,14 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn ); } + function testBlockquoteWithInline() { + $this->config->set('HTML', 'Doctype', 'XHTML 1.0 Strict'); + $this->assertResult( + // This is actually invalid, but will be fixed by + // ChildDef_StrictBlockquote + '
foobar
' + ); + } + }