From 6a6c0ed5d7bc0eeab6174c4b9accd4cbe3ea32e6 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 22 Mar 2011 00:26:41 +0000 Subject: [PATCH] Don't autoclose if no parents support the tag. Signed-off-by: Edward Z. Yang --- NEWS | 4 + .../HTMLPurifier/Strategy/MakeWellFormed.php | 101 ++++++++++++++---- .../Strategy/MakeWellFormedTest.php | 7 ++ 3 files changed, 90 insertions(+), 22 deletions(-) diff --git a/NEWS b/NEWS index 486860d0..e198c9ce 100644 --- a/NEWS +++ b/NEWS @@ -45,6 +45,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier for reporting. - Fix harmless notice from indexing into empty string. Thanks Matthijs Kooijman for reporting. +- Don't autoclose no parent elements are able to support the element + that triggered the autoclose. In particular fixes strange behavior + of stray
  • tags. Thanks pkuliga@gmail.com for reporting and + Neike Taika-Tessaro for debugging assistance. 4.2.0, released 2010-09-15 ! Added %Core.RemoveProcessingInstructions, which lets you remove diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index d3f01578..c7aa1bb8 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -2,6 +2,14 @@ /** * Takes tokens makes them well-formed (balance end tags, etc.) + * + * Specification of the armor attributes this strategy uses: + * + * - MakeWellFormed_TagClosedError: This armor field is used to + * suppress tag closed errors for certain tokens [TagClosedSuppress], + * in particular, if a tag was generated automatically by HTML + * Purifier, we may rely on our infrastructure to close it for us + * and shouldn't report an error to the user [TagClosedAuto]. */ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy { @@ -43,6 +51,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // local variables $generator = new HTMLPurifier_Generator($config, $context); $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); + // used for autoclose early abortion + $global_parent_allowed_elements = array(); + if (isset($definition->info[$definition->info_parent])) { + // may be unset under testing circumstances + $global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config); + } $e = $context->get('ErrorCollector', true); $t = false; // token index $i = false; // injector index @@ -102,7 +116,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // -- end INJECTOR -- - // a note on punting: + // a note on reprocessing: // In order to reduce code duplication, whenever some code needs // to make HTML changes in order to make things "correct", the // new HTML gets sent through the purifier, regardless of its @@ -149,7 +163,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $top_nesting = array_pop($this->stack); $this->stack[] = $top_nesting; - // send error + // send error [TagClosedSuppress] if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting); } @@ -211,6 +225,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // ...unless they also have to close their parent if (!empty($this->stack)) { + // Performance note: you might think that it's rather + // inefficient, recalculating the autoclose information + // for every tag that a token closes (since when we + // do an autoclose, we push a new token into the + // stream and then /process/ that, before + // re-processing this token.) But this is + // necessary, because an injector can make an + // arbitrary transformations to the autoclosing + // tokens we introduce, so things may have changed + // in the meantime. Also, doing the inefficient thing is + // "easy" to reason about (for certain perverse definitions + // of "easy") + $parent = array_pop($this->stack); $this->stack[] = $parent; @@ -243,24 +270,51 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } if ($autoclose) { - // errors need to be updated - $new_token = new HTMLPurifier_Token_End($parent->name); - $new_token->start = $parent; - if ($carryover) { - $element = clone $parent; - $element->armor['MakeWellFormed_TagClosedError'] = true; - $element->carryover = true; - $this->processToken(array($new_token, $token, $element)); - } else { - $this->insertBefore($new_token); - } - if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { - if (!$carryover) { - $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); - } else { - $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); + // check if this autoclose is doomed to fail + // (this rechecks $parent, which his harmless) + $autoclose_ok = isset($global_parent_allowed_elements[$token->name]); + if (!$autoclose_ok) { + foreach ($this->stack as $ancestor) { + $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config); + if (isset($elements[$token->name])) { + $autoclose_ok = true; + break; + } + if ($definition->info[$token->name]->wrap) { + $wrapname = $definition->info[$token->name]->wrap; + $wrapdef = $definition->info[$wrapname]; + $wrap_elements = $wrapdef->child->getAllowedElements($config); + if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) { + $autoclose_ok = true; + break; + } + } } } + if ($autoclose_ok) { + // errors need to be updated + $new_token = new HTMLPurifier_Token_End($parent->name); + $new_token->start = $parent; + if ($carryover) { + $element = clone $parent; + // [TagClosedAuto] + $element->armor['MakeWellFormed_TagClosedError'] = true; + $element->carryover = true; + $this->processToken(array($new_token, $token, $element)); + } else { + $this->insertBefore($new_token); + } + // [TagClosedSuppress] + if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { + if (!$carryover) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); + } else { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); + } + } + } else { + $this->remove(); + } $reprocess = true; continue; } @@ -366,7 +420,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if ($e) { for ($j = $c - 1; $j > 0; $j--) { // notice we exclude $j == 0, i.e. the current ending tag, from - // the errors... + // the errors... [TagClosedSuppress] if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]); } @@ -381,6 +435,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $new_token->start = $skipped_tags[$j]; array_unshift($replace, $new_token); if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) { + // [TagClosedAuto] $element = clone $skipped_tags[$j]; $element->carryover = true; $element->armor['MakeWellFormed_TagClosedError'] = true; @@ -449,7 +504,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } /** - * Inserts a token before the current token. Cursor now points to this token + * Inserts a token before the current token. Cursor now points to + * this token. You must reprocess after this. */ private function insertBefore($token) { array_splice($this->tokens, $this->t, 0, array($token)); @@ -457,14 +513,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy /** * Removes current token. Cursor now points to new token occupying previously - * occupied space. + * occupied space. You must reprocess after this. */ private function remove() { array_splice($this->tokens, $this->t, 1); } /** - * Swap current token with new token. Cursor points to new token (no change). + * Swap current token with new token. Cursor points to new token (no + * change). You must reprocess after this. */ private function swap($token) { $this->tokens[$this->t] = $token; diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php index 34fc1831..75ee646e 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php @@ -137,6 +137,13 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn ); } + function testNoAutocloseIfNoParentsCanAccomodateTag() { + $this->assertResult( + '
  • foo
  • ', + '
    foo
    ' + ); + } + } // vim: et sw=4 sts=4