From f8b47c64ddfc8d3b88ebcffcaf25a85474dd4625 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 27 Jun 2008 01:33:48 -0400 Subject: [PATCH] Make Strategy_MakeWellFormed operate in place. Previously, MakeWellFormed processed tokens and appended them onto an output array, which was presumably immutable and inaccessible to Injectors. By having MakeWellFormed operate directly on the input array, the strategy saves memory and will also allow for a rewind implementation, as unifying the two arrays allows Injectors to easily determine an index behind them they'd like to reset state to. Signed-off-by: Edward Z. Yang --- NEWS | 2 + .../HTMLPurifier/Strategy/MakeWellFormed.php | 93 ++++++++++++++----- 2 files changed, 73 insertions(+), 22 deletions(-) diff --git a/NEWS b/NEWS index 86302c9f..40c9dd9a 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! %Output.AttrSort for when you need your attributes in alphabetical order to deal with a bug in FCKEditor. Requested by frank farmer. ! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith. +. 
Strategy_MakeWellFormed now operates in-place, saving memory and allowing + for more interesting filter-backtracking 3.1.1, released 2008-06-19 # %URI.Munge now, by default, does not munge resources (for example, ) diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index 1ca62711..b0001f0b 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -17,7 +17,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $definition = $config->getHTMLDefinition(); // local variables - $result = array(); $generator = new HTMLPurifier_Generator($config, $context); $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); $e = $context->get('ErrorCollector', true); @@ -26,7 +25,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $this->currentNesting = array(); $this->inputIndex = false; $this->inputTokens =& $tokens; - $this->outputTokens =& $result; // context variables $context->register('CurrentNesting', $this->currentNesting); @@ -88,7 +86,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // if all goes well, this token will be passed through unharmed $token = $tokens[$this->inputIndex]; + //echo '
'; //printTokens($tokens, $this->inputIndex); + //var_dump($this->currentNesting); foreach ($this->injectors as $injector) { if ($injector->skip > 0) $injector->skip--; @@ -142,9 +142,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // the parent if (!isset($parent_info->child->elements[$token->name])) { if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); - // close the parent, then re-loop to reprocess token - $result[] = new HTMLPurifier_Token_End($parent->name); - $this->inputIndex--; + // insert parent end tag before this tag; + // end tag isn't processed, but this tag is processed again + $this->insertBefore(new HTMLPurifier_Token_End($parent->name)); continue; } @@ -167,17 +167,21 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } // sanity check: we should be dealing with a closing tag - if (!$token instanceof HTMLPurifier_Token_End) continue; + if (!$token instanceof HTMLPurifier_Token_End) { + $this->remove(); + continue; + } // make sure that we have something open if (empty($this->currentNesting)) { if ($escape_invalid_tags) { if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text'); - $result[] = new HTMLPurifier_Token_Text( + $this->swap(new HTMLPurifier_Token_Text( $generator->generateFromToken($token) - ); - } elseif ($e) { - $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed'); + )); + } else { + $this->remove(); + if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed'); } continue; } @@ -185,7 +189,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // first, check for the simplest case: everything closes neatly $current_parent = array_pop($this->currentNesting); if ($current_parent->name == $token->name) { - $result[] = $token; foreach ($this->injectors as $i => $injector) { $injector->notifyEnd($token); } @@ -213,29 +216,33 @@ class 
HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // we still didn't find the tag, so remove if ($skipped_tags === false) { if ($escape_invalid_tags) { - $result[] = new HTMLPurifier_Token_Text( + $this->swap(new HTMLPurifier_Token_Text( $generator->generateFromToken($token) - ); + )); if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text'); - } elseif ($e) { - $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed'); + } else { + $this->remove(); + if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed'); } continue; } // okay, we found it, close all the skipped tags // note that skipped tags contains the element we need closed + $this->remove(); for ($i = count($skipped_tags) - 1; $i >= 0; $i--) { // please don't redefine $i! if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]); } - $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name); + $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name); + $this->insertAfter($new_token); + //printTokens($tokens, $this->inputIndex); + //var_dump($this->currentNesting); foreach ($this->injectors as $injector) { $injector->notifyEnd($new_token); } } - } $context->destroy('CurrentNesting'); @@ -252,7 +259,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]); } - $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name); + // instead of splice, since we know this is the end + $tokens[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name); foreach ($this->injectors as $injector) { $injector->notifyEnd($new_token); } @@ -261,11 +269,50 @@ class 
HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy unset($this->outputTokens, $this->injectors, $this->currentInjector, $this->currentNesting, $this->inputTokens, $this->inputIndex); - - return $result; + return $tokens; } - function processToken($token, $config, $context) { + /** + * Inserts a token before the current token. Cursor now points to this token. + */ + protected function insertBefore($token) { + array_splice($this->inputTokens, $this->inputIndex, 0, array($token)); + } + + /** + * Inserts a token after the current token. Cursor now points to this token. + */ + protected function insertAfter($token) { + array_splice($this->inputTokens, ++$this->inputIndex, 0, array($token)); + } + + /** + * Removes current token. Cursor now points to previous token. + */ + protected function remove() { + array_splice($this->inputTokens, $this->inputIndex--, 1); + } + + /** + * Swap current token with new token. Cursor points to new token (no change). + */ + protected function swap($token) { + array_splice($this->inputTokens, $this->inputIndex, 1, array($token)); + } + + /** + * Processes arbitrary token values for complicated substitution patterns. + * In general: + * + * If $token is an array, it is a list of tokens to substitute for the + * current token. These tokens then get individually processed. + * + * If $token is a regular token, it is swapped with the current token, + * and the stack is updated. + * + * If $token is false, the current token is deleted. 
+ */ + protected function processToken($token, $config, $context) { if (is_array($token)) { // the original token was overloaded by an injector, time // to some fancy acrobatics @@ -289,12 +336,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } } elseif ($token) { // regular case - $this->outputTokens[] = $token; + $this->swap($token); if ($token instanceof HTMLPurifier_Token_Start) { $this->currentNesting[] = $token; } elseif ($token instanceof HTMLPurifier_Token_End) { array_pop($this->currentNesting); // not actually used } + } else { + $this->remove(); } }