0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-19 18:55:19 +00:00

Make Strategy_MakeWellFormed operate in place.

Previously, MakeWellFormed processed tokens and appended them onto an output
array, which was presumably immutable and inaccessible to Injectors. By
having MakeWellFormed operate directly on the input array, the strategy
saves memory and will also allow for a rewind implementation, as unifying
the two arrays allows Injectors to easily determine an earlier index that
they'd like to reset state to.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2008-06-27 01:33:48 -04:00
parent a5ceb1e22a
commit f8b47c64dd
2 changed files with 73 additions and 22 deletions

2
NEWS
View File

@ -15,6 +15,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! %Output.AttrSort for when you need your attributes in alphabetical order to ! %Output.AttrSort for when you need your attributes in alphabetical order to
deal with a bug in FCKEditor. Requested by frank farmer. deal with a bug in FCKEditor. Requested by frank farmer.
! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith. ! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith.
. Strategy_MakeWellFormed now operates in-place, saving memory and allowing
for more interesting filter-backtracking
3.1.1, released 2008-06-19 3.1.1, released 2008-06-19
# %URI.Munge now, by default, does not munge resources (for example, <img src="">) # %URI.Munge now, by default, does not munge resources (for example, <img src="">)

View File

@ -17,7 +17,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$definition = $config->getHTMLDefinition(); $definition = $config->getHTMLDefinition();
// local variables // local variables
$result = array();
$generator = new HTMLPurifier_Generator($config, $context); $generator = new HTMLPurifier_Generator($config, $context);
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$e = $context->get('ErrorCollector', true); $e = $context->get('ErrorCollector', true);
@ -26,7 +25,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->currentNesting = array(); $this->currentNesting = array();
$this->inputIndex = false; $this->inputIndex = false;
$this->inputTokens =& $tokens; $this->inputTokens =& $tokens;
$this->outputTokens =& $result;
// context variables // context variables
$context->register('CurrentNesting', $this->currentNesting); $context->register('CurrentNesting', $this->currentNesting);
@ -88,7 +86,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// if all goes well, this token will be passed through unharmed // if all goes well, this token will be passed through unharmed
$token = $tokens[$this->inputIndex]; $token = $tokens[$this->inputIndex];
//echo '<hr>';
//printTokens($tokens, $this->inputIndex); //printTokens($tokens, $this->inputIndex);
//var_dump($this->currentNesting);
foreach ($this->injectors as $injector) { foreach ($this->injectors as $injector) {
if ($injector->skip > 0) $injector->skip--; if ($injector->skip > 0) $injector->skip--;
@ -142,9 +142,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// the parent // the parent
if (!isset($parent_info->child->elements[$token->name])) { if (!isset($parent_info->child->elements[$token->name])) {
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
// close the parent, then re-loop to reprocess token // insert parent end tag before this tag;
$result[] = new HTMLPurifier_Token_End($parent->name); // end tag isn't processed, but this tag is processed again
$this->inputIndex--; $this->insertBefore(new HTMLPurifier_Token_End($parent->name));
continue; continue;
} }
@ -167,17 +167,21 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
} }
// sanity check: we should be dealing with a closing tag // sanity check: we should be dealing with a closing tag
if (!$token instanceof HTMLPurifier_Token_End) continue; if (!$token instanceof HTMLPurifier_Token_End) {
$this->remove();
continue;
}
// make sure that we have something open // make sure that we have something open
if (empty($this->currentNesting)) { if (empty($this->currentNesting)) {
if ($escape_invalid_tags) { if ($escape_invalid_tags) {
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text'); if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
$result[] = new HTMLPurifier_Token_Text( $this->swap(new HTMLPurifier_Token_Text(
$generator->generateFromToken($token) $generator->generateFromToken($token)
); ));
} elseif ($e) { } else {
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed'); $this->remove();
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
} }
continue; continue;
} }
@ -185,7 +189,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// first, check for the simplest case: everything closes neatly // first, check for the simplest case: everything closes neatly
$current_parent = array_pop($this->currentNesting); $current_parent = array_pop($this->currentNesting);
if ($current_parent->name == $token->name) { if ($current_parent->name == $token->name) {
$result[] = $token;
foreach ($this->injectors as $i => $injector) { foreach ($this->injectors as $i => $injector) {
$injector->notifyEnd($token); $injector->notifyEnd($token);
} }
@ -213,29 +216,33 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// we still didn't find the tag, so remove // we still didn't find the tag, so remove
if ($skipped_tags === false) { if ($skipped_tags === false) {
if ($escape_invalid_tags) { if ($escape_invalid_tags) {
$result[] = new HTMLPurifier_Token_Text( $this->swap(new HTMLPurifier_Token_Text(
$generator->generateFromToken($token) $generator->generateFromToken($token)
); ));
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text'); if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
} elseif ($e) { } else {
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed'); $this->remove();
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
} }
continue; continue;
} }
// okay, we found it, close all the skipped tags // okay, we found it, close all the skipped tags
// note that skipped tags contains the element we need closed // note that skipped tags contains the element we need closed
$this->remove();
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) { for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
// please don't redefine $i! // please don't redefine $i!
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) { if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]); $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
} }
$result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name); $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
$this->insertAfter($new_token);
//printTokens($tokens, $this->inputIndex);
//var_dump($this->currentNesting);
foreach ($this->injectors as $injector) { foreach ($this->injectors as $injector) {
$injector->notifyEnd($new_token); $injector->notifyEnd($new_token);
} }
} }
} }
$context->destroy('CurrentNesting'); $context->destroy('CurrentNesting');
@ -252,7 +259,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) { if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]); $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
} }
$result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name); // instead of splice, since we know this is the end
$tokens[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
foreach ($this->injectors as $injector) { foreach ($this->injectors as $injector) {
$injector->notifyEnd($new_token); $injector->notifyEnd($new_token);
} }
@ -261,11 +269,50 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
unset($this->outputTokens, $this->injectors, $this->currentInjector, unset($this->outputTokens, $this->injectors, $this->currentInjector,
$this->currentNesting, $this->inputTokens, $this->inputIndex); $this->currentNesting, $this->inputTokens, $this->inputIndex);
return $tokens;
return $result;
} }
function processToken($token, $config, $context) { /**
* Inserts a token before the current token. Cursor now points to this token.
*/
protected function insertBefore($token) {
    // Zero-length splice at the cursor: nothing is removed, the new token
    // takes the cursor's slot and the former current token shifts forward.
    // The cursor index is not modified, so it now refers to the new token.
    $cursor = $this->inputIndex;
    $inserted = array($token);
    array_splice($this->inputTokens, $cursor, 0, $inserted);
}
/**
 * Advances the cursor by one and splices the given token in at that
 * position, so it lands directly after the token that was current.
 * The cursor ends up on the newly inserted token.
 */
protected function insertAfter($token) {
    $cursor = ++$this->inputIndex;
    array_splice($this->inputTokens, $cursor, 0, array($token));
}
/**
 * Deletes the token under the cursor from the token list and moves the
 * cursor back by one, leaving it on the preceding position.
 */
protected function remove() {
    // Post-decrement: $cursor keeps the position to delete while the
    // stored index is stepped back.
    $cursor = $this->inputIndex--;
    array_splice($this->inputTokens, $cursor, 1);
}
/**
 * Replaces the token under the cursor with the given token. The cursor
 * index itself is left untouched, so it now refers to the replacement.
 */
protected function swap($token) {
    $replacement = array($token);
    array_splice($this->inputTokens, $this->inputIndex, 1, $replacement);
}
/**
* Processes arbitrary token values for complicated substitution patterns.
* In general:
*
* If $token is an array, it is a list of tokens to substitute for the
* current token. These tokens then get individually processed.
*
* If $token is a regular token, it is swapped with the current token,
* and the stack is updated.
*
* If $token is false, the current token is deleted.
*/
protected function processToken($token, $config, $context) {
if (is_array($token)) { if (is_array($token)) {
// the original token was overloaded by an injector, time // the original token was overloaded by an injector, time
// to some fancy acrobatics // to some fancy acrobatics
@ -289,12 +336,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
} }
} elseif ($token) { } elseif ($token) {
// regular case // regular case
$this->outputTokens[] = $token; $this->swap($token);
if ($token instanceof HTMLPurifier_Token_Start) { if ($token instanceof HTMLPurifier_Token_Start) {
$this->currentNesting[] = $token; $this->currentNesting[] = $token;
} elseif ($token instanceof HTMLPurifier_Token_End) { } elseif ($token instanceof HTMLPurifier_Token_End) {
array_pop($this->currentNesting); // not actually used array_pop($this->currentNesting); // not actually used
} }
} else {
$this->remove();
} }
} }