mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-03-23 14:27:02 +00:00
Make Strategy_MakeWellFormed operate in place.
Previously, MakeWellFormed processed tokens and appended them onto an output array, which was presumably immutable and inaccessible to Injectors. By having MakeWellFormed operate directly on the input array, the strategy saves memory and will also allow for a rewind implementation, as unifying the two arrays allows Injectors to easily determine an index behind them they'd like to reset state to. Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
parent
a5ceb1e22a
commit
f8b47c64dd
2
NEWS
2
NEWS
@ -15,6 +15,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
! %Output.AttrSort for when you need your attributes in alphabetical order to
|
! %Output.AttrSort for when you need your attributes in alphabetical order to
|
||||||
deal with a bug in FCKEditor. Requested by frank farmer.
|
deal with a bug in FCKEditor. Requested by frank farmer.
|
||||||
! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith.
|
! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith.
|
||||||
|
. Strategy_MakeWellFormed now operates in-place, saving memory and allowing
|
||||||
|
for more interesting filter-backtracking
|
||||||
|
|
||||||
3.1.1, released 2008-06-19
|
3.1.1, released 2008-06-19
|
||||||
# %URI.Munge now, by default, does not munge resources (for example, <img src="">)
|
# %URI.Munge now, by default, does not munge resources (for example, <img src="">)
|
||||||
|
@ -17,7 +17,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$definition = $config->getHTMLDefinition();
|
$definition = $config->getHTMLDefinition();
|
||||||
|
|
||||||
// local variables
|
// local variables
|
||||||
$result = array();
|
|
||||||
$generator = new HTMLPurifier_Generator($config, $context);
|
$generator = new HTMLPurifier_Generator($config, $context);
|
||||||
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
|
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
|
||||||
$e = $context->get('ErrorCollector', true);
|
$e = $context->get('ErrorCollector', true);
|
||||||
@ -26,7 +25,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$this->currentNesting = array();
|
$this->currentNesting = array();
|
||||||
$this->inputIndex = false;
|
$this->inputIndex = false;
|
||||||
$this->inputTokens =& $tokens;
|
$this->inputTokens =& $tokens;
|
||||||
$this->outputTokens =& $result;
|
|
||||||
|
|
||||||
// context variables
|
// context variables
|
||||||
$context->register('CurrentNesting', $this->currentNesting);
|
$context->register('CurrentNesting', $this->currentNesting);
|
||||||
@ -88,7 +86,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// if all goes well, this token will be passed through unharmed
|
// if all goes well, this token will be passed through unharmed
|
||||||
$token = $tokens[$this->inputIndex];
|
$token = $tokens[$this->inputIndex];
|
||||||
|
|
||||||
|
//echo '<hr>';
|
||||||
//printTokens($tokens, $this->inputIndex);
|
//printTokens($tokens, $this->inputIndex);
|
||||||
|
//var_dump($this->currentNesting);
|
||||||
|
|
||||||
foreach ($this->injectors as $injector) {
|
foreach ($this->injectors as $injector) {
|
||||||
if ($injector->skip > 0) $injector->skip--;
|
if ($injector->skip > 0) $injector->skip--;
|
||||||
@ -142,9 +142,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// the parent
|
// the parent
|
||||||
if (!isset($parent_info->child->elements[$token->name])) {
|
if (!isset($parent_info->child->elements[$token->name])) {
|
||||||
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||||
// close the parent, then re-loop to reprocess token
|
// insert parent end tag before this tag;
|
||||||
$result[] = new HTMLPurifier_Token_End($parent->name);
|
// end tag isn't processed, but this tag is processed again
|
||||||
$this->inputIndex--;
|
$this->insertBefore(new HTMLPurifier_Token_End($parent->name));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -167,17 +167,21 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
}
|
}
|
||||||
|
|
||||||
// sanity check: we should be dealing with a closing tag
|
// sanity check: we should be dealing with a closing tag
|
||||||
if (!$token instanceof HTMLPurifier_Token_End) continue;
|
if (!$token instanceof HTMLPurifier_Token_End) {
|
||||||
|
$this->remove();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// make sure that we have something open
|
// make sure that we have something open
|
||||||
if (empty($this->currentNesting)) {
|
if (empty($this->currentNesting)) {
|
||||||
if ($escape_invalid_tags) {
|
if ($escape_invalid_tags) {
|
||||||
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
|
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
|
||||||
$result[] = new HTMLPurifier_Token_Text(
|
$this->swap(new HTMLPurifier_Token_Text(
|
||||||
$generator->generateFromToken($token)
|
$generator->generateFromToken($token)
|
||||||
);
|
));
|
||||||
} elseif ($e) {
|
} else {
|
||||||
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
|
$this->remove();
|
||||||
|
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -185,7 +189,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// first, check for the simplest case: everything closes neatly
|
// first, check for the simplest case: everything closes neatly
|
||||||
$current_parent = array_pop($this->currentNesting);
|
$current_parent = array_pop($this->currentNesting);
|
||||||
if ($current_parent->name == $token->name) {
|
if ($current_parent->name == $token->name) {
|
||||||
$result[] = $token;
|
|
||||||
foreach ($this->injectors as $i => $injector) {
|
foreach ($this->injectors as $i => $injector) {
|
||||||
$injector->notifyEnd($token);
|
$injector->notifyEnd($token);
|
||||||
}
|
}
|
||||||
@ -213,29 +216,33 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// we still didn't find the tag, so remove
|
// we still didn't find the tag, so remove
|
||||||
if ($skipped_tags === false) {
|
if ($skipped_tags === false) {
|
||||||
if ($escape_invalid_tags) {
|
if ($escape_invalid_tags) {
|
||||||
$result[] = new HTMLPurifier_Token_Text(
|
$this->swap(new HTMLPurifier_Token_Text(
|
||||||
$generator->generateFromToken($token)
|
$generator->generateFromToken($token)
|
||||||
);
|
));
|
||||||
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
|
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
|
||||||
} elseif ($e) {
|
} else {
|
||||||
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
|
$this->remove();
|
||||||
|
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// okay, we found it, close all the skipped tags
|
// okay, we found it, close all the skipped tags
|
||||||
// note that skipped tags contains the element we need closed
|
// note that skipped tags contains the element we need closed
|
||||||
|
$this->remove();
|
||||||
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
|
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
|
||||||
// please don't redefine $i!
|
// please don't redefine $i!
|
||||||
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
|
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
|
||||||
}
|
}
|
||||||
$result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
$new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
||||||
|
$this->insertAfter($new_token);
|
||||||
|
//printTokens($tokens, $this->inputIndex);
|
||||||
|
//var_dump($this->currentNesting);
|
||||||
foreach ($this->injectors as $injector) {
|
foreach ($this->injectors as $injector) {
|
||||||
$injector->notifyEnd($new_token);
|
$injector->notifyEnd($new_token);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$context->destroy('CurrentNesting');
|
$context->destroy('CurrentNesting');
|
||||||
@ -252,7 +259,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
|
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
|
||||||
}
|
}
|
||||||
$result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
|
// instead of splice, since we know this is the end
|
||||||
|
$tokens[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
|
||||||
foreach ($this->injectors as $injector) {
|
foreach ($this->injectors as $injector) {
|
||||||
$injector->notifyEnd($new_token);
|
$injector->notifyEnd($new_token);
|
||||||
}
|
}
|
||||||
@ -261,11 +269,50 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
unset($this->outputTokens, $this->injectors, $this->currentInjector,
|
unset($this->outputTokens, $this->injectors, $this->currentInjector,
|
||||||
$this->currentNesting, $this->inputTokens, $this->inputIndex);
|
$this->currentNesting, $this->inputTokens, $this->inputIndex);
|
||||||
|
return $tokens;
|
||||||
return $result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function processToken($token, $config, $context) {
|
/**
|
||||||
|
* Inserts a token before the current token. Cursor now points to this token.
|
||||||
|
*/
|
||||||
|
protected function insertBefore($token) {
|
||||||
|
array_splice($this->inputTokens, $this->inputIndex, 0, array($token));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inserts a token after the current token. Cursor now points to this token.
|
||||||
|
*/
|
||||||
|
protected function insertAfter($token) {
|
||||||
|
array_splice($this->inputTokens, ++$this->inputIndex, 0, array($token));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes current token. Cursor now points to previous token.
|
||||||
|
*/
|
||||||
|
protected function remove() {
|
||||||
|
array_splice($this->inputTokens, $this->inputIndex--, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Swap current token with new token. Cursor points to new token (no change).
|
||||||
|
*/
|
||||||
|
protected function swap($token) {
|
||||||
|
array_splice($this->inputTokens, $this->inputIndex, 1, array($token));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Processes arbitrary token values for complicated substitution patterns.
|
||||||
|
* In general:
|
||||||
|
*
|
||||||
|
* If $token is an array, it is a list of tokens to substitute for the
|
||||||
|
* current token. These tokens then get individually processed.
|
||||||
|
*
|
||||||
|
* If $token is a regular token, it is swapped with the current token,
|
||||||
|
* and the stack is updated.
|
||||||
|
*
|
||||||
|
* If $token is false, the current token is deleted.
|
||||||
|
*/
|
||||||
|
protected function processToken($token, $config, $context) {
|
||||||
if (is_array($token)) {
|
if (is_array($token)) {
|
||||||
// the original token was overloaded by an injector, time
|
// the original token was overloaded by an injector, time
|
||||||
// to some fancy acrobatics
|
// to some fancy acrobatics
|
||||||
@ -289,12 +336,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
}
|
}
|
||||||
} elseif ($token) {
|
} elseif ($token) {
|
||||||
// regular case
|
// regular case
|
||||||
$this->outputTokens[] = $token;
|
$this->swap($token);
|
||||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||||
$this->currentNesting[] = $token;
|
$this->currentNesting[] = $token;
|
||||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||||
array_pop($this->currentNesting); // not actually used
|
array_pop($this->currentNesting); // not actually used
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
$this->remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user