mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 08:21:52 +00:00
Use a Zipper to process MakeWellFormed, removing quadratic behavior.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
parent
82bcc62058
commit
8f401f769e
@ -503,7 +503,7 @@
|
|||||||
</directive>
|
</directive>
|
||||||
<directive id="Core.EscapeInvalidTags">
|
<directive id="Core.EscapeInvalidTags">
|
||||||
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
|
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
|
||||||
<line>66</line>
|
<line>72</line>
|
||||||
</file>
|
</file>
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||||
<line>26</line>
|
<line>26</line>
|
||||||
|
@ -35,19 +35,16 @@ abstract class HTMLPurifier_Injector
|
|||||||
protected $currentNesting;
|
protected $currentNesting;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reference to InputTokens variable in Context. This is an array
|
* Reference to current token.
|
||||||
* list of the input tokens that are being processed.
|
* @type HTMLPurifier_Token
|
||||||
* @type array
|
|
||||||
*/
|
*/
|
||||||
protected $inputTokens;
|
protected $currentToken;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reference to InputIndex variable in Context. This is an integer
|
* Reference to InputZipper variable in Context.
|
||||||
* array index for $this->inputTokens that indicates what token
|
* @type HTMLPurifier_Zipper
|
||||||
* is currently being processed.
|
|
||||||
* @type int
|
|
||||||
*/
|
*/
|
||||||
protected $inputIndex;
|
protected $inputZipper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Array of elements and attributes this injector creates and therefore
|
* Array of elements and attributes this injector creates and therefore
|
||||||
@ -58,33 +55,33 @@ abstract class HTMLPurifier_Injector
|
|||||||
public $needed = array();
|
public $needed = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Index of inputTokens to rewind to.
|
* Number of elements to rewind backwards (relative).
|
||||||
* @type bool|int
|
* @type bool|int
|
||||||
*/
|
*/
|
||||||
protected $rewind = false;
|
protected $rewindOffset = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rewind to a spot to re-perform processing. This is useful if you
|
* Rewind to a spot to re-perform processing. This is useful if you
|
||||||
* deleted a node, and now need to see if this change affected any
|
* deleted a node, and now need to see if this change affected any
|
||||||
* earlier nodes. Rewinding does not affect other injectors, and can
|
* earlier nodes. Rewinding does not affect other injectors, and can
|
||||||
* result in infinite loops if not used carefully.
|
* result in infinite loops if not used carefully.
|
||||||
* @param bool|int $index
|
* @param bool|int $offset
|
||||||
* @warning HTML Purifier will prevent you from fast-forwarding with this
|
* @warning HTML Purifier will prevent you from fast-forwarding with this
|
||||||
* function.
|
* function.
|
||||||
*/
|
*/
|
||||||
public function rewind($index)
|
public function rewindOffset($offset)
|
||||||
{
|
{
|
||||||
$this->rewind = $index;
|
$this->rewindOffset = $offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves rewind, and then unsets it.
|
* Retrieves rewind offset, and then unsets it.
|
||||||
* @return bool|int
|
* @return bool|int
|
||||||
*/
|
*/
|
||||||
public function getRewind()
|
public function getRewindOffset()
|
||||||
{
|
{
|
||||||
$r = $this->rewind;
|
$r = $this->rewindOffset;
|
||||||
$this->rewind = false;
|
$this->rewindOffset = false;
|
||||||
return $r;
|
return $r;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,8 +105,8 @@ abstract class HTMLPurifier_Injector
|
|||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
$this->currentNesting =& $context->get('CurrentNesting');
|
$this->currentNesting =& $context->get('CurrentNesting');
|
||||||
$this->inputTokens =& $context->get('InputTokens');
|
$this->currentToken =& $context->get('CurrentToken');
|
||||||
$this->inputIndex =& $context->get('InputIndex');
|
$this->inputZipper =& $context->get('InputZipper');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -183,14 +180,14 @@ abstract class HTMLPurifier_Injector
|
|||||||
protected function forward(&$i, &$current)
|
protected function forward(&$i, &$current)
|
||||||
{
|
{
|
||||||
if ($i === null) {
|
if ($i === null) {
|
||||||
$i = $this->inputIndex + 1;
|
$i = count($this->inputZipper->back) - 1;
|
||||||
} else {
|
} else {
|
||||||
$i++;
|
$i--;
|
||||||
}
|
}
|
||||||
if (!isset($this->inputTokens[$i])) {
|
if ($i < 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
$current = $this->inputTokens[$i];
|
$current = $this->inputZipper->back[$i];
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -237,35 +234,17 @@ abstract class HTMLPurifier_Injector
|
|||||||
protected function backward(&$i, &$current)
|
protected function backward(&$i, &$current)
|
||||||
{
|
{
|
||||||
if ($i === null) {
|
if ($i === null) {
|
||||||
$i = $this->inputIndex - 1;
|
$i = count($this->inputZipper->front) - 1;
|
||||||
} else {
|
} else {
|
||||||
$i--;
|
$i--;
|
||||||
}
|
}
|
||||||
if ($i < 0) {
|
if ($i < 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
$current = $this->inputTokens[$i];
|
$current = $this->inputZipper->front[$i];
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Initializes the iterator at the current position. Use in a do {} while;
|
|
||||||
* loop to force the _forward and _backward functions to start at the
|
|
||||||
* current location.
|
|
||||||
* @warning Please prevent previous references from interfering with this
|
|
||||||
* functions by setting $i = null beforehand!
|
|
||||||
* @param int $i Current integer index variable for inputTokens
|
|
||||||
* @param HTMLPurifier_Token $current Current token variable.
|
|
||||||
* Do NOT use $token, as that variable is also a reference
|
|
||||||
*/
|
|
||||||
protected function current(&$i, &$current)
|
|
||||||
{
|
|
||||||
if ($i === null) {
|
|
||||||
$i = $this->inputIndex;
|
|
||||||
}
|
|
||||||
$current = $this->inputTokens[$i];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handler that is called when a text token is processed
|
* Handler that is called when a text token is processed
|
||||||
*/
|
*/
|
||||||
|
@ -307,13 +307,13 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
|
|||||||
*/
|
*/
|
||||||
private function _pLookAhead()
|
private function _pLookAhead()
|
||||||
{
|
{
|
||||||
$this->current($i, $current);
|
if ($this->currentToken instanceof HTMLPurifier_Token_Start) {
|
||||||
if ($current instanceof HTMLPurifier_Token_Start) {
|
|
||||||
$nesting = 1;
|
$nesting = 1;
|
||||||
} else {
|
} else {
|
||||||
$nesting = 0;
|
$nesting = 0;
|
||||||
}
|
}
|
||||||
$ok = false;
|
$ok = false;
|
||||||
|
$i = null;
|
||||||
while ($this->forwardUntilEndToken($i, $current, $nesting)) {
|
while ($this->forwardUntilEndToken($i, $current, $nesting)) {
|
||||||
$result = $this->_checkNeedsP($current);
|
$result = $this->_checkNeedsP($current);
|
||||||
if ($result !== null) {
|
if ($result !== null) {
|
||||||
|
@ -57,8 +57,9 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$next = false;
|
$next = false;
|
||||||
for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) {
|
$deleted = 1; // the current tag
|
||||||
$next = $this->inputTokens[$i];
|
for ($i = count($this->inputZipper->back) - 1; $i >= 0; $i--, $deleted++) {
|
||||||
|
$next = $this->inputZipper->back[$i];
|
||||||
if ($next instanceof HTMLPurifier_Token_Text) {
|
if ($next instanceof HTMLPurifier_Token_Text) {
|
||||||
if ($next->is_whitespace) {
|
if ($next->is_whitespace) {
|
||||||
continue;
|
continue;
|
||||||
@ -82,16 +83,16 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
|
|||||||
if (isset($token->attr['id']) || isset($token->attr['name'])) {
|
if (isset($token->attr['id']) || isset($token->attr['name'])) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$token = $i - $this->inputIndex + 1;
|
$token = $deleted + 1;
|
||||||
for ($b = $this->inputIndex - 1; $b > 0; $b--) {
|
for ($b = 0, $c = count($this->inputZipper->front); $b < $c; $b++) {
|
||||||
$prev = $this->inputTokens[$b];
|
$prev = $this->inputZipper->front[$b];
|
||||||
if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) {
|
if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// This is safe because we removed the token that triggered this.
|
// This is safe because we removed the token that triggered this.
|
||||||
$this->rewind($b - 1);
|
$this->rewindOffset($b+$deleted);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -21,10 +21,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
protected $tokens;
|
protected $tokens;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Current index in $tokens.
|
* Current token.
|
||||||
* @type int
|
* @type HTMLPurifier_Token
|
||||||
*/
|
*/
|
||||||
protected $t;
|
protected $token;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Zipper managing the true state.
|
||||||
|
* @type HTMLPurifier_Zipper
|
||||||
|
*/
|
||||||
|
protected $zipper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Current nesting of elements.
|
* Current nesting of elements.
|
||||||
@ -67,23 +73,25 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// used for autoclose early abortion
|
// used for autoclose early abortion
|
||||||
$global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
|
$global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
|
||||||
$e = $context->get('ErrorCollector', true);
|
$e = $context->get('ErrorCollector', true);
|
||||||
$t = false; // token index
|
|
||||||
$i = false; // injector index
|
$i = false; // injector index
|
||||||
$token = false; // the current token
|
list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
|
||||||
|
if ($token === NULL) {
|
||||||
|
return array();
|
||||||
|
}
|
||||||
$reprocess = false; // whether or not to reprocess the same token
|
$reprocess = false; // whether or not to reprocess the same token
|
||||||
$stack = array();
|
$stack = array();
|
||||||
|
|
||||||
// member variables
|
// member variables
|
||||||
$this->stack =& $stack;
|
$this->stack =& $stack;
|
||||||
$this->t =& $t;
|
|
||||||
$this->tokens =& $tokens;
|
$this->tokens =& $tokens;
|
||||||
|
$this->token =& $token;
|
||||||
|
$this->zipper =& $zipper;
|
||||||
$this->config = $config;
|
$this->config = $config;
|
||||||
$this->context = $context;
|
$this->context = $context;
|
||||||
|
|
||||||
// context variables
|
// context variables
|
||||||
$context->register('CurrentNesting', $stack);
|
$context->register('CurrentNesting', $stack);
|
||||||
$context->register('InputIndex', $t);
|
$context->register('InputZipper', $zipper);
|
||||||
$context->register('InputTokens', $tokens);
|
|
||||||
$context->register('CurrentToken', $token);
|
$context->register('CurrentToken', $token);
|
||||||
|
|
||||||
// -- begin INJECTOR --
|
// -- begin INJECTOR --
|
||||||
@ -142,32 +150,28 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// punt ($reprocess = true; continue;) and it does that for us.
|
// punt ($reprocess = true; continue;) and it does that for us.
|
||||||
|
|
||||||
// isset is in loop because $tokens size changes during loop exec
|
// no loop condition here: the loop exits via the document-end check below (break once $token is NULL and the stack is empty)
|
||||||
for ($t = 0;
|
for (;;
|
||||||
$t == 0 || isset($tokens[$t - 1]);
|
// only increment if we don't need to reprocess
|
||||||
// only increment if we don't need to reprocess
|
$reprocess ? $reprocess = false : $token = $zipper->next($token)) {
|
||||||
$reprocess ? $reprocess = false : $t++) {
|
|
||||||
|
|
||||||
// check for a rewind
|
// check for a rewind
|
||||||
if (is_int($i) && $i >= 0) {
|
if (is_int($i)) {
|
||||||
// possibility: disable rewinding if the current token has a
|
// possibility: disable rewinding if the current token has a
|
||||||
// rewind set on it already. This would offer protection from
|
// rewind set on it already. This would offer protection from
|
||||||
// infinite loop, but might hinder some advanced rewinding.
|
// infinite loop, but might hinder some advanced rewinding.
|
||||||
$rewind_to = $this->injectors[$i]->getRewind();
|
$rewind_offset = $this->injectors[$i]->getRewindOffset();
|
||||||
if (is_int($rewind_to) && $rewind_to < $t) {
|
if (is_int($rewind_offset)) {
|
||||||
if ($rewind_to < 0) {
|
for ($j = 0; $j < $rewind_offset; $j++) {
|
||||||
$rewind_to = 0;
|
if (empty($zipper->front)) break;
|
||||||
}
|
$token = $zipper->prev($token);
|
||||||
while ($t > $rewind_to) {
|
|
||||||
$t--;
|
|
||||||
$prev = $tokens[$t];
|
|
||||||
// indicate that other injectors should not process this token,
|
// indicate that other injectors should not process this token,
|
||||||
// but we need to reprocess it
|
// but we need to reprocess it
|
||||||
unset($prev->skip[$i]);
|
unset($token->skip[$i]);
|
||||||
$prev->rewind = $i;
|
$token->rewind = $i;
|
||||||
if ($prev instanceof HTMLPurifier_Token_Start) {
|
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||||
array_pop($this->stack);
|
array_pop($this->stack);
|
||||||
} elseif ($prev instanceof HTMLPurifier_Token_End) {
|
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||||
$this->stack[] = $prev->start;
|
$this->stack[] = $token->start;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -175,7 +179,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
}
|
}
|
||||||
|
|
||||||
// handle case of document end
|
// handle case of document end
|
||||||
if (!isset($tokens[$t])) {
|
if ($token === NULL) {
|
||||||
// kill processing if stack is empty
|
// kill processing if stack is empty
|
||||||
if (empty($this->stack)) {
|
if (empty($this->stack)) {
|
||||||
break;
|
break;
|
||||||
@ -191,16 +195,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
}
|
}
|
||||||
|
|
||||||
// append, don't splice, since this is the end
|
// we are past the last token, so just make the closing tag the current token; no splice is needed at the end
|
||||||
$tokens[] = new HTMLPurifier_Token_End($top_nesting->name);
|
$token = new HTMLPurifier_Token_End($top_nesting->name);
|
||||||
|
|
||||||
// punt!
|
// punt!
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$token = $tokens[$t];
|
//echo '<br>'; printZipper($zipper, $token);//printTokens($this->stack);
|
||||||
|
|
||||||
//echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
|
|
||||||
//flush();
|
//flush();
|
||||||
|
|
||||||
// quick-check: if it's not a tag, no need to process
|
// quick-check: if it's not a tag, no need to process
|
||||||
@ -213,8 +215,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$injector->handleText($token);
|
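// XXX hack: give the handler a copy ($r); $this->token aliases $token and processToken() splices relative to it, so the handler's result (presumably written by reference) must not overwrite the cursor first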
|
||||||
$this->processToken($token, $i);
|
$r = $token;
|
||||||
|
$injector->handleText($r);
|
||||||
|
$token = $this->processToken($r, $i);
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -243,9 +247,11 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$ok = true;
|
$ok = true;
|
||||||
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
|
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
|
||||||
// claims to be empty but really is a start tag
|
// claims to be empty but really is a start tag
|
||||||
$this->swap(new HTMLPurifier_Token_End($token->name));
|
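// NB: saving the original token is required; $token is overwritten with the End token below, while the inserted Start token is built from the original's name/attr/line/col/armor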
|
||||||
$this->insertBefore(
|
$old_token = $token;
|
||||||
new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor)
|
$token = new HTMLPurifier_Token_End($token->name);
|
||||||
|
$token = $this->insertBefore(
|
||||||
|
new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
|
||||||
);
|
);
|
||||||
// punt (since we had to modify the input stream in a non-trivial way)
|
// punt (since we had to modify the input stream in a non-trivial way)
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
@ -293,7 +299,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$elements = $wrapdef->child->getAllowedElements($config);
|
$elements = $wrapdef->child->getAllowedElements($config);
|
||||||
if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
|
if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
|
||||||
$newtoken = new HTMLPurifier_Token_Start($wrapname);
|
$newtoken = new HTMLPurifier_Token_Start($wrapname);
|
||||||
$this->insertBefore($newtoken);
|
$token = $this->insertBefore($newtoken);
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -330,15 +336,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// errors need to be updated
|
// errors need to be updated
|
||||||
$new_token = new HTMLPurifier_Token_End($parent->name);
|
$new_token = new HTMLPurifier_Token_End($parent->name);
|
||||||
$new_token->start = $parent;
|
$new_token->start = $parent;
|
||||||
if ($carryover) {
|
|
||||||
$element = clone $parent;
|
|
||||||
// [TagClosedAuto]
|
|
||||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
|
||||||
$element->carryover = true;
|
|
||||||
$this->processToken(array($new_token, $token, $element));
|
|
||||||
} else {
|
|
||||||
$this->insertBefore($new_token);
|
|
||||||
}
|
|
||||||
// [TagClosedSuppress]
|
// [TagClosedSuppress]
|
||||||
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
|
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
|
||||||
if (!$carryover) {
|
if (!$carryover) {
|
||||||
@ -347,8 +344,17 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
|
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if ($carryover) {
|
||||||
|
$element = clone $parent;
|
||||||
|
// [TagClosedAuto]
|
||||||
|
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||||
|
$element->carryover = true;
|
||||||
|
$token = $this->processToken(array($new_token, $token, $element));
|
||||||
|
} else {
|
||||||
|
$token = $this->insertBefore($new_token);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
$this->remove();
|
$token = $this->remove();
|
||||||
}
|
}
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
continue;
|
continue;
|
||||||
@ -366,14 +372,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$injector->handleElement($token);
|
$r = $token;
|
||||||
$this->processToken($token, $i);
|
$injector->handleElement($r);
|
||||||
|
$token = $this->processToken($r, $i);
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (!$reprocess) {
|
if (!$reprocess) {
|
||||||
// ah, nothing interesting happened; do normal processing
|
// ah, nothing interesting happened; do normal processing
|
||||||
$this->swap($token);
|
|
||||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||||
$this->stack[] = $token;
|
$this->stack[] = $token;
|
||||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||||
@ -396,16 +402,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($e) {
|
if ($e) {
|
||||||
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
|
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
|
||||||
}
|
}
|
||||||
$this->swap(
|
$token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
|
||||||
new HTMLPurifier_Token_Text(
|
|
||||||
$generator->generateFromToken($token)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
} else {
|
} else {
|
||||||
$this->remove();
|
|
||||||
if ($e) {
|
if ($e) {
|
||||||
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
|
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
|
||||||
}
|
}
|
||||||
|
$token = $this->remove();
|
||||||
}
|
}
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
continue;
|
continue;
|
||||||
@ -425,8 +427,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$injector->handleEnd($token);
|
$r = $token;
|
||||||
$this->processToken($token, $i);
|
$injector->handleEnd($r);
|
||||||
|
$token = $this->processToken($r, $i);
|
||||||
$this->stack[] = $current_parent;
|
$this->stack[] = $current_parent;
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
break;
|
break;
|
||||||
@ -454,19 +457,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// we didn't find the tag, so remove
|
// we didn't find the tag, so remove
|
||||||
if ($skipped_tags === false) {
|
if ($skipped_tags === false) {
|
||||||
if ($escape_invalid_tags) {
|
if ($escape_invalid_tags) {
|
||||||
$this->swap(
|
|
||||||
new HTMLPurifier_Token_Text(
|
|
||||||
$generator->generateFromToken($token)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
if ($e) {
|
if ($e) {
|
||||||
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
|
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
|
||||||
}
|
}
|
||||||
|
$token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
|
||||||
} else {
|
} else {
|
||||||
$this->remove();
|
|
||||||
if ($e) {
|
if ($e) {
|
||||||
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
|
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
|
||||||
}
|
}
|
||||||
|
$token = $this->remove();
|
||||||
}
|
}
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
continue;
|
continue;
|
||||||
@ -499,18 +498,17 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$replace[] = $element;
|
$replace[] = $element;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$this->processToken($replace);
|
$token = $this->processToken($replace);
|
||||||
$reprocess = true;
|
$reprocess = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$context->destroy('CurrentNesting');
|
|
||||||
$context->destroy('InputTokens');
|
|
||||||
$context->destroy('InputIndex');
|
|
||||||
$context->destroy('CurrentToken');
|
$context->destroy('CurrentToken');
|
||||||
|
$context->destroy('CurrentNesting');
|
||||||
|
$context->destroy('InputZipper');
|
||||||
|
|
||||||
unset($this->injectors, $this->stack, $this->tokens, $this->t);
|
unset($this->injectors, $this->stack, $this->tokens);
|
||||||
return $tokens;
|
return $zipper->toArray($token);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -560,7 +558,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// array(number nodes to delete, new node 1, new node 2, ...)
|
// array(number nodes to delete, new node 1, new node 2, ...)
|
||||||
|
|
||||||
$delete = array_shift($token);
|
$delete = array_shift($token);
|
||||||
$old = array_splice($this->tokens, $this->t, $delete, $token);
|
list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
|
||||||
|
|
||||||
if ($injector > -1) {
|
if ($injector > -1) {
|
||||||
// determine appropriate skips
|
// determine appropriate skips
|
||||||
@ -571,6 +569,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return $r;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -580,7 +580,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
*/
|
*/
|
||||||
private function insertBefore($token)
|
private function insertBefore($token)
|
||||||
{
|
{
|
||||||
array_splice($this->tokens, $this->t, 0, array($token));
|
// NB not $this->zipper->insertBefore(), due to positioning
|
||||||
|
// differences
|
||||||
|
return $this->zipper->splice($this->token, 0, array($token))[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -589,17 +591,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
*/
|
*/
|
||||||
private function remove()
|
private function remove()
|
||||||
{
|
{
|
||||||
array_splice($this->tokens, $this->t, 1);
|
return $this->zipper->delete();
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Swap current token with new token. Cursor points to new token (no
|
|
||||||
* change). You must reprocess after this.
|
|
||||||
* @param HTMLPurifier_Token $token
|
|
||||||
*/
|
|
||||||
private function swap($token)
|
|
||||||
{
|
|
||||||
$this->tokens[$this->t] = $token;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
|
|
||||||
class HTMLPurifier_Zipper
|
class HTMLPurifier_Zipper
|
||||||
{
|
{
|
||||||
private $front, $back;
|
public $front, $back;
|
||||||
|
|
||||||
public function __construct($front, $back) {
|
public function __construct($front, $back) {
|
||||||
$this->front = $front;
|
$this->front = $front;
|
||||||
@ -95,6 +95,14 @@ class HTMLPurifier_Zipper
|
|||||||
return empty($this->back) ? NULL : array_pop($this->back);
|
return empty($this->back) ? NULL : array_pop($this->back);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if we are at the end of the list.
|
||||||
|
* @return bool
|
||||||
|
*/
|
||||||
|
public function done() {
|
||||||
|
return empty($this->back);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Insert element before hole.
|
* Insert element before hole.
|
||||||
* @param Element to insert
|
* @param Element to insert
|
||||||
@ -115,14 +123,16 @@ class HTMLPurifier_Zipper
|
|||||||
* Splice in multiple elements at hole. Functional specification
|
* Splice in multiple elements at hole. Functional specification
|
||||||
* in terms of array_splice:
|
* in terms of array_splice:
|
||||||
*
|
*
|
||||||
* $r1 = array_splice($arr, $i, $delete, $replacement);
|
* $arr1 = $arr;
|
||||||
|
* $old1 = array_splice($arr1, $i, $delete, $replacement);
|
||||||
*
|
*
|
||||||
* list($z, $t) = HTMLPurifier_Zipper::fromArray($arr);
|
* list($z, $t) = HTMLPurifier_Zipper::fromArray($arr);
|
||||||
* $t = $z->advance($t, $i);
|
* $t = $z->advance($t, $i);
|
||||||
* $t = $z->splice($t, $delete, $replacement);
|
* list($old2, $t) = $z->splice($t, $delete, $replacement);
|
||||||
* $r2 = $z->toArray($t);
|
* $arr2 = $z->toArray($t);
|
||||||
*
|
*
|
||||||
* assert($r1 === $r2);
|
* assert($old1 === $old2);
|
||||||
|
* assert($arr1 === $arr2);
|
||||||
*
|
*
|
||||||
* NB: the absolute index location after this operation is
|
* NB: the absolute index location after this operation is
|
||||||
* *unchanged!*
|
* *unchanged!*
|
||||||
@ -131,8 +141,10 @@ class HTMLPurifier_Zipper
|
|||||||
*/
|
*/
|
||||||
public function splice($t, $delete, $replacement) {
|
public function splice($t, $delete, $replacement) {
|
||||||
// delete
|
// delete
|
||||||
|
$old = array();
|
||||||
$r = $t;
|
$r = $t;
|
||||||
for ($i = $delete; $i > 0; $i--) {
|
for ($i = $delete; $i > 0; $i--) {
|
||||||
|
$old[] = $r;
|
||||||
$r = $this->delete();
|
$r = $this->delete();
|
||||||
}
|
}
|
||||||
// insert
|
// insert
|
||||||
@ -140,6 +152,6 @@ class HTMLPurifier_Zipper
|
|||||||
$this->insertAfter($r);
|
$this->insertAfter($r);
|
||||||
$r = $replacement[$i];
|
$r = $replacement[$i];
|
||||||
}
|
}
|
||||||
return $r;
|
return array($old, $r);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,7 +24,7 @@ class HTMLPurifier_Strategy_MakeWellFormed_EndRewindInjector extends HTMLPurifie
|
|||||||
) {
|
) {
|
||||||
$token = false;
|
$token = false;
|
||||||
$prev->_InjectorTest_EndRewindInjector_delete = true;
|
$prev->_InjectorTest_EndRewindInjector_delete = true;
|
||||||
$this->rewind($i);
|
$this->rewindOffset(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,7 +27,7 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
|
|||||||
$i->start->skip = array(0 => true, 1 => true);
|
$i->start->skip = array(0 => true, 1 => true);
|
||||||
$mock->expectAt(1, 'handleEnd', array($i));
|
$mock->expectAt(1, 'handleEnd', array($i));
|
||||||
$mock->expectCallCount('handleEnd', 2);
|
$mock->expectCallCount('handleEnd', 2);
|
||||||
$mock->setReturnValue('getRewind', false);
|
$mock->setReturnValue('getRewindOffset', false);
|
||||||
$this->config->set('AutoFormat.AutoParagraph', false);
|
$this->config->set('AutoFormat.AutoParagraph', false);
|
||||||
$this->config->set('AutoFormat.Linkify', false);
|
$this->config->set('AutoFormat.Linkify', false);
|
||||||
$this->config->set('AutoFormat.Custom', array($mock));
|
$this->config->set('AutoFormat.Custom', array($mock));
|
||||||
|
@ -16,7 +16,8 @@ class HTMLPurifier_ZipperTest extends HTMLPurifier_Harness
|
|||||||
$z->insertBefore(4);
|
$z->insertBefore(4);
|
||||||
$z->insertAfter(5);
|
$z->insertAfter(5);
|
||||||
$this->assertIdentical($z->toArray($t), array(0,1,4,3,5));
|
$this->assertIdentical($z->toArray($t), array(0,1,4,3,5));
|
||||||
$t = $z->splice($t, 2, array(6,7));
|
list($old, $t) = $z->splice($t, 2, array(6,7));
|
||||||
|
$this->assertIdentical($old, array(3,5));
|
||||||
$this->assertIdentical($t, 6);
|
$this->assertIdentical($t, 6);
|
||||||
$this->assertIdentical($z->toArray($t), array(0,1,4,6,7));
|
$this->assertIdentical($z->toArray($t), array(0,1,4,6,7));
|
||||||
}
|
}
|
||||||
|
@ -182,10 +182,34 @@ function printTokens($tokens, $index = null)
|
|||||||
$string = '<pre>';
|
$string = '<pre>';
|
||||||
$generator = new HTMLPurifier_Generator(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context);
|
$generator = new HTMLPurifier_Generator(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context);
|
||||||
foreach ($tokens as $i => $token) {
|
foreach ($tokens as $i => $token) {
|
||||||
if ($index === $i) $string .= '[<strong>';
|
$string .= printToken($generator, $token, $i, $index == $i);
|
||||||
$string .= "<sup>$i</sup>";
|
}
|
||||||
$string .= $generator->escape($generator->generateFromToken($token));
|
$string .= '</pre>';
|
||||||
if ($index === $i) $string .= '</strong>]';
|
echo $string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function printToken($generator, $token, $i, $isCursor)
|
||||||
|
{
|
||||||
|
$string = "";
|
||||||
|
if ($isCursor) $string .= '[<strong>';
|
||||||
|
$string .= "<sup>$i</sup>";
|
||||||
|
$string .= $generator->escape($generator->generateFromToken($token));
|
||||||
|
if ($isCursor) $string .= '</strong>]';
|
||||||
|
return $string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function printZipper($zipper, $token)
|
||||||
|
{
|
||||||
|
$string = '<pre>';
|
||||||
|
$generator = new HTMLPurifier_Generator(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context);
|
||||||
|
foreach ($zipper->front as $i => $t) {
|
||||||
|
$string .= printToken($generator, $t, $i, false);
|
||||||
|
}
|
||||||
|
if ($token !== NULL) {
|
||||||
|
$string .= printToken($generator, $token, "", true);
|
||||||
|
}
|
||||||
|
for ($i = count($zipper->back)-1; $i >= 0; $i--) {
|
||||||
|
$string .= printToken($generator, $zipper->back[$i], $i, false);
|
||||||
}
|
}
|
||||||
$string .= '</pre>';
|
$string .= '</pre>';
|
||||||
echo $string;
|
echo $string;
|
||||||
|
Loading…
Reference in New Issue
Block a user