2006-07-23 23:29:12 +00:00
|
|
|
<?php
|
|
|
|
|
2006-08-20 21:59:41 +00:00
|
|
|
/**
 * Takes tokens and makes them well-formed (balance end tags, etc.)
 *
 * The strategy walks the token stream once, keeping a stack of the currently
 * open start tags. Whenever the stream has to be altered (a tag inserted,
 * removed or swapped) the same index is processed again ("punting"), so the
 * freshly written tokens go through exactly the same checks as the original
 * input did.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**
     * Array stream of tokens being processed.
     */
    protected $tokens;

    /**
     * Current index in $tokens.
     */
    protected $t;

    /**
     * Current nesting of elements.
     */
    protected $stack;

    /**
     * Injectors active in this stream processing.
     */
    protected $injectors;

    /**
     * Current instance of HTMLPurifier_Config.
     */
    protected $config;

    /**
     * Current instance of HTMLPurifier_Context.
     */
    protected $context;

    /**
     * Balances the tags of a token stream and runs the active injectors
     * over it.
     *
     * While running, the context variables 'CurrentNesting', 'InputIndex',
     * 'InputTokens' and 'CurrentToken' are registered; they are destroyed
     * again before the method returns normally.
     *
     * @param $tokens  Array of tokens to process
     * @param $config  Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Processed array of tokens
     * @throws HTMLPurifier_Exception if a tag token of an unexpected kind
     *         turns up in the stream, or if an injector leaves behind an
     *         invalid substitution value
     */
    public function execute($tokens, $config, $context) {

        $definition = $config->getHTMLDefinition();

        // local variables
        $generator = new HTMLPurifier_Generator($config, $context);
        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
        // may be falsy (no collector available); every use below is guarded
        $e = $context->get('ErrorCollector', true);
        $t = false; // token index
        $i = false; // injector index
        $token = false; // the current token
        $reprocess = false; // whether or not to reprocess the same token
        $stack = array();

        // member variables; bound by reference so that the helper methods
        // (processToken, insertBefore, remove, swap) operate on the very
        // same variables as this loop
        $this->stack =& $stack;
        $this->t =& $t;
        $this->tokens =& $tokens;
        $this->config = $config;
        $this->context = $context;

        // context variables
        $context->register('CurrentNesting', $stack);
        $context->register('InputIndex', $t);
        $context->register('InputTokens', $tokens);
        $context->register('CurrentToken', $token);

        // -- begin INJECTOR --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $def_injectors = $definition->info_injector;
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            $injector = "HTMLPurifier_Injector_$injector";
            if (!$b) continue;
            $this->injectors[] = new $injector;
        }
        foreach ($def_injectors as $injector) {
            // assumed to be objects
            $this->injectors[] = $injector;
        }
        foreach ($custom_injectors as $injector) {
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // give the injectors references to the definition and context
        // variables for performance reasons
        foreach ($this->injectors as $ix => $injector) {
            $error = $injector->prepare($config, $context);
            if (!$error) continue;
            // rm the injector. This must be an unset() by key and not an
            // array_splice(): foreach walks a snapshot of the array, so
            // renumbering the live array here would make every later $ix
            // point at the wrong injector.
            unset($this->injectors[$ix]);
            trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
        }
        // close the gaps: injector indexes are used as keys of the tokens'
        // skip lists and as the rewind marker, so they have to be 0..n-1
        $this->injectors = array_values($this->injectors);

        // -- end INJECTOR --

        // a note on punting:
        //      In order to reduce code duplication, whenever some code needs
        //      to make HTML changes in order to make things "correct", the
        //      new HTML gets sent through the purifier, regardless of its
        //      status. This means that if we add a start token, because it
        //      was totally necessary, we don't have to update nesting; we just
        //      punt ($reprocess = true; continue;) and it does that for us.

        // isset is in loop because $tokens size changes during loop exec
        for (
            $t = 0;
            $t == 0 || isset($tokens[$t - 1]);
            // only increment if we don't need to reprocess
            $reprocess ? $reprocess = false : $t++
        ) {

            // check for a rewind
            if (is_int($i) && $i >= 0) {
                // possibility: disable rewinding if the current token has a
                // rewind set on it already. This would offer protection from
                // infinite loop, but might hinder some advanced rewinding.
                $rewind_to = $this->injectors[$i]->getRewind();
                if (is_int($rewind_to) && $rewind_to < $t) {
                    if ($rewind_to < 0) $rewind_to = 0;
                    while ($t > $rewind_to) {
                        $t--;
                        $prev = $tokens[$t];
                        // indicate that other injectors should not process this token,
                        // but we need to reprocess it
                        unset($prev->skip[$i]);
                        $prev->rewind = $i;
                        // walking backwards undoes the effect each token
                        // had on the nesting stack
                        if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack);
                        elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start;
                    }
                }
                $i = false;
            }

            // handle case of document end
            if (!isset($tokens[$t])) {
                // kill processing if stack is empty
                if (empty($this->stack)) break;

                // peek
                $top_nesting = array_pop($this->stack);
                $this->stack[] = $top_nesting;

                // send error
                if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
                }

                // append, don't splice, since this is the end
                $tokens[] = new HTMLPurifier_Token_End($top_nesting->name);

                // punt!
                $reprocess = true;
                continue;
            }

            // if all goes well, this token will be passed through unharmed
            $token = $tokens[$t];

            // quick-check: if it's not a tag, no need to process
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Text) {
                    // only the first injector that is neither skipped nor
                    // excluded by a rewind marker gets the token; the
                    // result is then reprocessed
                    foreach ($this->injectors as $i => $injector) {
                        if (isset($token->skip[$i])) continue;
                        if ($token->rewind !== null && $token->rewind !== $i) continue;
                        $injector->handleText($token);
                        $this->processToken($token, $i);
                        $reprocess = true;
                        break;
                    }
                }
                // another possibility is a comment
                continue;
            }

            if (isset($definition->info[$token->name])) {
                $type = $definition->info[$token->name]->child->type;
            } else {
                $type = false; // Type is unknown, treat accordingly
            }

            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
                // claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                $ok = true;
            } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
                // claims to be empty but really is a start tag
                $this->swap(new HTMLPurifier_Token_End($token->name));
                $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
                // punt (since we had to modify the input stream in a non-trivial way)
                $reprocess = true;
                continue;
            } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                // real empty token
                $ok = true;
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                // start tag

                // ...unless they also have to close their parent
                if (!empty($this->stack)) {

                    // peek at the innermost open element
                    $parent = array_pop($this->stack);
                    $this->stack[] = $parent;

                    if (isset($definition->info[$parent->name])) {
                        $elements = $definition->info[$parent->name]->child->getNonAutoCloseElements($config);
                        $autoclose = !isset($elements[$token->name]);
                    } else {
                        $autoclose = false;
                    }

                    if ($autoclose) {
                        if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                        // insert parent end tag before this tag
                        $new_token = new HTMLPurifier_Token_End($parent->name);
                        $new_token->start = $parent;
                        $this->insertBefore($new_token);
                        $reprocess = true;
                        continue;
                    }

                }
                $ok = true;
            }

            if ($ok) {
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) continue;
                    if ($token->rewind !== null && $token->rewind !== $i) continue;
                    $injector->handleElement($token);
                    $this->processToken($token, $i);
                    $reprocess = true;
                    break;
                }
                if (!$reprocess) {
                    // ah, nothing interesting happened; do normal processing
                    $this->swap($token);
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $this->stack[] = $token;
                    } elseif ($token instanceof HTMLPurifier_Token_End) {
                        throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
                    }
                }
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if (!$token instanceof HTMLPurifier_Token_End) {
                throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
            }

            // make sure that we have something open
            if (empty($this->stack)) {
                if ($escape_invalid_tags) {
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    $this->swap(new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    ));
                } else {
                    $this->remove();
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                $reprocess = true;
                continue;
            }

            // first, check for the simplest case: everything closes neatly.
            // Eventually, everything passes through here; if there are problems
            // we modify the input stream accordingly and then punt, so that
            // the tokens get processed again.
            $current_parent = array_pop($this->stack);
            if ($current_parent->name == $token->name) {
                $token->start = $current_parent;
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) continue;
                    if ($token->rewind !== null && $token->rewind !== $i) continue;
                    $injector->handleEnd($token);
                    $this->processToken($token, $i);
                    // the token is going to be reprocessed, so the element
                    // counts as open again until then
                    $this->stack[] = $current_parent;
                    $reprocess = true;
                    break;
                }
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $this->stack[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->stack);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($j = $size - 2; $j >= 0; $j--) {
                if ($this->stack[$j]->name == $token->name) {
                    $skipped_tags = array_slice($this->stack, $j);
                    break;
                }
            }

            // we didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $this->swap(new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    ));
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                } else {
                    $this->remove();
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                $reprocess = true;
                continue;
            }

            // do errors, in REVERSE $j order: a,b,c with </a></b></c>
            $c = count($skipped_tags);
            if ($e) {
                for ($j = $c - 1; $j > 0; $j--) {
                    // notice we exclude $j == 0, i.e. the current ending tag, from
                    // the errors...
                    if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                        $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                    }
                }
            }

            // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
            // (every insertion lands at index $t, i.e. in front of the
            // previously inserted one)
            for ($j = 1; $j < $c; $j++) {
                // ...as well as from the insertions
                $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
                $new_token->start = $skipped_tags[$j];
                $this->insertBefore($new_token);
            }
            $reprocess = true;
            continue;
        }

        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('CurrentToken');

        unset($this->injectors, $this->stack, $this->tokens, $this->t);
        return $tokens;
    }

    /**
     * Processes arbitrary token values for complicated substitution patterns.
     * In general:
     *
     * If $token is an array, it is a list of tokens to substitute for the
     * current token. These tokens then get individually processed. If there
     * is a leading integer in the list, that integer determines how many
     * tokens from the stream should be removed.
     *
     * If $token is a regular token, it is swapped with the current token.
     *
     * If $token is false, the current token is deleted.
     *
     * If $token is an integer, that number of tokens (with the first token
     * being the current one) will be deleted.
     *
     * @param $token Token substitution value
     * @param $injector Index of the injector that performed the substitution;
     *        the default of -1 means this is not an injector related
     *        operation, and no skip marks are written.
     * @throws HTMLPurifier_Exception if $token has none of the forms above,
     *         or asks for zero tokens to be deleted
     */
    protected function processToken($token, $injector = -1) {

        // normalize forms of token
        if (is_object($token)) $token = array(1, $token);
        if (is_int($token)) $token = array($token);
        if ($token === false) $token = array(1);
        if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector');
        // isset() guard: an empty array has no offset 0 to inspect; it is
        // treated like any list without a leading count (delete one token)
        if (!isset($token[0]) || !is_int($token[0])) array_unshift($token, 1);
        if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');

        // $token is now an array with the following form:
        //      array(number nodes to delete, new node 1, new node 2, ...)

        $delete = array_shift($token);
        $old = array_splice($this->tokens, $this->t, $delete, $token);

        if ($injector > -1) {
            // determine appropriate skips: the replacements inherit the
            // skip list of the first replaced token and are additionally
            // marked as already seen by this injector
            $oldskip = isset($old[0]) ? $old[0]->skip : array();
            foreach ($token as $object) {
                $object->skip = $oldskip;
                $object->skip[$injector] = true;
            }
        }

    }

    /**
     * Inserts a token before the current token. Cursor now points to this token.
     *
     * @param $token Token to insert at the current index
     */
    private function insertBefore($token) {
        array_splice($this->tokens, $this->t, 0, array($token));
    }

    /**
     * Removes current token. Cursor now points to new token occupying previously
     * occupied space.
     */
    private function remove() {
        array_splice($this->tokens, $this->t, 1);
    }

    /**
     * Swap current token with new token. Cursor points to new token (no change).
     *
     * @param $token Token to store at the current index
     */
    private function swap($token) {
        $this->tokens[$this->t] = $token;
    }

}
|
|
|
|
|