mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 23:28:42 +00:00
Document skips in more detail, #116.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
parent
4047a6230b
commit
353c96f156
@ -165,7 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if (empty($zipper->front)) break;
|
if (empty($zipper->front)) break;
|
||||||
$token = $zipper->prev($token);
|
$token = $zipper->prev($token);
|
||||||
// indicate that other injectors should not process this token,
|
// indicate that other injectors should not process this token,
|
||||||
// but we need to reprocess it
|
// but we need to reprocess it. See Note [Injector skips]
|
||||||
unset($token->skip[$i]);
|
unset($token->skip[$i]);
|
||||||
$token->rewind = $i;
|
$token->rewind = $i;
|
||||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||||
@ -210,6 +210,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($token instanceof HTMLPurifier_Token_Text) {
|
if ($token instanceof HTMLPurifier_Token_Text) {
|
||||||
foreach ($this->injectors as $i => $injector) {
|
foreach ($this->injectors as $i => $injector) {
|
||||||
if (isset($token->skip[$i])) {
|
if (isset($token->skip[$i])) {
|
||||||
|
// See Note [Injector skips]
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||||
@ -367,6 +368,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($ok) {
|
if ($ok) {
|
||||||
foreach ($this->injectors as $i => $injector) {
|
foreach ($this->injectors as $i => $injector) {
|
||||||
if (isset($token->skip[$i])) {
|
if (isset($token->skip[$i])) {
|
||||||
|
// See Note [Injector skips]
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||||
@ -422,6 +424,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$token->start = $current_parent;
|
$token->start = $current_parent;
|
||||||
foreach ($this->injectors as $i => $injector) {
|
foreach ($this->injectors as $i => $injector) {
|
||||||
if (isset($token->skip[$i])) {
|
if (isset($token->skip[$i])) {
|
||||||
|
// See Note [Injector skips]
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ($token->rewind !== null && $token->rewind !== $i) {
|
if ($token->rewind !== null && $token->rewind !== $i) {
|
||||||
@ -566,7 +569,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
|
list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
|
||||||
|
|
||||||
if ($injector > -1) {
|
if ($injector > -1) {
|
||||||
// determine appropriate skips
|
// See Note [Injector skips]
|
||||||
|
// Determine appropriate skips. Here's what the code does:
|
||||||
|
// *If* we deleted one or more tokens, copy the skips
|
||||||
|
// of the first deleted token into the skips of the new tokens (in $token).
|
||||||
|
// Also, mark the newly inserted tokens as having come from
|
||||||
|
// $injector.
|
||||||
$oldskip = isset($old[0]) ? $old[0]->skip : array();
|
$oldskip = isset($old[0]) ? $old[0]->skip : array();
|
||||||
foreach ($token as $object) {
|
foreach ($token as $object) {
|
||||||
$object->skip = $oldskip;
|
$object->skip = $oldskip;
|
||||||
@ -602,4 +610,50 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Note [Injector skips]
|
||||||
|
// ~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
// When I originally designed this class, the idea behind the 'skip'
|
||||||
|
// property of HTMLPurifier_Token was to help avoid infinite loops
|
||||||
|
// in injector processing. For example, suppose you wrote an injector
|
||||||
|
// that bolded swear words. Naively, you might write it so that
|
||||||
|
// whenever you saw ****, you replaced it with <strong>****</strong>.
|
||||||
|
//
|
||||||
|
// When this happens, we will reprocess all of the tokens with the
|
||||||
|
// other injectors. Now there is an opportunity for infinite loop:
|
||||||
|
// if we rerun the swear-word injector on these tokens, we might
|
||||||
|
// see **** and then reprocess again to get
|
||||||
|
// <strong><strong>****</strong></strong> ad infinitum.
|
||||||
|
//
|
||||||
|
// Thus, the idea of a skip is that once we process a token with
|
||||||
|
// an injector, we mark all of those tokens as having "come from"
|
||||||
|
// the injector, and we never run the injector again on these
|
||||||
|
// tokens.
|
||||||
|
//
|
||||||
|
// There were two more complications, however:
|
||||||
|
//
|
||||||
|
// - With HTMLPurifier_Injector_RemoveEmpty, we noticed that if
|
||||||
|
// you had <b><i></i></b>, after you removed the <i></i>, you
|
||||||
|
// really would like this injector to go back and reprocess
|
||||||
|
// the <b> tag, discovering that it is now empty and can be
|
||||||
|
// removed. So we reintroduced the possibility of infinite looping
|
||||||
|
// by adding a "rewind" function, which let you go back to an
|
||||||
|
// earlier point in the token stream and reprocess it with injectors.
|
||||||
|
// Needless to say, we need to UN-skip the token so it gets
|
||||||
|
// reprocessed.
|
||||||
|
//
|
||||||
|
// - Suppose that you successfully process a token, replace it with
|
||||||
|
// one with your skip mark, but now another injector wants to
|
||||||
|
// process the skipped token with another token. Should you continue
|
||||||
|
// to skip that new token, or reprocess it? If you reprocess,
|
||||||
|
// you can end up with an infinite loop where one injector converts
|
||||||
|
// <a> to <b>, and then another injector converts it back. So
|
||||||
|
// we inherit the skips, but for some reason, I thought that we
|
||||||
|
// should inherit the skip from the first of the tokens
|
||||||
|
// that we deleted. Why? Well, it seems to work OK.
|
||||||
|
//
|
||||||
|
// If I were to redesign this functionality, I would absolutely not
|
||||||
|
// go about doing it this way: the semantics are just not very well
|
||||||
|
// defined, and in any case you probably wanted to operate on trees,
|
||||||
|
// not token streams.
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
// vim: et sw=4 sts=4
|
||||||
|
@ -26,7 +26,7 @@ abstract class HTMLPurifier_Token
|
|||||||
public $armor = array();
|
public $armor = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used during MakeWellFormed.
|
* Used during MakeWellFormed. See Note [Injector skips]
|
||||||
* @type
|
* @type
|
||||||
*/
|
*/
|
||||||
public $skip;
|
public $skip;
|
||||||
|
Loading…
Reference in New Issue
Block a user