0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-03 05:11:52 +00:00

[3.1.0] Convert tokens to use instanceof, reducing memory footprint and improving comparison speed.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1509 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2008-01-19 20:23:01 +00:00
parent dd8ef4d3f5
commit 5eee08c548
16 changed files with 78 additions and 70 deletions

2
NEWS
View File

@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
- Autoclose now operates iteratively, i.e. <span><span><div> now has
both span tags closed.
. Plugins now get their own changelogs according to project conventions.
. Convert tokens to use instanceof, reducing memory footprint and
improving comparison speed.
3.0.0, released 2008-01-06
# HTML Purifier is PHP 5 only! The 2.1.x branch will be maintained

View File

@ -34,7 +34,10 @@ class HTMLPurifier_AttrValidator
$current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token);
if ($token->type !== 'start' && $token->type !== 'empty') return $token;
if (
!$token instanceof HTMLPurifier_Token_Start &&
!$token instanceof HTMLPurifier_Token_Empty
) return $token;
// create alias to global definition array, see also $defs
// DEFINITION CALL

View File

@ -69,9 +69,9 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
$is_child = ($nesting == 0); // direct
if ($token->type == 'start') {
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token->type == 'end') {
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}

View File

@ -71,9 +71,9 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$is_child = ($nesting == 0);
if ($token->type == 'start') {
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token->type == 'end') {
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
@ -81,7 +81,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$is_deleting = false;
if (!isset($this->elements[$token->name])) {
$is_deleting = true;
if ($pcdata_allowed && $token->type == 'text') {
if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
@ -91,7 +91,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
continue;
}
}
if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =

View File

@ -45,8 +45,8 @@ extends HTMLPurifier_ChildDef_Required
if (!$is_inline) {
if (!$depth) {
if (
($token->type == 'text' && !$token->is_whitespace) ||
($token->type != 'text' && !isset($this->elements[$token->name]))
($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) ||
(!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name]))
) {
$is_inline = true;
$ret[] = $block_wrap_start;
@ -55,7 +55,7 @@ extends HTMLPurifier_ChildDef_Required
} else {
if (!$depth) {
// starting tokens have been inline text / empty
if ($token->type == 'start' || $token->type == 'empty') {
if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) {
if (isset($this->elements[$token->name])) {
// ended
$ret[] = $block_wrap_end;
@ -65,8 +65,8 @@ extends HTMLPurifier_ChildDef_Required
}
}
$ret[] = $token;
if ($token->type == 'start') $depth++;
if ($token->type == 'end') $depth--;
if ($token instanceof HTMLPurifier_Token_Start) $depth++;
if ($token instanceof HTMLPurifier_Token_End) $depth--;
}
if ($is_inline) $ret[] = $block_wrap_end;
return $ret;

View File

@ -41,9 +41,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
if ($token === false) {
// terminating sequence started
} elseif ($token->type == 'start') {
} elseif ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token->type == 'end') {
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
@ -112,7 +112,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
$collection[] = $token;
continue;
default:
if ($token->type == 'text' && $token->is_whitespace) {
if ($token instanceof HTMLPurifier_Token_Text && $token->is_whitespace) {
$collection[] = $token;
$tag_index++;
}

View File

@ -89,7 +89,7 @@ class HTMLPurifier_Generator
if (!$tokens) return '';
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
if ($this->_scriptFix && $tokens[$i]->name === 'script'
&& $i + 2 < $size && $tokens[$i+2]->type == 'end') {
&& $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
// script special case
// the contents of the script block must be ONE token
// for this to work
@ -139,21 +139,21 @@ class HTMLPurifier_Generator
* @return Generated HTML
*/
public function generateFromToken($token) {
if (!isset($token->type)) return '';
if ($token->type == 'start') {
if (!$token instanceof HTMLPurifier_Token) return '';
if ($token instanceof HTMLPurifier_Token_Start) {
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token->type == 'end') {
} elseif ($token instanceof HTMLPurifier_Token_End) {
return '</' . $token->name . '>';
} elseif ($token->type == 'empty') {
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' )
. '>';
} elseif ($token->type == 'text') {
} elseif ($token instanceof HTMLPurifier_Token_Text) {
return $this->escape($token->data);
} else {
@ -168,7 +168,7 @@ class HTMLPurifier_Generator
* --> somewhere inside the script contents.
*/
public function generateScriptFromToken($token) {
if ($token->type != 'text') return $this->generateFromToken($token);
if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
// return '<!--' . "\n" . trim($token->data) . "\n" . '// -->';
// more advanced version:
// thanks <http://lachy.id.au/log/2005/05/script-comments>

View File

@ -72,7 +72,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// a double newline in them
$nesting = 0;
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
if ($this->inputTokens[$i]->type == 'start'){
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start){
if (!$this->_isInline($this->inputTokens[$i])) {
// we haven't found a double-newline, and
// we've hit a block element, so don't paragraph
@ -81,11 +81,11 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
}
$nesting++;
}
if ($this->inputTokens[$i]->type == 'end') {
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) {
if ($nesting <= 0) break;
$nesting--;
}
if ($this->inputTokens[$i]->type == 'text') {
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text) {
// found it!
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
@ -117,12 +117,12 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// check if this token is adjacent to the parent token
$prev = $this->inputTokens[$this->inputIndex - 1];
if ($prev->type != 'start') {
if (!$prev instanceof HTMLPurifier_Token_Start) {
// not adjacent, we can abort early
// add lead paragraph tag if our token is inline
// and the previous tag was an end paragraph
if (
$prev->name == 'p' && $prev->type == 'end' &&
$prev->name == 'p' && $prev instanceof HTMLPurifier_Token_End &&
$this->_isInline($token)
) {
$token = array($this->_pStart(), $token);
@ -139,9 +139,9 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// early if possible
$j = 1; // current nesting, one is due to parent (we recalculate current token)
for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) {
if ($this->inputTokens[$i]->type == 'start') $j++;
if ($this->inputTokens[$i]->type == 'end') $j--;
if ($this->inputTokens[$i]->type == 'text') {
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start) $j++;
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) $j--;
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text) {
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
break;
@ -248,14 +248,14 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
if (!$needs_end) {
// Start of the checks one after the current token's index
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
if ($this->inputTokens[$i]->type == 'start' || $this->inputTokens[$i]->type == 'empty') {
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start || $this->inputTokens[$i] instanceof HTMLPurifier_Token_Empty) {
$remove_paragraph_end = $this->_isInline($this->inputTokens[$i]);
}
// check if we can abort early (whitespace means we carry-on!)
if ($this->inputTokens[$i]->type == 'text' && !$this->inputTokens[$i]->is_whitespace) break;
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text && !$this->inputTokens[$i]->is_whitespace) break;
// end tags will automatically be handled by MakeWellFormed,
// so we don't have to worry about them
if ($this->inputTokens[$i]->type == 'end') break;
if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) break;
}
} else {
$remove_paragraph_end = false;

View File

@ -74,7 +74,7 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
// HTMLSax3 seems to always send empty tags an extra close tag
// check and ignore if you see it:
// [TESTME] to make sure it doesn't overreach
if ($this->tokens[count($this->tokens)-1]->type == 'empty') {
if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) {
return true;
}
$this->tokens[] = new HTMLPurifier_Token_End($name);

View File

@ -91,12 +91,12 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// scroll to the end of this node, report number, and collect
// all children
for ($j = $i, $depth = 0; ; $j++) {
if ($tokens[$j]->type == 'start') {
if ($tokens[$j] instanceof HTMLPurifier_Token_Start) {
$depth++;
// skip token assignment on first iteration, this is the
// token we currently are on
if ($depth == 1) continue;
} elseif ($tokens[$j]->type == 'end') {
} elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) {
$depth--;
// skip token assignment on last iteration, this is the
// end token of the token we're currently on
@ -287,8 +287,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// Test if the token indeed is a start tag, if not, move forward
// and test again.
$size = count($tokens);
while ($i < $size and $tokens[$i]->type != 'start') {
if ($tokens[$i]->type == 'end') {
while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) {
if ($tokens[$i] instanceof HTMLPurifier_Token_End) {
// pop a token index off the stack if we ended a node
array_pop($stack);
// pop an exclusion lookup off exclusion stack if

View File

@ -110,7 +110,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// quick-check: if it's not a tag, no need to process
if (empty( $token->is_tag )) {
if ($token->type === 'text') {
if ($token instanceof HTMLPurifier_Token_Text) {
// injector handler code; duplicated for performance reasons
foreach ($this->injectors as $i => $injector) {
if (!$injector->skip) $injector->handleText($token);
@ -128,21 +128,21 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// quick tag checks: anything that's *not* an end tag
$ok = false;
if ($info->type == 'empty' && $token->type == 'start') {
if ($info->type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
// test if it claims to be a start tag but is empty
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
$ok = true;
} elseif ($info->type != 'empty' && $token->type == 'empty' ) {
} elseif ($info->type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
// claims to be empty but really is a start tag
$token = array(
new HTMLPurifier_Token_Start($token->name, $token->attr),
new HTMLPurifier_Token_End($token->name)
);
$ok = true;
} elseif ($token->type == 'empty') {
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
// real empty token
$ok = true;
} elseif ($token->type == 'start') {
} elseif ($token instanceof HTMLPurifier_Token_Start) {
// start tag
// ...unless they also have to close their parent
@ -181,7 +181,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
}
// sanity check: we should be dealing with a closing tag
if ($token->type != 'end') continue;
if (!$token instanceof HTMLPurifier_Token_End) continue;
// make sure that we have something open
if (empty($this->currentNesting)) {
@ -304,9 +304,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
} elseif ($token) {
// regular case
$this->outputTokens[] = $token;
if ($token->type == 'start') {
if ($token instanceof HTMLPurifier_Token_Start) {
$this->currentNesting[] = $token;
} elseif ($token->type == 'end') {
} elseif ($token instanceof HTMLPurifier_Token_End) {
array_pop($this->currentNesting); // not actually used
}
}

View File

@ -116,7 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// mostly everything's good, but
// we need to make sure required attributes are in order
if (
($token->type === 'start' || $token->type === 'empty') &&
($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
@ -135,9 +135,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token->armor['ValidateAttributes'] = true;
}
if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
$textify_comments = $token->name;
} elseif ($token->name === $textify_comments && $token->type == 'end') {
} elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
$textify_comments = false;
}
@ -151,9 +151,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// check if we need to destroy all of the tag's children
// CAN BE GENERICIZED
if (isset($hidden_elements[$token->name])) {
if ($token->type == 'start') {
if ($token instanceof HTMLPurifier_Token_Start) {
$remove_until = $token->name;
} elseif ($token->type == 'empty') {
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
// do nothing: we're still looking
} else {
$remove_until = false;
@ -164,7 +164,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
continue;
}
} elseif ($token->type == 'comment') {
} elseif ($token instanceof HTMLPurifier_Token_Comment) {
// textify comments in script tags when they are allowed
if ($textify_comments !== false) {
$data = $token->data;
@ -174,7 +174,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
continue;
}
} elseif ($token->type == 'text') {
} elseif ($token instanceof HTMLPurifier_Token_Text) {
} else {
continue;
}

View File

@ -25,7 +25,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// only process tokens that have attributes,
// namely start and empty tags
if ($token->type !== 'start' && $token->type !== 'empty') continue;
if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) continue;
// skip tokens that are armored
if (!empty($token->armor['ValidateAttributes'])) continue;

View File

@ -38,7 +38,7 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
public function transform($tag, $config, $context) {
if ($tag->type == 'end') {
if ($tag instanceof HTMLPurifier_Token_End) {
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
return $new_tag;

View File

@ -25,7 +25,7 @@ class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
if (!is_null($this->style) &&
($new_tag->type == 'start' || $new_tag->type == 'empty')
($new_tag instanceof HTMLPurifier_Token_Start || $new_tag instanceof HTMLPurifier_Token_Empty)
) {
$this->prependCSS($new_tag->attr, $this->style);
}

View File

@ -27,6 +27,20 @@ class HTMLPurifier_Token {
/**
 * Duplicates this token by serializing it and rebuilding a new object
 * from the serialized form.
 * @return The token object produced by unserializing this token
 */
public function copy() {
    $frozen = serialize($this);
    return unserialize($frozen);
}
/**
 * Backwards-compatibility shim for the former public $type property.
 *
 * Reading $token->type emits an E_USER_NOTICE deprecation message and
 * returns the legacy type string for the token. The type is resolved
 * with instanceof rather than an exact class-name comparison, so a
 * subclass of a concrete token class reports the same type as its
 * parent, matching the instanceof checks used by callers.
 *
 * @param $n Name of the inaccessible property being read
 * @return 'start', 'empty', 'end', 'text' or 'comment' when $n is
 *         'type'; null for an unrecognised token class or for any
 *         other property name
 */
public function __get($n) {
    if ($n !== 'type') {
        // Any other undeclared property keeps reading as null, exactly
        // as it did when this method simply fell off the end.
        return null;
    }
    trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
    // The concrete token classes are siblings, so the order of these
    // checks does not affect the result.
    if ($this instanceof HTMLPurifier_Token_Start)   return 'start';
    if ($this instanceof HTMLPurifier_Token_Empty)   return 'empty';
    if ($this instanceof HTMLPurifier_Token_End)     return 'end';
    if ($this instanceof HTMLPurifier_Token_Text)    return 'text';
    if ($this instanceof HTMLPurifier_Token_Comment) return 'comment';
    return null;
}
}
/**
@ -84,18 +98,12 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
/**
* Concrete start token class.
*/
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
public $type = 'start';
}
// Intentionally empty: the class itself identifies a start tag (callers test with instanceof).
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag {}
/**
* Concrete empty token class.
*/
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
public $type = 'empty';
}
// Intentionally empty: the class itself identifies an empty tag (callers test with instanceof).
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag {}
/**
* Concrete end token class.
@ -104,10 +112,7 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
* is for optimization reasons, as under normal circumstances, the Lexers
* do not pass attributes.
*/
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
public $type = 'end';
}
// Intentionally empty: the class itself identifies an end tag (callers test with instanceof).
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag {}
/**
* Concrete text token class.
@ -122,7 +127,6 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{
public $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. */
public $type = 'text';
public $data; /**< Parsed character data of text. */
public $is_whitespace; /**< Bool indicating if node is whitespace. */
@ -145,7 +149,6 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
public $data; /**< Character data within comment. */
public $type = 'comment';
/**
* Transparent constructor.
*