diff --git a/NEWS b/NEWS index f62bca8c..f9858c0e 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier - Autoclose now operates iteratively, i.e. <span><span><div>
now has both span tags closed. . Plugins now get their own changelogs according to project conventions. +. Convert tokens to use instanceof, reducing memory footprint and + improving comparison speed. 3.0.0, released 2008-01-06 # HTML Purifier is PHP 5 only! The 2.1.x branch will be maintained diff --git a/library/HTMLPurifier/AttrValidator.php b/library/HTMLPurifier/AttrValidator.php index 092cf152..3b2bd4b3 100644 --- a/library/HTMLPurifier/AttrValidator.php +++ b/library/HTMLPurifier/AttrValidator.php @@ -34,7 +34,10 @@ class HTMLPurifier_AttrValidator $current_token =& $context->get('CurrentToken', true); if (!$current_token) $context->register('CurrentToken', $token); - if ($token->type !== 'start' && $token->type !== 'empty') return $token; + if ( + !$token instanceof HTMLPurifier_Token_Start && + !$token instanceof HTMLPurifier_Token_Empty + ) return $token; // create alias to global definition array, see also $defs // DEFINITION CALL diff --git a/library/HTMLPurifier/ChildDef/Custom.php b/library/HTMLPurifier/ChildDef/Custom.php index 2b062d0b..5f5210fe 100644 --- a/library/HTMLPurifier/ChildDef/Custom.php +++ b/library/HTMLPurifier/ChildDef/Custom.php @@ -69,9 +69,9 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef $is_child = ($nesting == 0); // direct - if ($token->type == 'start') { + if ($token instanceof HTMLPurifier_Token_Start) { $nesting++; - } elseif ($token->type == 'end') { + } elseif ($token instanceof HTMLPurifier_Token_End) { $nesting--; } diff --git a/library/HTMLPurifier/ChildDef/Required.php b/library/HTMLPurifier/ChildDef/Required.php index 0438b440..28ad0066 100644 --- a/library/HTMLPurifier/ChildDef/Required.php +++ b/library/HTMLPurifier/ChildDef/Required.php @@ -71,9 +71,9 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef $is_child = ($nesting == 0); - if ($token->type == 'start') { + if ($token instanceof HTMLPurifier_Token_Start) { $nesting++; - } elseif ($token->type == 'end') { + } elseif 
($token instanceof HTMLPurifier_Token_End) { $nesting--; } @@ -81,7 +81,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef $is_deleting = false; if (!isset($this->elements[$token->name])) { $is_deleting = true; - if ($pcdata_allowed && $token->type == 'text') { + if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) { $result[] = $token; } elseif ($pcdata_allowed && $escape_invalid_children) { $result[] = new HTMLPurifier_Token_Text( @@ -91,7 +91,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef continue; } } - if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) { + if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) { $result[] = $token; } elseif ($pcdata_allowed && $escape_invalid_children) { $result[] = diff --git a/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/library/HTMLPurifier/ChildDef/StrictBlockquote.php index b018d6ed..53a3523c 100644 --- a/library/HTMLPurifier/ChildDef/StrictBlockquote.php +++ b/library/HTMLPurifier/ChildDef/StrictBlockquote.php @@ -45,8 +45,8 @@ extends HTMLPurifier_ChildDef_Required if (!$is_inline) { if (!$depth) { if ( - ($token->type == 'text' && !$token->is_whitespace) || - ($token->type != 'text' && !isset($this->elements[$token->name])) + ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) || + (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name])) ) { $is_inline = true; $ret[] = $block_wrap_start; @@ -55,7 +55,7 @@ extends HTMLPurifier_ChildDef_Required } else { if (!$depth) { // starting tokens have been inline text / empty - if ($token->type == 'start' || $token->type == 'empty') { + if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) { if (isset($this->elements[$token->name])) { // ended $ret[] = $block_wrap_end; @@ -65,8 +65,8 @@ extends HTMLPurifier_ChildDef_Required } } $ret[] = $token; - if ($token->type == 'start') 
$depth++; - if ($token->type == 'end') $depth--; + if ($token instanceof HTMLPurifier_Token_Start) $depth++; + if ($token instanceof HTMLPurifier_Token_End) $depth--; } if ($is_inline) $ret[] = $block_wrap_end; return $ret; diff --git a/library/HTMLPurifier/ChildDef/Table.php b/library/HTMLPurifier/ChildDef/Table.php index dd7f6b55..6994587b 100644 --- a/library/HTMLPurifier/ChildDef/Table.php +++ b/library/HTMLPurifier/ChildDef/Table.php @@ -41,9 +41,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef if ($token === false) { // terminating sequence started - } elseif ($token->type == 'start') { + } elseif ($token instanceof HTMLPurifier_Token_Start) { $nesting++; - } elseif ($token->type == 'end') { + } elseif ($token instanceof HTMLPurifier_Token_End) { $nesting--; } @@ -112,7 +112,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef $collection[] = $token; continue; default: - if ($token->type == 'text' && $token->is_whitespace) { + if ($token instanceof HTMLPurifier_Token_Text && $token->is_whitespace) { $collection[] = $token; $tag_index++; } diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php index b4a6d6ed..981c5b9f 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/HTMLPurifier/Generator.php @@ -89,7 +89,7 @@ class HTMLPurifier_Generator if (!$tokens) return ''; for ($i = 0, $size = count($tokens); $i < $size; $i++) { if ($this->_scriptFix && $tokens[$i]->name === 'script' - && $i + 2 < $size && $tokens[$i+2]->type == 'end') { + && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { // script special case // the contents of the script block must be ONE token // for this to work @@ -139,21 +139,21 @@ class HTMLPurifier_Generator * @return Generated HTML */ public function generateFromToken($token) { - if (!isset($token->type)) return ''; - if ($token->type == 'start') { + if (!$token instanceof HTMLPurifier_Token) return ''; + if ($token instanceof 
HTMLPurifier_Token_Start) { $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; - } elseif ($token->type == 'end') { + } elseif ($token instanceof HTMLPurifier_Token_End) { return '</' . $token->name . '>'; - } elseif ($token->type == 'empty') { + } elseif ($token instanceof HTMLPurifier_Token_Empty) { $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . ( $this->_xhtml ? ' /': '' ) . '>'; - } elseif ($token->type == 'text') { + } elseif ($token instanceof HTMLPurifier_Token_Text) { return $this->escape($token->data); } else { @@ -168,7 +168,7 @@ class HTMLPurifier_Generator * --> somewhere inside the script contents. */ public function generateScriptFromToken($token) { - if ($token->type != 'text') return $this->generateFromToken($token); + if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); // return ''; // more advanced version: // thanks diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php index cfa9094e..b3daa5e1 100644 --- a/library/HTMLPurifier/Injector/AutoParagraph.php +++ b/library/HTMLPurifier/Injector/AutoParagraph.php @@ -72,7 +72,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector // a double newline in them $nesting = 0; for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { - if ($this->inputTokens[$i]->type == 'start'){ + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start){ if (!$this->_isInline($this->inputTokens[$i])) { // we haven't found a double-newline, and // we've hit a block element, so don't paragraph @@ -81,11 +81,11 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector } $nesting++; } - if ($this->inputTokens[$i]->type == 'end') { + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) { if ($nesting <= 0) break; $nesting--; } - if 
($this->inputTokens[$i]->type == 'text') { + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text) { // found it! if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { $ok = true; @@ -117,12 +117,12 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector // check if this token is adjacent to the parent token $prev = $this->inputTokens[$this->inputIndex - 1]; - if ($prev->type != 'start') { + if (!$prev instanceof HTMLPurifier_Token_Start) { // not adjacent, we can abort early // add lead paragraph tag if our token is inline // and the previous tag was an end paragraph if ( - $prev->name == 'p' && $prev->type == 'end' && + $prev->name == 'p' && $prev instanceof HTMLPurifier_Token_End && $this->_isInline($token) ) { $token = array($this->_pStart(), $token); @@ -139,9 +139,9 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector // early if possible $j = 1; // current nesting, one is due to parent (we recalculate current token) for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) { - if ($this->inputTokens[$i]->type == 'start') $j++; - if ($this->inputTokens[$i]->type == 'end') $j--; - if ($this->inputTokens[$i]->type == 'text') { + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start) $j++; + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) $j--; + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text) { if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { $ok = true; break; @@ -248,14 +248,14 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector if (!$needs_end) { // Start of the checks one after the current token's index for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { - if ($this->inputTokens[$i]->type == 'start' || $this->inputTokens[$i]->type == 'empty') { + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Start || $this->inputTokens[$i] instanceof HTMLPurifier_Token_Empty) { $remove_paragraph_end = 
$this->_isInline($this->inputTokens[$i]); } // check if we can abort early (whitespace means we carry-on!) - if ($this->inputTokens[$i]->type == 'text' && !$this->inputTokens[$i]->is_whitespace) break; + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_Text && !$this->inputTokens[$i]->is_whitespace) break; // end tags will automatically be handled by MakeWellFormed, // so we don't have to worry about them - if ($this->inputTokens[$i]->type == 'end') break; + if ($this->inputTokens[$i] instanceof HTMLPurifier_Token_End) break; } } else { $remove_paragraph_end = false; diff --git a/library/HTMLPurifier/Lexer/PEARSax3.php b/library/HTMLPurifier/Lexer/PEARSax3.php index c359a613..ba5022fc 100644 --- a/library/HTMLPurifier/Lexer/PEARSax3.php +++ b/library/HTMLPurifier/Lexer/PEARSax3.php @@ -74,7 +74,7 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer // HTMLSax3 seems to always send empty tags an extra close tag // check and ignore if you see it: // [TESTME] to make sure it doesn't overreach - if ($this->tokens[count($this->tokens)-1]->type == 'empty') { + if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) { return true; } $this->tokens[] = new HTMLPurifier_Token_End($name); diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php index 14e7ab83..ea48f8d3 100644 --- a/library/HTMLPurifier/Strategy/FixNesting.php +++ b/library/HTMLPurifier/Strategy/FixNesting.php @@ -91,12 +91,12 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // scroll to the end of this node, report number, and collect // all children for ($j = $i, $depth = 0; ; $j++) { - if ($tokens[$j]->type == 'start') { + if ($tokens[$j] instanceof HTMLPurifier_Token_Start) { $depth++; // skip token assignment on first iteration, this is the // token we currently are on if ($depth == 1) continue; - } elseif ($tokens[$j]->type == 'end') { + } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) 
{ $depth--; // skip token assignment on last iteration, this is the // end token of the token we're currently on @@ -287,8 +287,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // Test if the token indeed is a start tag, if not, move forward // and test again. $size = count($tokens); - while ($i < $size and $tokens[$i]->type != 'start') { - if ($tokens[$i]->type == 'end') { + while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) { + if ($tokens[$i] instanceof HTMLPurifier_Token_End) { // pop a token index off the stack if we ended a node array_pop($stack); // pop an exclusion lookup off exclusion stack if diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index 438d3f0f..39c600ff 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -110,7 +110,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // quick-check: if it's not a tag, no need to process if (empty( $token->is_tag )) { - if ($token->type === 'text') { + if ($token instanceof HTMLPurifier_Token_Text) { // injector handler code; duplicated for performance reasons foreach ($this->injectors as $i => $injector) { if (!$injector->skip) $injector->handleText($token); @@ -128,21 +128,21 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // quick tag checks: anything that's *not* an end tag $ok = false; - if ($info->type == 'empty' && $token->type == 'start') { + if ($info->type === 'empty' && $token instanceof HTMLPurifier_Token_Start) { // test if it claims to be a start tag but is empty $token = new HTMLPurifier_Token_Empty($token->name, $token->attr); $ok = true; - } elseif ($info->type != 'empty' && $token->type == 'empty' ) { + } elseif ($info->type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) { // claims to be empty but really is a start tag $token = array( new 
HTMLPurifier_Token_Start($token->name, $token->attr), new HTMLPurifier_Token_End($token->name) ); $ok = true; - } elseif ($token->type == 'empty') { + } elseif ($token instanceof HTMLPurifier_Token_Empty) { // real empty token $ok = true; - } elseif ($token->type == 'start') { + } elseif ($token instanceof HTMLPurifier_Token_Start) { // start tag // ...unless they also have to close their parent @@ -181,7 +181,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } // sanity check: we should be dealing with a closing tag - if ($token->type != 'end') continue; + if (!$token instanceof HTMLPurifier_Token_End) continue; // make sure that we have something open if (empty($this->currentNesting)) { @@ -304,9 +304,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } elseif ($token) { // regular case $this->outputTokens[] = $token; - if ($token->type == 'start') { + if ($token instanceof HTMLPurifier_Token_Start) { $this->currentNesting[] = $token; - } elseif ($token->type == 'end') { + } elseif ($token instanceof HTMLPurifier_Token_End) { array_pop($this->currentNesting); // not actually used } } diff --git a/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/library/HTMLPurifier/Strategy/RemoveForeignElements.php index 8c39f9a9..fe9e4025 100644 --- a/library/HTMLPurifier/Strategy/RemoveForeignElements.php +++ b/library/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -116,7 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy // mostly everything's good, but // we need to make sure required attributes are in order if ( - ($token->type === 'start' || $token->type === 'empty') && + ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) && $definition->info[$token->name]->required_attr && ($token->name != 'img' || $remove_invalid_img) // ensure config option still works ) { @@ -135,9 +135,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements 
extends HTMLPurifier_Strategy $token->armor['ValidateAttributes'] = true; } - if (isset($hidden_elements[$token->name]) && $token->type == 'start') { + if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) { $textify_comments = $token->name; - } elseif ($token->name === $textify_comments && $token->type == 'end') { + } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) { $textify_comments = false; } @@ -151,9 +151,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy // check if we need to destroy all of the tag's children // CAN BE GENERICIZED if (isset($hidden_elements[$token->name])) { - if ($token->type == 'start') { + if ($token instanceof HTMLPurifier_Token_Start) { $remove_until = $token->name; - } elseif ($token->type == 'empty') { + } elseif ($token instanceof HTMLPurifier_Token_Empty) { // do nothing: we're still looking } else { $remove_until = false; @@ -164,7 +164,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy } continue; } - } elseif ($token->type == 'comment') { + } elseif ($token instanceof HTMLPurifier_Token_Comment) { // textify comments in script tags when they are allowed if ($textify_comments !== false) { $data = $token->data; @@ -174,7 +174,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); continue; } - } elseif ($token->type == 'text') { + } elseif ($token instanceof HTMLPurifier_Token_Text) { } else { continue; } diff --git a/library/HTMLPurifier/Strategy/ValidateAttributes.php b/library/HTMLPurifier/Strategy/ValidateAttributes.php index 8cb42fbb..8c2f20c5 100644 --- a/library/HTMLPurifier/Strategy/ValidateAttributes.php +++ b/library/HTMLPurifier/Strategy/ValidateAttributes.php @@ -25,7 +25,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy // only process 
tokens that have attributes, // namely start and empty tags - if ($token->type !== 'start' && $token->type !== 'empty') continue; + if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) continue; // skip tokens that are armored if (!empty($token->armor['ValidateAttributes'])) continue; diff --git a/library/HTMLPurifier/TagTransform/Font.php b/library/HTMLPurifier/TagTransform/Font.php index 607199ed..a766c97e 100644 --- a/library/HTMLPurifier/TagTransform/Font.php +++ b/library/HTMLPurifier/TagTransform/Font.php @@ -38,7 +38,7 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform public function transform($tag, $config, $context) { - if ($tag->type == 'end') { + if ($tag instanceof HTMLPurifier_Token_End) { $new_tag = $tag->copy(); $new_tag->name = $this->transform_to; return $new_tag; diff --git a/library/HTMLPurifier/TagTransform/Simple.php b/library/HTMLPurifier/TagTransform/Simple.php index d1e52ba6..88195132 100644 --- a/library/HTMLPurifier/TagTransform/Simple.php +++ b/library/HTMLPurifier/TagTransform/Simple.php @@ -25,7 +25,7 @@ class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform $new_tag = $tag->copy(); $new_tag->name = $this->transform_to; if (!is_null($this->style) && - ($new_tag->type == 'start' || $new_tag->type == 'empty') + ($new_tag instanceof HTMLPurifier_Token_Start || $new_tag instanceof HTMLPurifier_Token_Empty) ) { $this->prependCSS($new_tag->attr, $this->style); } diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php index 27eb9c12..9d7b0c00 100644 --- a/library/HTMLPurifier/Token.php +++ b/library/HTMLPurifier/Token.php @@ -27,6 +27,20 @@ class HTMLPurifier_Token { public function copy() { return unserialize(serialize($this)); } + + public function __get($n) { + if ($n === 'type') { + trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE); + switch (get_class($this)) { + case 'HTMLPurifier_Token_Start': return 
'start'; + case 'HTMLPurifier_Token_Empty': return 'empty'; + case 'HTMLPurifier_Token_End': return 'end'; + case 'HTMLPurifier_Token_Text': return 'text'; + case 'HTMLPurifier_Token_Comment': return 'comment'; + default: return null; + } + } + } } /** @@ -84,18 +98,12 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract /** * Concrete start token class. */ -class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag -{ - public $type = 'start'; -} +class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag {} /** * Concrete empty token class. */ -class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag -{ - public $type = 'empty'; -} +class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag {} /** * Concrete end token class. @@ -104,10 +112,7 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag * is for optimization reasons, as under normal circumstances, the Lexers * do not pass attributes. */ -class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag -{ - public $type = 'end'; -} +class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag {} /** * Concrete text token class. @@ -122,7 +127,6 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token { public $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. */ - public $type = 'text'; public $data; /**< Parsed character data of text. */ public $is_whitespace; /**< Bool indicating if node is whitespace. */ @@ -145,7 +149,6 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token class HTMLPurifier_Token_Comment extends HTMLPurifier_Token { public $data; /**< Character data within comment. */ - public $type = 'comment'; /** * Transparent constructor. *