mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 23:28:42 +00:00
Major optimization of tokenizeDOM(): reduces execution time from 75% to 20% by passing tokens by reference and using a token factory.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@265 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
ed79facadf
commit
acd7ceb940
@ -1,6 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
|
require_once 'HTMLPurifier/TokenFactory.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parser that uses PHP 5's DOM extension (part of the core).
|
* Parser that uses PHP 5's DOM extension (part of the core).
|
||||||
@ -25,6 +26,13 @@ require_once 'HTMLPurifier/Lexer.php';
|
|||||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||||
{
|
{
|
||||||
|
|
||||||
|
private $factory;
|
||||||
|
|
||||||
|
public function __construct() {
|
||||||
|
// setup the factory
|
||||||
|
$this->factory = new HTMLPurifier_TokenFactory();
|
||||||
|
}
|
||||||
|
|
||||||
public function tokenizeHTML($string, $config = null) {
|
public function tokenizeHTML($string, $config = null) {
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
@ -50,10 +58,12 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
|
|
||||||
@$doc->loadHTML($string); // mute all errors, handle it transparently
|
@$doc->loadHTML($string); // mute all errors, handle it transparently
|
||||||
|
|
||||||
return $this->tokenizeDOM(
|
$tokens = array();
|
||||||
|
$this->tokenizeDOM(
|
||||||
$doc->childNodes->item(1)-> // html
|
$doc->childNodes->item(1)-> // html
|
||||||
getElementsByTagName('body')->item(0) // body
|
getElementsByTagName('body')->item(0) // body
|
||||||
);
|
, $tokens);
|
||||||
|
return $tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -66,33 +76,33 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
* tag you're dealing with.
|
* tag you're dealing with.
|
||||||
* @returns Tokens of node appended to previously passed tokens.
|
* @returns Nothing; the tokens of the node are appended to $tokens, which is passed by reference.
|
||||||
*/
|
*/
|
||||||
protected function tokenizeDOM($node, $tokens = array(), $collect = false) {
|
protected function tokenizeDOM($node, &$tokens, $collect = false) {
|
||||||
// recursive goodness!
|
// recursive goodness!
|
||||||
|
|
||||||
// intercept non element nodes
|
// intercept non element nodes
|
||||||
|
|
||||||
if ( !($node instanceof DOMElement) ) {
|
if ( !($node instanceof DOMElement) ) {
|
||||||
if ($node instanceof DOMComment) {
|
if ($node instanceof DOMComment) {
|
||||||
$tokens[] = new HTMLPurifier_Token_Comment($node->data);
|
$tokens[] = $this->factory->createComment($node->data);
|
||||||
} elseif ($node instanceof DOMText ||
|
} elseif ($node instanceof DOMText ||
|
||||||
$node instanceof DOMCharacterData) {
|
$node instanceof DOMCharacterData) {
|
||||||
$tokens[] = new HTMLPurifier_Token_Text($node->data);
|
$tokens[] = $this->factory->createText($node->data);
|
||||||
}
|
}
|
||||||
// quite possibly, the object wasn't handled, that's fine
|
// quite possibly, the object wasn't handled, that's fine
|
||||||
return $tokens;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We still have to make sure that the element actually IS empty
|
// We still have to make sure that the element actually IS empty
|
||||||
if (!$node->hasChildNodes()) {
|
if (!$node->hasChildNodes()) {
|
||||||
if ($collect) {
|
if ($collect) {
|
||||||
$tokens[] = new HTMLPurifier_Token_Empty(
|
$tokens[] = $this->factory->createEmpty(
|
||||||
$node->tagName,
|
$node->tagName,
|
||||||
$this->transformAttrToAssoc($node->attributes)
|
$this->transformAttrToAssoc($node->attributes)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if ($collect) { // don't wrap on first iteration
|
if ($collect) { // don't wrap on first iteration
|
||||||
$tokens[] = new HTMLPurifier_Token_Start(
|
$tokens[] = $this->factory->createStart(
|
||||||
$tag_name = $node->tagName, // somehow, it gets dropped
|
$tag_name = $node->tagName, // somehow, it gets dropped
|
||||||
$this->transformAttrToAssoc($node->attributes)
|
$this->transformAttrToAssoc($node->attributes)
|
||||||
);
|
);
|
||||||
@ -100,15 +110,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
foreach ($node->childNodes as $node) {
|
foreach ($node->childNodes as $node) {
|
||||||
// remember, it's an accumulator. Otherwise, we'd have
|
// remember, it's an accumulator. Otherwise, we'd have
|
||||||
// to use array_merge
|
// to use array_merge
|
||||||
$tokens = $this->tokenizeDOM($node, $tokens, true);
|
$this->tokenizeDOM($node, $tokens, true);
|
||||||
}
|
}
|
||||||
if ($collect) {
|
if ($collect) {
|
||||||
$tokens[] = new HTMLPurifier_Token_End($tag_name);
|
$tokens[] = $this->factory->createEnd($tag_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $tokens;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -51,6 +51,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
|
|||||||
* @param $attributes Associative array of attributes.
|
* @param $attributes Associative array of attributes.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_Token_Tag($name, $attributes = array()) {
|
function HTMLPurifier_Token_Tag($name, $attributes = array()) {
|
||||||
|
//if ($attributes === null) var_dump(debug_backtrace());
|
||||||
$this->name = ctype_lower($name) ? $name : strtolower($name);
|
$this->name = ctype_lower($name) ? $name : strtolower($name);
|
||||||
foreach ($attributes as $key => $value) {
|
foreach ($attributes as $key => $value) {
|
||||||
// normalization only necessary when key is not lowercase
|
// normalization only necessary when key is not lowercase
|
||||||
|
51
library/HTMLPurifier/TokenFactory.php
Normal file
51
library/HTMLPurifier/TokenFactory.php
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Token.php';
|
||||||
|
|
||||||
|
/**
 * Factory for token objects.
 *
 * One blank prototype of every token type is built up front. Each create*()
 * method clones the matching prototype and then calls the token's
 * class-named initializer method on the copy, instead of constructing a
 * fresh object with `new`.
 */
class HTMLPurifier_TokenFactory
{

    // The "p_" prefix stands for prototype.

    /** Prototype cloned by createStart(). */
    private $p_start;

    /** Prototype cloned by createEnd(). */
    private $p_end;

    /** Prototype cloned by createEmpty(). */
    private $p_empty;

    /** Prototype cloned by createText(). */
    private $p_text;

    /** Prototype cloned by createComment(). */
    private $p_comment;

    /**
     * Builds the blank prototype tokens that the create*() methods clone.
     */
    public function __construct() {
        $this->p_start   = new HTMLPurifier_Token_Start('', array());
        $this->p_end     = new HTMLPurifier_Token_End('');
        $this->p_empty   = new HTMLPurifier_Token_Empty('', array());
        $this->p_text    = new HTMLPurifier_Token_Text('');
        $this->p_comment = new HTMLPurifier_Token_Comment('');
    }

    /**
     * Creates a start tag token.
     * @param $name Tag name.
     * @param $attributes Associative array of attributes.
     * @return Clone of the start prototype, initialized with the arguments.
     */
    public function createStart($name, $attributes = array()) {
        $token = clone $this->p_start;
        // initialize the copy through the tag initializer method
        $token->HTMLPurifier_Token_Tag($name, $attributes);
        return $token;
    }

    /**
     * Creates an end tag token.
     * @param $name Tag name.
     * @return Clone of the end prototype, initialized with the tag name.
     */
    public function createEnd($name) {
        $token = clone $this->p_end;
        // no attributes are passed, so the initializer's default applies
        $token->HTMLPurifier_Token_Tag($name);
        return $token;
    }

    /**
     * Creates an empty (childless) tag token.
     * @param $name Tag name.
     * @param $attributes Associative array of attributes.
     * @return Clone of the empty prototype, initialized with the arguments.
     */
    public function createEmpty($name, $attributes = array()) {
        $token = clone $this->p_empty;
        $token->HTMLPurifier_Token_Tag($name, $attributes);
        return $token;
    }

    /**
     * Creates a text token.
     * @param $data Data to hand to the text token's initializer.
     * @return Clone of the text prototype, initialized with $data.
     */
    public function createText($data) {
        $token = clone $this->p_text;
        $token->HTMLPurifier_Token_Text($data);
        return $token;
    }

    /**
     * Creates a comment token.
     * @param $data Data to hand to the comment token's initializer.
     * @return Clone of the comment prototype, initialized with $data.
     */
    public function createComment($data) {
        $token = clone $this->p_comment;
        $token->HTMLPurifier_Token_Comment($data);
        return $token;
    }

}
|
||||||
|
|
||||||
|
?>
|
19
tests/HTMLPurifier/TokenFactoryTest.php
Normal file
19
tests/HTMLPurifier/TokenFactoryTest.php
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/TokenFactory.php';
|
||||||
|
|
||||||
|
/**
 * Test case for HTMLPurifier_TokenFactory.
 */
class HTMLPurifier_TokenFactoryTest extends UnitTestCase
{

    /**
     * A start token produced by the factory must compare equal to one
     * constructed directly with the same name and attributes.
     */
    public function test() {

        $name       = 'a';
        $attributes = array('href' => 'about:blank');

        $factory = new HTMLPurifier_TokenFactory();

        $expected = new HTMLPurifier_Token_Start($name, $attributes);
        $actual   = $factory->createStart($name, $attributes);

        $this->assertEqual($expected, $actual);

    }

}
|
||||||
|
|
||||||
|
?>
|
@ -79,6 +79,10 @@ $test_files[] = 'AttrTransform/ImgRequiredTest.php';
|
|||||||
$test_files[] = 'URISchemeRegistryTest.php';
|
$test_files[] = 'URISchemeRegistryTest.php';
|
||||||
$test_files[] = 'URISchemeTest.php';
|
$test_files[] = 'URISchemeTest.php';
|
||||||
|
|
||||||
|
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||||
|
$test_files[] = 'TokenFactoryTest.php';
|
||||||
|
}
|
||||||
|
|
||||||
$test_file_lookup = array_flip($test_files);
|
$test_file_lookup = array_flip($test_files);
|
||||||
|
|
||||||
function htmlpurifier_path2class($path) {
|
function htmlpurifier_path2class($path) {
|
||||||
|
Loading…
Reference in New Issue
Block a user