diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php
index 0df13ae5..07b12d22 100644
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -1,6 +1,7 @@
factory = new HTMLPurifier_TokenFactory();
+ }
+
public function tokenizeHTML($string, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
@@ -50,10 +58,12 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
@$doc->loadHTML($string); // mute all errors, handle it transparently
- return $this->tokenizeDOM(
+ $tokens = array();
+ $this->tokenizeDOM(
$doc->childNodes->item(1)-> // html
getElementsByTagName('body')->item(0) // body
- );
+ , $tokens);
+ return $tokens;
}
/**
@@ -66,33 +76,33 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
* tag you're dealing with.
- * @returns Tokens of node appended to previously passed tokens.
+ * @returns Nothing; the node's tokens are appended to $tokens, which is passed by reference.
*/
- protected function tokenizeDOM($node, $tokens = array(), $collect = false) {
+ protected function tokenizeDOM($node, &$tokens, $collect = false) {
// recursive goodness!
// intercept non element nodes
if ( !($node instanceof DOMElement) ) {
if ($node instanceof DOMComment) {
- $tokens[] = new HTMLPurifier_Token_Comment($node->data);
+ $tokens[] = $this->factory->createComment($node->data);
} elseif ($node instanceof DOMText ||
$node instanceof DOMCharacterData) {
- $tokens[] = new HTMLPurifier_Token_Text($node->data);
+ $tokens[] = $this->factory->createText($node->data);
}
// quite possibly, the object wasn't handled, that's fine
- return $tokens;
+ return;
}
// We still have to make sure that the element actually IS empty
if (!$node->hasChildNodes()) {
if ($collect) {
- $tokens[] = new HTMLPurifier_Token_Empty(
+ $tokens[] = $this->factory->createEmpty(
$node->tagName,
$this->transformAttrToAssoc($node->attributes)
);
}
} else {
if ($collect) { // don't wrap on first iteration
- $tokens[] = new HTMLPurifier_Token_Start(
+ $tokens[] = $this->factory->createStart(
$tag_name = $node->tagName, // somehow, it get's dropped
$this->transformAttrToAssoc($node->attributes)
);
@@ -100,15 +110,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
foreach ($node->childNodes as $node) {
// remember, it's an accumulator. Otherwise, we'd have
// to use array_merge
- $tokens = $this->tokenizeDOM($node, $tokens, true);
+ $this->tokenizeDOM($node, $tokens, true);
}
if ($collect) {
- $tokens[] = new HTMLPurifier_Token_End($tag_name);
+ $tokens[] = $this->factory->createEnd($tag_name);
}
}
- return $tokens;
-
}
/**
diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php
index ed46621b..f53743b9 100644
--- a/library/HTMLPurifier/Token.php
+++ b/library/HTMLPurifier/Token.php
@@ -51,6 +51,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
* @param $attributes Associative array of attributes.
*/
function HTMLPurifier_Token_Tag($name, $attributes = array()) {
+ // $attributes must be an array, never null: it is iterated by the foreach below.
$this->name = ctype_lower($name) ? $name : strtolower($name);
foreach ($attributes as $key => $value) {
// normalization only necessary when key is not lowercase
diff --git a/library/HTMLPurifier/TokenFactory.php b/library/HTMLPurifier/TokenFactory.php
new file mode 100644
index 00000000..8c761fbb
--- /dev/null
+++ b/library/HTMLPurifier/TokenFactory.php
@@ -0,0 +1,51 @@
+p_start = new HTMLPurifier_Token_Start('', array());
+ $this->p_end = new HTMLPurifier_Token_End('');
+ $this->p_empty = new HTMLPurifier_Token_Empty('', array());
+ $this->p_text = new HTMLPurifier_Token_Text('');
+ $this->p_comment= new HTMLPurifier_Token_Comment('');
+ }
+
+ public function createStart($name, $attributes = array()) { // Returns a start-tag token for $name carrying the $attributes array.
+ $p = clone $this->p_start; // copy the prototype start token held by the factory instead of calling `new`
+ $p->HTMLPurifier_Token_Tag($name, $attributes); // class-named (PHP 4 style constructor) method of Token_Tag: sets the lowercased name and the attributes on the copy
+ return $p;
+ }
+
+ public function createEnd($name) { // Returns an end-tag token for $name.
+ $p = clone $this->p_end; // copy the prototype end token held by the factory instead of calling `new`
+ $p->HTMLPurifier_Token_Tag($name); // attributes are omitted, so the method's default of array() applies
+ return $p;
+ }
+
+ public function createEmpty($name, $attributes = array()) { // Returns an empty-element token for $name carrying the $attributes array.
+ $p = clone $this->p_empty; // copy the prototype empty token held by the factory instead of calling `new`
+ $p->HTMLPurifier_Token_Tag($name, $attributes); // class-named (PHP 4 style constructor) method of Token_Tag: sets the lowercased name and the attributes on the copy
+ return $p;
+ }
+
+ public function createText($data) { // Returns a text token built from $data.
+ $p = clone $this->p_text; // copy the prototype text token held by the factory instead of calling `new`
+ $p->HTMLPurifier_Token_Text($data); // presumably the class-named constructor of Token_Text, re-run on the copy -- not visible here, confirm
+ return $p;
+ }
+
+ public function createComment($data) { // Returns a comment token built from $data.
+ $p = clone $this->p_comment; // copy the prototype comment token held by the factory instead of calling `new`
+ $p->HTMLPurifier_Token_Comment($data); // presumably the class-named constructor of Token_Comment, re-run on the copy -- not visible here, confirm
+ return $p;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/TokenFactoryTest.php b/tests/HTMLPurifier/TokenFactoryTest.php
new file mode 100644
index 00000000..9995ef74
--- /dev/null
+++ b/tests/HTMLPurifier/TokenFactoryTest.php
@@ -0,0 +1,19 @@
+ 'about:blank'));
+ $generated = $factory->createStart('a', array('href' => 'about:blank'));
+
+ $this->assertEqual($regular, $generated);
+
+ }
+}
+
+?>
\ No newline at end of file
diff --git a/tests/index.php b/tests/index.php
index ae6566ef..8aab3589 100644
--- a/tests/index.php
+++ b/tests/index.php
@@ -79,6 +79,10 @@ $test_files[] = 'AttrTransform/ImgRequiredTest.php';
$test_files[] = 'URISchemeRegistryTest.php';
$test_files[] = 'URISchemeTest.php';
+if (version_compare(PHP_VERSION, '5', '>=')) { // register the factory test only when running on PHP 5 or newer
+ $test_files[] = 'TokenFactoryTest.php'; // the factory under test uses `clone`, which is PHP 5 syntax
+}
+
$test_file_lookup = array_flip($test_files);
function htmlpurifier_path2class($path) {