diff --git a/configdoc/usage.xml b/configdoc/usage.xml
index 983d0bba..56eeafee 100644
--- a/configdoc/usage.xml
+++ b/configdoc/usage.xml
@@ -498,7 +498,7 @@
- 64
+ 67
diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php
index d4904da0..28a93de9 100644
--- a/library/HTMLPurifier.includes.php
+++ b/library/HTMLPurifier.includes.php
@@ -19,6 +19,7 @@
*/
require 'HTMLPurifier.php';
+require 'HTMLPurifier/Arborize.php';
require 'HTMLPurifier/AttrCollections.php';
require 'HTMLPurifier/AttrDef.php';
require 'HTMLPurifier/AttrTransform.php';
@@ -54,6 +55,7 @@ require 'HTMLPurifier/Language.php';
require 'HTMLPurifier/LanguageFactory.php';
require 'HTMLPurifier/Length.php';
require 'HTMLPurifier/Lexer.php';
+require 'HTMLPurifier/Node.php';
require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/PropertyList.php';
require 'HTMLPurifier/PropertyListIterator.php';
@@ -191,6 +193,9 @@ require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
+require 'HTMLPurifier/Node/Comment.php';
+require 'HTMLPurifier/Node/Element.php';
+require 'HTMLPurifier/Node/Text.php';
require 'HTMLPurifier/Strategy/Composite.php';
require 'HTMLPurifier/Strategy/Core.php';
require 'HTMLPurifier/Strategy/FixNesting.php';
diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php
index c87d0411..9dea6d1e 100644
--- a/library/HTMLPurifier.safe-includes.php
+++ b/library/HTMLPurifier.safe-includes.php
@@ -13,6 +13,7 @@
$__dir = dirname(__FILE__);
require_once $__dir . '/HTMLPurifier.php';
+require_once $__dir . '/HTMLPurifier/Arborize.php';
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
require_once $__dir . '/HTMLPurifier/AttrDef.php';
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
@@ -48,6 +49,7 @@ require_once $__dir . '/HTMLPurifier/Language.php';
require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
require_once $__dir . '/HTMLPurifier/Length.php';
require_once $__dir . '/HTMLPurifier/Lexer.php';
+require_once $__dir . '/HTMLPurifier/Node.php';
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
require_once $__dir . '/HTMLPurifier/PropertyList.php';
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
@@ -185,6 +187,9 @@ require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
+require_once $__dir . '/HTMLPurifier/Node/Comment.php';
+require_once $__dir . '/HTMLPurifier/Node/Element.php';
+require_once $__dir . '/HTMLPurifier/Node/Text.php';
require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';
require_once $__dir . '/HTMLPurifier/Strategy/Core.php';
require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php';
diff --git a/library/HTMLPurifier/Arborize.php b/library/HTMLPurifier/Arborize.php
new file mode 100644
index 00000000..9e6617be
--- /dev/null
+++ b/library/HTMLPurifier/Arborize.php
@@ -0,0 +1,71 @@
+getHTMLDefinition();
+ $parent = new HTMLPurifier_Token_Start($definition->info_parent);
+ $stack = array($parent->toNode());
+ foreach ($tokens as $token) {
+ $token->skip = null; // [MUT]
+ $token->carryover = null; // [MUT]
+ if ($token instanceof HTMLPurifier_Token_End) {
+ $token->start = null; // [MUT]
+ $r = array_pop($stack);
+ assert($r->name === $token->name);
+ assert(empty($token->attr));
+ $r->endCol = $token->col;
+ $r->endLine = $token->line;
+ $r->endArmor = $token->armor;
+ continue;
+ }
+ $node = $token->toNode();
+ $stack[count($stack)-1]->children[] = $node;
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ $stack[] = $node;
+ }
+ }
+ assert(count($stack) == 1);
+ return $stack[0];
+ }
+
+ public static function flatten($node, $config, $context) {
+ $level = 0;
+ $nodes = array($level => new HTMLPurifier_Queue(array($node)));
+ $closingTokens = array();
+ $tokens = array();
+ do {
+ while (!$nodes[$level]->isEmpty()) {
+ $node = $nodes[$level]->shift(); // FIFO
+ list($start, $end) = $node->toTokenPair();
+ if ($level > 0) {
+ $tokens[] = $start;
+ }
+ if ($end !== NULL) {
+ $closingTokens[$level][] = $end;
+ }
+ if ($node instanceof HTMLPurifier_Node_Element) {
+ $level++;
+ $nodes[$level] = new HTMLPurifier_Queue();
+ foreach ($node->children as $childNode) {
+ $nodes[$level]->push($childNode);
+ }
+ }
+ }
+ $level--;
+ if ($level && isset($closingTokens[$level])) {
+ while ($token = array_pop($closingTokens[$level])) {
+ $tokens[] = $token;
+ }
+ }
+ } while ($level > 0);
+ return $tokens;
+ }
+}
diff --git a/library/HTMLPurifier/Node.php b/library/HTMLPurifier/Node.php
new file mode 100644
index 00000000..9e239b3c
--- /dev/null
+++ b/library/HTMLPurifier/Node.php
@@ -0,0 +1,40 @@
+data = $data;
+ $this->line = $line;
+ $this->col = $col;
+ }
+
+ public function toTokenPair() {
+ return array(new HTMLPurifier_Token_Comment($this->data, $this->line, $this->col), null);
+ }
+}
diff --git a/library/HTMLPurifier/Node/Element.php b/library/HTMLPurifier/Node/Element.php
new file mode 100644
index 00000000..6cbf56da
--- /dev/null
+++ b/library/HTMLPurifier/Node/Element.php
@@ -0,0 +1,59 @@
+ form or the form, i.e.
+ * is it a pair of start/end tokens or an empty token.
+ * @bool
+ */
+ public $empty = false;
+
+ public $endCol = null, $endLine = null, $endArmor = array();
+
+ public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) {
+ $this->name = $name;
+ $this->attr = $attr;
+ $this->line = $line;
+ $this->col = $col;
+ $this->armor = $armor;
+ }
+
+ public function toTokenPair() {
+ // XXX inefficiency here, normalization is not necessary
+ if ($this->empty) {
+ return array(new HTMLPurifier_Token_Empty($this->name, $this->attr, $this->line, $this->col, $this->armor), null);
+ } else {
+ $start = new HTMLPurifier_Token_Start($this->name, $this->attr, $this->line, $this->col, $this->armor);
+ $end = new HTMLPurifier_Token_End($this->name, array(), $this->endLine, $this->endCol, $this->endArmor);
+ //$end->start = $start;
+ return array($start, $end);
+ }
+ }
+}
+
diff --git a/library/HTMLPurifier/Node/Text.php b/library/HTMLPurifier/Node/Text.php
new file mode 100644
index 00000000..03dc1b20
--- /dev/null
+++ b/library/HTMLPurifier/Node/Text.php
@@ -0,0 +1,47 @@
+data = $data;
+ $this->is_whitespace = $is_whitespace;
+ $this->line = $line;
+ $this->col = $col;
+ }
+
+ public function toTokenPair() {
+ return array(new HTMLPurifier_Token_Text($this->data, $this->line, $this->col), null);
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php
index d00c6d04..f78ad086 100644
--- a/library/HTMLPurifier/Strategy/FixNesting.php
+++ b/library/HTMLPurifier/Strategy/FixNesting.php
@@ -58,6 +58,9 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
//####################################################################//
// Pre-processing
+ //$node = HTMLPurifier_Arborize::arborize($tokens, $config, $context);
+ //$new_tokens = HTMLPurifier_Arborize::flatten($node, $config, $context);
+
// get a copy of the HTML definition
$definition = $config->getHTMLDefinition();
diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php
index b1ff93c3..85b85e07 100644
--- a/library/HTMLPurifier/Token.php
+++ b/library/HTMLPurifier/Token.php
@@ -90,6 +90,11 @@ abstract class HTMLPurifier_Token
$this->line = $l;
$this->col = $c;
}
+
+ /**
+ * Converts a token into its corresponding node.
+ */
+ abstract public function toNode();
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Comment.php b/library/HTMLPurifier/Token/Comment.php
index ae90ad21..23453c70 100644
--- a/library/HTMLPurifier/Token/Comment.php
+++ b/library/HTMLPurifier/Token/Comment.php
@@ -29,6 +29,10 @@ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
$this->line = $line;
$this->col = $col;
}
+
+ public function toNode() {
+ return new HTMLPurifier_Node_Comment($this->data, $this->line, $this->col);
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Empty.php b/library/HTMLPurifier/Token/Empty.php
index c54fbc82..78a95f55 100644
--- a/library/HTMLPurifier/Token/Empty.php
+++ b/library/HTMLPurifier/Token/Empty.php
@@ -5,6 +5,11 @@
*/
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
+ public function toNode() {
+ $n = parent::toNode();
+ $n->empty = true;
+ return $n;
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/End.php b/library/HTMLPurifier/Token/End.php
index bfc5e3f1..59b38fdc 100644
--- a/library/HTMLPurifier/Token/End.php
+++ b/library/HTMLPurifier/Token/End.php
@@ -15,6 +15,10 @@ class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
* @type HTMLPurifier_Token
*/
public $start;
+
+ public function toNode() {
+ throw new Exception("HTMLPurifier_Token_End->toNode not supported!");
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Tag.php b/library/HTMLPurifier/Token/Tag.php
index f65235a9..d643fa64 100644
--- a/library/HTMLPurifier/Token/Tag.php
+++ b/library/HTMLPurifier/Token/Tag.php
@@ -59,6 +59,10 @@ abstract class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
$this->col = $col;
$this->armor = $armor;
}
+
+ public function toNode() {
+ return new HTMLPurifier_Node_Element($this->name, $this->attr, $this->line, $this->col, $this->armor);
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Text.php b/library/HTMLPurifier/Token/Text.php
index 75d25e3f..f26a1c21 100644
--- a/library/HTMLPurifier/Token/Text.php
+++ b/library/HTMLPurifier/Token/Text.php
@@ -44,6 +44,10 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
$this->line = $line;
$this->col = $col;
}
+
+ public function toNode() {
+ return new HTMLPurifier_Node_Text($this->data, $this->is_whitespace, $this->line, $this->col);
+ }
}
// vim: et sw=4 sts=4