diff --git a/PureHTMLDefinition.php b/PureHTMLDefinition.php
index 83f3aaac..f6627f53 100644
--- a/PureHTMLDefinition.php
+++ b/PureHTMLDefinition.php
@@ -352,13 +352,58 @@ class HTMLDTD_Element
// true = leave nodes as is
// false = delete parent node and all children
// array(...) = replace children nodes with these
+
+// This is the hardest one to implement. We'll use fancy regexp tricks.
+// Right now, we only expect it to return TRUE or FALSE (it won't attempt
+// to fix the tree).
+
+// We may end up writing custom code for each HTML case
+// in order to make it self-correcting.
class HTMLDTD_ChildDef
{
var $dtd_regex;
+ var $_pcre_regex;
function HTMLDTD_ChildDef($dtd_regex) {
$this->dtd_regex = $dtd_regex;
+ $this->_compileRegex(); // derive $_pcre_regex from the DTD expression once, at construction
+ }
+ function _compileRegex() { // translate $this->dtd_regex into $this->_pcre_regex; returns nothing
+     $raw = str_replace(' ', '', $this->dtd_regex); // drop every space from the expression
+     if (substr($raw, 0, 1) != '(') { // substr() copes with '' and with PHP 8, unlike $raw{0}
+         $raw = "($raw)"; // make sure the whole expression is one group
+     }
+     $reg = str_replace(',', ',?', $raw); // sequence separators become optional
+     $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg); // group each name, allowing a leading comma
+     $this->_pcre_regex = $reg; // stored without delimiters or anchors
+ }
+ function validateChildren($tokens_of_children) {
+     // Flatten the token stream into a comma-separated list of the names
+     // of direct children, then test that list against the compiled regex.
+     $names = '';
+     $depth = 0; // number of start tags seen that have not been closed yet
+     foreach ($tokens_of_children as $tok) {
+         if (!empty($tok->is_whitespace)) {
+             continue;
+         }
+         // A token counts as a direct child when it is met at depth zero;
+         // decide that before the token itself moves the depth.
+         $at_top = ($depth == 0);
+         if (is_a($tok, 'MF_StartTag')) {
+             $depth++;
+         } elseif (is_a($tok, 'MF_EndTag')) {
+             $depth--;
+         }
+         if (!$at_top) {
+             continue;
+         }
+         $names .= $tok->name . ',';
+     }
+     $names = rtrim($names, ',');
+
+     // preg_match() gives 1 on a match, 0 on no match and false on error;
+     // the cast folds all three into a plain boolean.
+     return (bool) preg_match('/^'.$this->_pcre_regex.'$/', $names);
}
- function validateChildren($tokens_of_children) {}
}
class HTMLDTD_ChildDef_Simple extends HTMLDTD_ChildDef
{
@@ -373,6 +418,9 @@ class HTMLDTD_ChildDef_Simple extends HTMLDTD_ChildDef
$this->elements = $elements;
$this->gen = new HTML_Generator();
}
+ function validateChildren($tokens_of_children) { // same parameter list as HTMLDTD_ChildDef::validateChildren
+     trigger_error('Cannot call abstract function!', E_USER_ERROR); // placeholder body: always raises a user error
+ }
}
class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple
{
diff --git a/docs/spec.txt b/docs/spec.txt
index e5faa574..59d52ea1 100644
--- a/docs/spec.txt
+++ b/docs/spec.txt
@@ -153,6 +153,8 @@ So... here's the interesting code:
--
+// # This actually does the validation
+
// Validate the order of the children
if (!$was_error && count($dtd_children)) {
$children_list = implode(',', $children);
@@ -166,6 +168,8 @@ if (!$was_error && count($dtd_children)) {
--
+// # This figures out the PcreRegex
+
//$ch is a string of the allowed childs
$children = preg_split('/([^#a-zA-Z0-9_.-]+)/', $ch, -1, PREG_SPLIT_NO_EMPTY);
// check for parsed character data special case
@@ -222,7 +226,8 @@ form
must not contain other form elements.
Normative exclusions straight from the horses mouth. These are SGML style,
-not XML style, so we need to modify the ruleset slightly.
+not XML style, so we need to modify the ruleset slightly. However, the DTD
+may have done this for us already.
--
diff --git a/tests/PureHTMLDefinition.php b/tests/PureHTMLDefinition.php
index ffa5bddf..aaffe26c 100644
--- a/tests/PureHTMLDefinition.php
+++ b/tests/PureHTMLDefinition.php
@@ -190,8 +190,61 @@ class Test_PureHTMLDefinition extends UnitTestCase
class Test_HTMLDTD_ChildDef extends UnitTestCase
{
+ function assertSeries($inputs, $expect, $def) { // checks $def against each input/expectation pair
+     foreach ($inputs as $key => $tokens) {
+         $actual = $def->validateChildren($tokens);
+         if (!is_bool($expect[$key])) { // non-boolean expectation: loose comparison
+             $this->assertEqual($expect[$key], $actual);
+             paintIf($actual, $actual != $expect[$key]);
+         } else { // boolean expectation: value and type must both match
+             $this->assertIdentical($expect[$key], $actual);
+         }
+     }
+ }
+
+ function test_complex() {
+     // Content model of the table element.
+     $def = new HTMLDTD_ChildDef(
+         '(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
+
+     $inputs = array();
+     $expect = array();
+
+     // Case 0: no children at all.
+     $inputs[] = array();
+     $expect[] = false;
+
+     // Case 1: a single row. Its contents are irrelevant here: if the tr
+     // turns out to be illegal, the parent node gets re-evaluated anyway.
+     $inputs[] = array(new MF_StartTag('tr'), new MF_EndTag('tr'));
+     $expect[] = true;
+
+     // Case 2: caption, col, thead and tfoot followed by a tbody.
+     $inputs[] = array(
+         new MF_StartTag('caption'), new MF_EndTag('caption'),
+         new MF_StartTag('col'),     new MF_EndTag('col'),
+         new MF_StartTag('thead'),   new MF_EndTag('thead'),
+         new MF_StartTag('tfoot'),   new MF_EndTag('tfoot'),
+         new MF_StartTag('tbody'),   new MF_EndTag('tbody')
+     );
+     $expect[] = true;
+
+     // Case 3: three col elements followed by a row.
+     $inputs[] = array(
+         new MF_StartTag('col'), new MF_EndTag('col'),
+         new MF_StartTag('col'), new MF_EndTag('col'),
+         new MF_StartTag('col'), new MF_EndTag('col'),
+         new MF_StartTag('tr'),  new MF_EndTag('tr')
+     );
+     $expect[] = true;
+     $this->assertSeries($inputs, $expect, $def);
+ }
+
function test_simple() {
+ // simple is actually an abstract class
+ // but we're unit testing some of the conv. functions it gives
+
$def = new HTMLDTD_ChildDef_Simple('foobar | bang |gizmo');
$this->assertEqual($def->elements,
array(
@@ -274,15 +327,7 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
);
$expect[5] = false;
- foreach ($inputs as $i => $input) {
- $result = $def->validateChildren($input);
- if (is_bool($expect[$i])) {
- $this->assertIdentical($expect[$i], $result);
- } else {
- $this->assertEqual($expect[$i], $result);
- paintIf($result, $result != $expect[$i]);
- }
- }
+ $this->assertSeries($inputs, $expect, $def);
}