diff --git a/HTML_Generator.php b/HTML_Generator.php
index b8aeef23..9726267d 100644
--- a/HTML_Generator.php
+++ b/HTML_Generator.php
@@ -21,7 +21,7 @@ class HTML_Generator
} elseif (is_a($token, 'MF_EmptyTag')) {
$attr = $this->generateAttributes($token->attributes);
- return '<' . $token->name . ' ' . $attr . ' />';
+ return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
} elseif (is_a($token, 'MF_Text')) {
return htmlentities($token->data, ENT_COMPAT, 'UTF-8');
diff --git a/MarkupFragment.php b/MarkupFragment.php
index 3d99d4dd..f94ecb27 100644
--- a/MarkupFragment.php
+++ b/MarkupFragment.php
@@ -31,9 +31,10 @@ class MF_Text extends MF
{
var $name = '#PCDATA';
var $data;
+ var $is_whitespace = false;
function MF_Text($data) {
- $this->data = trim($data); // fairly certain trimming it's okay
- // but it's not default SAX behavior
+ $this->data = $data; // store the text exactly as given; it is no longer trimmed
+ if (trim($data, " \n\r\t") === '') $this->is_whitespace = true; // empty, or only spaces/newlines/CRs/tabs
}
function append($mf_text) {
return new MF_Text($this->data . $mf_text->data);
diff --git a/PureHTMLDefinition.php b/PureHTMLDefinition.php
index 80bb530e..1e189550 100644
--- a/PureHTMLDefinition.php
+++ b/PureHTMLDefinition.php
@@ -342,6 +342,10 @@ class HTMLDTD_Element
}
+// validateChildren() on HTMLDTD_ChildDef subclasses returns one of three things:
+// true = leave nodes as is
+// false = delete parent node and all children
+// array(...) = replace children nodes with these
class HTMLDTD_ChildDef
{
var $dtd_regex;
@@ -354,13 +358,76 @@ class HTMLDTD_ChildDef_Simple extends HTMLDTD_ChildDef
{
var $elements = array();
function HTMLDTD_ChildDef_Simple($elements) {
+ if (is_string($elements)) { // also accept a pipe-separated list such as 'dt | dd'
+ $elements = str_replace(' ', '', $elements); // drop the spaces around the names
+ $elements = explode('|', $elements);
+ }
+ $elements = array_flip($elements); // names become keys so isset() can test membership
+ foreach ($elements as $i => $x) $elements[$i] = true; // replace the flipped indexes with true
$this->elements = $elements;
+ $this->gen = new HTML_Generator(); // HTMLDTD_ChildDef_Required uses it to turn rejected tokens back into markup
}
}
class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple
{
function validateChildren($tokens_of_children) {
-
+ // Filters $tokens_of_children against $this->elements; returns false (delete parent), true (unchanged) or the new token array
+ if (empty($tokens_of_children)) return false; // no tokens at all: delete parent node
+
+ // the new set of children
+ $result = array();
+
+ // current depth into the nest; 0 means the next token is a direct child
+ $nesting = 0;
+
+ // whether or not the direct child currently being walked was rejected
+ $is_deleting = false;
+
+ // whether or not parsed character data is allowed
+ // this controls whether a rejected token is dropped silently
+ // or kept as a text token holding its generated markup
+ $pcdata_allowed = isset($this->elements['#PCDATA']);
+
+ // a little sanity check to make sure it's not ALL whitespace
+ $all_whitespace = true;
+
+ foreach ($tokens_of_children as $token) {
+ if (!empty($token->is_whitespace)) { // flagged tokens are always kept, whatever the depth or deletion state
+ $result[] = $token;
+ continue;
+ }
+ $all_whitespace = false; // phew, we're not talking about whitespace
+ // sample the depth BEFORE this token changes it
+ $is_child = ($nesting == 0);
+ // track nesting: start tags open a level, end tags close one
+ if (is_a($token, 'MF_StartTag')) {
+ $nesting++;
+ } elseif (is_a($token, 'MF_EndTag')) {
+ $nesting--;
+ }
+ // a direct child decides afresh whether it (and everything nested in it) is kept
+ if ($is_child) {
+ $is_deleting = false;
+ if (!isset($this->elements[$token->name])) { // name is not in the allowed set
+ $is_deleting = true;
+ if ($pcdata_allowed) {
+ $result[] = new MF_Text($this->gen->generateFromToken($token)); // keep it as text instead of markup
+ }
+ continue;
+ }
+ }
+ if (!$is_deleting) {
+ $result[] = $token;
+ } elseif ($pcdata_allowed) {
+ $result[] = new MF_Text($this->gen->generateFromToken($token)); // token nested inside a rejected child
+ } else {
+ // drop silently
+ }
+ }
+ if (empty($result)) return false; // nothing survived: delete parent node
+ if ($all_whitespace) return false; // NOTE(review): flag is set from the input, so a result left with only whitespace after deletions is still returned as an array - confirm intended
+ if ($tokens_of_children == $result) return true; // loose comparison: nothing was changed
+ return $result;
}
}
class HTMLDTD_ChildDef_Optional extends HTMLDTD_ChildDef_Simple
diff --git a/tests/HTML_Generator.php b/tests/HTML_Generator.php
index 97d77dd2..f867ea22 100644
--- a/tests/HTML_Generator.php
+++ b/tests/HTML_Generator.php
@@ -30,6 +30,9 @@ class Test_HTML_Generator extends UnitTestCase
$inputs[4] = new MF_StartTag('asdf');
$expect[4] = '';
+ $inputs[5] = new MF_EmptyTag('br'); // empty tag without attributes
+ $expect[5] = '<br />'; // no extra space between the tag name and ' />'
+
foreach ($inputs as $i => $input) {
$result = $this->gen->generateFromToken($input);
$this->assertEqual($result, $expect[$i]);
diff --git a/tests/PureHTMLDefinition.php b/tests/PureHTMLDefinition.php
index 2c15fefa..56fe3d85 100644
--- a/tests/PureHTMLDefinition.php
+++ b/tests/PureHTMLDefinition.php
@@ -154,20 +154,26 @@ class Test_PureHTMLDefinition extends UnitTestCase
$inputs[9] = array(
new MF_StartTag('ol')
+
,new MF_StartTag('li')
,new MF_Text('Item 1')
+
,new MF_StartTag('li')
,new MF_Text('Item 2')
+
,new MF_EndTag('ol')
);
$expect[9] = array(
new MF_StartTag('ol')
+
,new MF_StartTag('li')
,new MF_Text('Item 1')
,new MF_EndTag('li')
+
,new MF_StartTag('li')
,new MF_Text('Item 2')
,new MF_EndTag('li')
+
,new MF_EndTag('ol')
);
@@ -181,4 +187,122 @@ class Test_PureHTMLDefinition extends UnitTestCase
}
+class Test_HTMLDTD_ChildDef extends UnitTestCase // unit tests for the HTMLDTD_ChildDef_* child validators
+{
+ // constructor normalisation: both input forms end up as a name => true map
+ function test_simple() {
+ // pipe-separated string; the spaces around the names are ignored
+ $def = new HTMLDTD_ChildDef_Simple('foobar | bang |gizmo');
+ $this->assertEqual($def->elements,
+ array(
+ 'foobar' => true
+ ,'bang' => true
+ ,'gizmo' => true
+ ));
+ // plain array of names
+ $def = new HTMLDTD_ChildDef_Simple(array('href', 'src'));
+ $this->assertEqual($def->elements,
+ array(
+ 'href' => true
+ ,'src' => true
+ ));
+ }
+ // Required definition without #PCDATA: rejected children are dropped silently
+ function test_required_pcdata_forbidden() {
+
+ $def = new HTMLDTD_ChildDef_Required('dt | dd');
+ // no children at all: the parent must be deleted
+ $inputs[0] = array();
+ $expect[0] = false;
+ // illegal top-level text and an illegal element are removed, legal children stay
+ $inputs[1] = array(
+ new MF_StartTag('dt')
+ ,new MF_Text('Term')
+ ,new MF_EndTag('dt')
+
+ ,new MF_Text('Text in an illegal location')
+
+ ,new MF_StartTag('dd')
+ ,new MF_Text('Definition')
+ ,new MF_EndTag('dd')
+
+ ,new MF_StartTag('b') // test tag removal too
+ ,new MF_EndTag('b')
+ );
+ $expect[1] = array(
+ new MF_StartTag('dt')
+ ,new MF_Text('Term')
+ ,new MF_EndTag('dt')
+
+ ,new MF_StartTag('dd')
+ ,new MF_Text('Definition')
+ ,new MF_EndTag('dd')
+ );
+ // only illegal text: nothing survives, so the parent is deleted
+ $inputs[2] = array(new MF_Text('How do you do!'));
+ $expect[2] = false;
+
+ // whitespace shouldn't trigger it
+ $inputs[3] = array(
+ new MF_Text("\n")
+ ,new MF_StartTag('dd')
+ ,new MF_Text('Definition')
+ ,new MF_EndTag('dd')
+ ,new MF_Text(' ')
+ );
+ $expect[3] = true;
+ // whitespace on both sides of a removed element is kept
+ $inputs[4] = array(
+ new MF_StartTag('dd')
+ ,new MF_Text('Definition')
+ ,new MF_EndTag('dd')
+ ,new MF_Text(' ')
+ ,new MF_StartTag('b')
+ ,new MF_EndTag('b')
+ ,new MF_Text(' ')
+ );
+ $expect[4] = array(
+ new MF_StartTag('dd')
+ ,new MF_Text('Definition')
+ ,new MF_EndTag('dd')
+ ,new MF_Text(' ')
+ ,new MF_Text(' ')
+ );
+ $inputs[5] = array( // nothing but whitespace: the parent is deleted
+ new MF_Text(' ')
+ ,new MF_Text("\t")
+ );
+ $expect[5] = false;
+ // booleans are compared strictly so that an array result cannot pass for true
+ foreach ($inputs as $i => $input) {
+ $result = $def->validateChildren($input);
+ if (is_bool($expect[$i])) {
+ $this->assertIdentical($expect[$i], $result);
+ } else {
+ $this->assertEqual($expect[$i], $result);
+ paintIf($result, $result != $expect[$i]);
+ }
+ }
+
+ }
+ // Required definition with #PCDATA: a rejected tag is kept as a text token holding its markup
+ function test_required_pcdata_allowed() {
+ $def = new HTMLDTD_ChildDef_Required('#PCDATA | b');
+ $input = array(
+ new MF_StartTag('b')
+ ,new MF_Text('Bold text')
+ ,new MF_EndTag('b')
+ ,new MF_EmptyTag('img') // illegal tag
+ );
+ $expect = array(
+ new MF_StartTag('b')
+ ,new MF_Text('Bold text')
+ ,new MF_EndTag('b')
+ ,new MF_Text('<img />') // generated source of the rejected tag, now plain text
+ );
+ $this->assertEqual($expect, $def->validateChildren($input));
+ }
+
+}
+
?>
\ No newline at end of file