diff --git a/HTML_Generator.php b/HTML_Generator.php index b8aeef23..9726267d 100644 --- a/HTML_Generator.php +++ b/HTML_Generator.php @@ -21,7 +21,7 @@ class HTML_Generator } elseif (is_a($token, 'MF_EmptyTag')) { $attr = $this->generateAttributes($token->attributes); - return '<' . $token->name . ' ' . $attr . ' />'; + return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />'; } elseif (is_a($token, 'MF_Text')) { return htmlentities($token->data, ENT_COMPAT, 'UTF-8'); diff --git a/MarkupFragment.php b/MarkupFragment.php index 3d99d4dd..f94ecb27 100644 --- a/MarkupFragment.php +++ b/MarkupFragment.php @@ -31,9 +31,10 @@ class MF_Text extends MF { var $name = '#PCDATA'; var $data; + var $is_whitespace = false; function MF_Text($data) { - $this->data = trim($data); // fairly certain trimming it's okay - // but it's not default SAX behavior + $this->data = $data; + if (trim($data, " \n\r\t") === '') $this->is_whitespace = true; } function append($mf_text) { return new MF_Text($this->data . $mf_text->data); diff --git a/PureHTMLDefinition.php b/PureHTMLDefinition.php index 80bb530e..1e189550 100644 --- a/PureHTMLDefinition.php +++ b/PureHTMLDefinition.php @@ -342,6 +342,10 @@ class HTMLDTD_Element } +// HTMLDTD_ChildDef and inheritance have three types of output: +// true = leave nodes as is +// false = delete parent node and all children +// array(...) 
= replace children nodes with these class HTMLDTD_ChildDef { var $dtd_regex; @@ -354,13 +358,76 @@ class HTMLDTD_ChildDef_Simple extends HTMLDTD_ChildDef { var $elements = array(); function HTMLDTD_ChildDef_Simple($elements) { + if (is_string($elements)) { + $elements = str_replace(' ', '', $elements); + $elements = explode('|', $elements); + } + $elements = array_flip($elements); + foreach ($elements as $i => $x) $elements[$i] = true; $this->elements = $elements; + $this->gen = new HTML_Generator(); } } class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple { function validateChildren($tokens_of_children) { - + // if there are no tokens, delete parent node + if (empty($tokens_of_children)) return false; + + // the new set of children + $result = array(); + + // current depth into the nest + $nesting = 0; + + // whether or not we're deleting a node + $is_deleting = false; + + // whether or not parsed character data is allowed + // this controls whether or not we silently drop a tag + // or generate escaped HTML from it + $pcdata_allowed = isset($this->elements['#PCDATA']); + + // a little sanity check to make sure it's not ALL whitespace + $all_whitespace = true; + + foreach ($tokens_of_children as $token) { + if (!empty($token->is_whitespace)) { + $result[] = $token; + continue; + } + $all_whitespace = false; // phew, we're not talking about whitespace + + $is_child = ($nesting == 0); + + if (is_a($token, 'MF_StartTag')) { + $nesting++; + } elseif (is_a($token, 'MF_EndTag')) { + $nesting--; + } + + if ($is_child) { + $is_deleting = false; + if (!isset($this->elements[$token->name])) { + $is_deleting = true; + if ($pcdata_allowed) { + $result[] = new MF_Text($this->gen->generateFromToken($token)); + } + continue; + } + } + if (!$is_deleting) { + $result[] = $token; + } elseif ($pcdata_allowed) { + $result[] = new MF_Text($this->gen->generateFromToken($token)); + } else { + // drop silently + } + } + if (empty($result)) return false; + if ($all_whitespace) 
return false; + if ($tokens_of_children == $result) return true; + return $result; } } class HTMLDTD_ChildDef_Optional extends HTMLDTD_ChildDef_Simple diff --git a/tests/HTML_Generator.php b/tests/HTML_Generator.php index 97d77dd2..f867ea22 100644 --- a/tests/HTML_Generator.php +++ b/tests/HTML_Generator.php @@ -30,6 +30,9 @@ class Test_HTML_Generator extends UnitTestCase $inputs[4] = new MF_StartTag('asdf'); $expect[4] = ''; + $inputs[5] = new MF_EmptyTag('br'); + $expect[5] = '<br />'; + foreach ($inputs as $i => $input) { $result = $this->gen->generateFromToken($input); $this->assertEqual($result, $expect[$i]); diff --git a/tests/PureHTMLDefinition.php b/tests/PureHTMLDefinition.php index 2c15fefa..56fe3d85 100644 --- a/tests/PureHTMLDefinition.php +++ b/tests/PureHTMLDefinition.php @@ -154,20 +154,26 @@ class Test_PureHTMLDefinition extends UnitTestCase $inputs[9] = array( new MF_StartTag('ol') + ,new MF_StartTag('li') ,new MF_Text('Item 1') + ,new MF_StartTag('li') ,new MF_Text('Item 2') + ,new MF_EndTag('ol') ); $expect[9] = array( new MF_StartTag('ol') + ,new MF_StartTag('li') ,new MF_Text('Item 1') ,new MF_EndTag('li') + ,new MF_StartTag('li') ,new MF_Text('Item 2') ,new MF_EndTag('li') + ,new MF_EndTag('ol') ); @@ -181,4 +187,122 @@ class Test_PureHTMLDefinition extends UnitTestCase } +class Test_HTMLDTD_ChildDef extends UnitTestCase +{ + + function test_simple() { + + $def = new HTMLDTD_ChildDef_Simple('foobar | bang |gizmo'); + $this->assertEqual($def->elements, + array( + 'foobar' => true + ,'bang' => true + ,'gizmo' => true + )); + + $def = new HTMLDTD_ChildDef_Simple(array('href', 'src')); + $this->assertEqual($def->elements, + array( + 'href' => true + ,'src' => true + )); + } + + function test_required_pcdata_forbidden() { + + $def = new HTMLDTD_ChildDef_Required('dt | dd'); + + $inputs[0] = array(); + $expect[0] = false; + + $inputs[1] = array( + new MF_StartTag('dt') + ,new MF_Text('Term') + ,new MF_EndTag('dt') + + ,new MF_Text('Text in an illegal location') + + ,new MF_StartTag('dd') + ,new MF_Text('Definition') + ,new MF_EndTag('dd') + + ,new MF_StartTag('b') // test tag removal too + ,new MF_EndTag('b') + ); + $expect[1] = array( + new MF_StartTag('dt') + ,new MF_Text('Term') + ,new MF_EndTag('dt') + + ,new MF_StartTag('dd') + ,new MF_Text('Definition') + ,new MF_EndTag('dd') + ); + + $inputs[2] = array(new MF_Text('How do you do!')); + $expect[2] = false; + + // whitespace shouldn't trigger it + $inputs[3] = array( + new 
MF_Text("\n") + ,new MF_StartTag('dd') + ,new MF_Text('Definition') + ,new MF_EndTag('dd') + ,new MF_Text(' ') + ); + $expect[3] = true; + + $inputs[4] = array( + new MF_StartTag('dd') + ,new MF_Text('Definition') + ,new MF_EndTag('dd') + ,new MF_Text(' ') + ,new MF_StartTag('b') + ,new MF_EndTag('b') + ,new MF_Text(' ') + ); + $expect[4] = array( + new MF_StartTag('dd') + ,new MF_Text('Definition') + ,new MF_EndTag('dd') + ,new MF_Text(' ') + ,new MF_Text(' ') + ); + $inputs[5] = array( + new MF_Text(' ') + ,new MF_Text("\t") + ); + $expect[5] = false; + + foreach ($inputs as $i => $input) { + $result = $def->validateChildren($input); + if (is_bool($expect[$i])) { + $this->assertIdentical($expect[$i], $result); + } else { + $this->assertEqual($expect[$i], $result); + paintIf($result, $result != $expect[$i]); + } + } + + } + + function test_required_pcdata_allowed() { + $def = new HTMLDTD_ChildDef_Required('#PCDATA | b'); + $input = array( + new MF_StartTag('b') + ,new MF_Text('Bold text') + ,new MF_EndTag('b') + ,new MF_EmptyTag('img') // illegal tag + ); + $expect = array( + new MF_StartTag('b') + ,new MF_Text('Bold text') + ,new MF_EndTag('b') + ,new MF_Text('<img />') + ); + $this->assertEqual($expect, $def->validateChildren($input)); + } + +} + ?> \ No newline at end of file