diff --git a/library/HTMLPurifier/ElementDef.php b/library/HTMLPurifier/ElementDef.php index 6af5a09c..ec559a3e 100644 --- a/library/HTMLPurifier/ElementDef.php +++ b/library/HTMLPurifier/ElementDef.php @@ -83,6 +83,14 @@ class HTMLPurifier_ElementDef /** * Lookup table of tags excluded from all descendants of this tag. + * @note SGML permits exclusions for all descendants, but this is + * not possible with DTDs or XML Schemas. W3C has elected to + * use complicated compositions of content_models to simulate + * exclusion for children, but we go the simpler, SGML-style + * route of flat-out exclusions, which correctly apply to + * all descendants and not just children. Note that the XHTML + * Modularization Abstract Modules are blithely unaware of such + * distinctions. * @public */ var $excludes = array(); diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php index 4a420a7d..9eb11a76 100644 --- a/library/HTMLPurifier/HTMLModule.php +++ b/library/HTMLPurifier/HTMLModule.php @@ -223,6 +223,24 @@ class HTMLPurifier_HTMLModule } $attr[0] = $attr_includes; } + + /** + * Convenience function that generates a lookup table with boolean + * true as value. + * @param $list List of values to turn into a lookup + * @note You can also pass an arbitrary number of arguments in + * place of the regular argument + * @return Lookup array equivalent of list + */ + function makeLookup($list) { + if (is_string($list)) $list = func_get_args(); + $ret = array(); + foreach ($list as $value) { + if (is_null($value)) continue; + $ret[$value] = true; + } + return $ret; + } } ?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/Text.php b/library/HTMLPurifier/HTMLModule/Text.php index 64b6e110..125c5113 100644 --- a/library/HTMLPurifier/HTMLModule/Text.php +++ b/library/HTMLPurifier/HTMLModule/Text.php @@ -10,65 +10,61 @@ require_once 'HTMLPurifier/HTMLModule.php'; * - Block Structural (div, p) * - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var) * - Inline Structural (br, span) - * We have elected not to follow suite, but this may change. + * This module, functionally, does not distinguish between these + * sub-modules, but the code is internally structured to reflect + * these distinctions. */ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule { var $name = 'Text'; - - var $elements = array('abbr', 'acronym', 'address', 'blockquote', - 'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3', - 'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong', - 'var'); - var $content_sets = array( - 'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6', - 'Block' => 'address | blockquote | div | p | pre', - 'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var', 'Flow' => 'Heading | Block | Inline' ); function HTMLPurifier_HTMLModule_Text() { - foreach ($this->elements as $element) { - $this->info[$element] = new HTMLPurifier_ElementDef(); - // attributes - if ($element == 'br') { - $this->info[$element]->attr = array(0 => array('Core')); - } elseif ($element == 'blockquote' || $element == 'q') { - $this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI'); - } else { - $this->info[$element]->attr = array(0 => array('Common')); - } - // content models - if ($element == 'br') { - $this->info[$element]->content_model_type = 'empty'; - } elseif ($element == 'blockquote') { - $this->info[$element]->content_model = 'Heading | Block | List'; - $this->info[$element]->content_model_type = 'optional'; - } elseif ($element == 'div') { - $this->info[$element]->content_model = '#PCDATA | Flow'; - $this->info[$element]->content_model_type = 'optional'; - } else { - $this->info[$element]->content_model = '#PCDATA | Inline'; - $this->info[$element]->content_model_type = 'optional'; - } - } - // SGML permits exclusions for all descendants, but this is - // not possible with DTDs or XML Schemas. W3C has elected to - // use complicated compositions of content_models to simulate - // exclusion for children, but we go the simpler, SGML-style - // route of flat-out exclusions. Note that the Abstract Module - // is blithely unaware of such distinctions. - $this->info['pre']->excludes = array_flip(array( - 'img', 'big', 'small', - 'object', 'applet', 'font', 'basefont' // generally not allowed - )); - $this->info['p']->auto_close = array_flip(array( + + // Inline Phrasal ------------------------------------------------- + $this->addElement('abbr', true, 'Inline', 'Inline', 'Common'); + $this->addElement('acronym', true, 'Inline', 'Inline', 'Common'); + $this->addElement('cite', true, 'Inline', 'Inline', 'Common'); + $this->addElement('code', true, 'Inline', 'Inline', 'Common'); + $this->addElement('dfn', true, 'Inline', 'Inline', 'Common'); + $this->addElement('em', true, 'Inline', 'Inline', 'Common'); + $this->addElement('kbd', true, 'Inline', 'Inline', 'Common'); + $this->addElement('q', true, 'Inline', 'Inline', 'Common', array('cite' => 'URI')); + $this->addElement('samp', true, 'Inline', 'Inline', 'Common'); + $this->addElement('strong', true, 'Inline', 'Inline', 'Common'); + $this->addElement('var', true, 'Inline', 'Inline', 'Common'); + + // Inline Structural ---------------------------------------------- + $this->addElement('span', true, 'Inline', 'Inline', 'Common'); + $this->addElement('br', true, 'Inline', 'Empty', 'Core'); + + // Block Phrasal -------------------------------------------------- + $this->addElement('address', true, 'Block', 'Inline', 'Common'); + $this->addElement('blockquote', true, 'Block', + 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') ); + $pre =& $this->addElement('pre', true, 'Block', 'Inline', 'Common'); + $pre->excludes = $this->makeLookup( + 'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' ); + $this->addElement('h1', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h2', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h3', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h4', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h5', true, 'Heading', 'Inline', 'Common'); + $this->addElement('h6', true, 'Heading', 'Inline', 'Common'); + + // Block Structural ----------------------------------------------- + $p =& $this->addElement('p', true, 'Block', 'Inline', 'Common'); + // this seems really ad hoc: implementing some general + // heuristics would probably be better + $p->auto_close = $this->makeLookup( 'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre', - 'table', 'ul' - )); + 'table', 'ul' ); + $this->addElement('div', true, 'Block', 'Flow', 'Common'); + } } diff --git a/tests/HTMLPurifier/HTMLModule/BdoTest.php b/tests/HTMLPurifier/HTMLModule/BdoTest.php new file mode 100644 index 00000000..c402fc29 --- /dev/null +++ b/tests/HTMLPurifier/HTMLModule/BdoTest.php @@ -0,0 +1,30 @@ +assertResult( + ' + + #PCDATA Inline + + ', true, array('Attr.EnableID' => true) + ); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/HTMLModuleHarness.php b/tests/HTMLPurifier/HTMLModuleHarness.php new file mode 100644 index 00000000..1f60f435 --- /dev/null +++ b/tests/HTMLPurifier/HTMLModuleHarness.php @@ -0,0 +1,14 @@ +obj = new HTMLPurifier_Strategy_Core(); + } +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/HTMLModuleTest.php b/tests/HTMLPurifier/HTMLModuleTest.php index 3aff9cc3..9e0b7100 100644 --- a/tests/HTMLPurifier/HTMLModuleTest.php +++ b/tests/HTMLPurifier/HTMLModuleTest.php @@ -121,6 +121,30 @@ class HTMLPurifier_HTMLModuleTest extends UnitTestCase } + function test_makeLookup() { + + $module = new HTMLPurifier_HTMLModule(); + + $this->assertIdentical( + $module->makeLookup('foo'), + array('foo' => true) + ); + $this->assertIdentical( + $module->makeLookup(array('foo')), + array('foo' => true) + ); + + $this->assertIdentical( + $module->makeLookup('foo', 'two'), + array('foo' => true, 'two' => true) + ); + $this->assertIdentical( + $module->makeLookup(array('foo', 'two')), + array('foo' => true, 'two' => true) + ); + + } + } ?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Harness.php b/tests/HTMLPurifier/Harness.php index 6e94e5ab..237105d1 100644 --- a/tests/HTMLPurifier/Harness.php +++ b/tests/HTMLPurifier/Harness.php @@ -73,12 +73,17 @@ class HTMLPurifier_Harness extends UnitTestCase $context->loadArray($context_array); if ($this->to_tokens && is_string($input)) { - $input = $this->lexer->tokenizeHTML($input, $config, $context); + // $func may cause $input to change, so "clone" another copy + // to sacrifice + $input = $this->lexer->tokenizeHTML($s = $input, $config, $context); + $input_c = $this->lexer->tokenizeHTML($s, $config, $context); + } else { + $input_c = $input; } // call the function $func = $this->func; - $result = $this->obj->$func($input, $config, $context); + $result = $this->obj->$func($input_c, $config, $context); // test a bool result if (is_bool($result)) { diff --git a/tests/test_files.php b/tests/test_files.php index 06af15ee..0f989c75 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -62,6 +62,7 @@ $test_files[] = 'EntityParserTest.php'; $test_files[] = 'GeneratorTest.php'; $test_files[] = 'HTMLModuleManagerTest.php'; $test_files[] = 'HTMLModuleTest.php'; +$test_files[] = 'HTMLModule/BdoTest.php'; $test_files[] = 'IDAccumulatorTest.php'; $test_files[] = 'LanguageFactoryTest.php'; $test_files[] = 'LanguageTest.php';