diff --git a/library/HTMLPurifier/ElementDef.php b/library/HTMLPurifier/ElementDef.php
index 6af5a09c..ec559a3e 100644
--- a/library/HTMLPurifier/ElementDef.php
+++ b/library/HTMLPurifier/ElementDef.php
@@ -83,6 +83,14 @@ class HTMLPurifier_ElementDef
/**
* Lookup table of tags excluded from all descendants of this tag.
+ * @note SGML permits exclusions for all descendants, but this is
+ * not possible with DTDs or XML Schemas. W3C has elected to
+ * use complicated compositions of content_models to simulate
+ * exclusion for children, but we go the simpler, SGML-style
+ * route of flat-out exclusions, which correctly apply to
+ * all descendants and not just children. Note that the XHTML
+ * Modularization Abstract Modules are blithely unaware of such
+ * distinctions.
* @public
*/
var $excludes = array();
diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php
index 4a420a7d..9eb11a76 100644
--- a/library/HTMLPurifier/HTMLModule.php
+++ b/library/HTMLPurifier/HTMLModule.php
@@ -223,6 +223,24 @@ class HTMLPurifier_HTMLModule
}
$attr[0] = $attr_includes;
}
+
+ /**
+ * Convenience function that generates a lookup table: every value
+ * in the list becomes a key whose value is boolean true.
+ * @param $list Array of values to turn into a lookup, or a string
+ * @note When $list is a string, all arguments passed to the call
+ * are used as the list; null values are always skipped
+ * @return Lookup array equivalent of list
+ */
+ function makeLookup($list) {
+ if (is_string($list)) $list = func_get_args(); // variadic call form
+ $ret = array();
+ foreach ($list as $value) {
+ if (is_null($value)) continue; // nulls never become keys
+ $ret[$value] = true;
+ }
+ return $ret;
+ }
}
?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Text.php b/library/HTMLPurifier/HTMLModule/Text.php
index 64b6e110..125c5113 100644
--- a/library/HTMLPurifier/HTMLModule/Text.php
+++ b/library/HTMLPurifier/HTMLModule/Text.php
@@ -10,65 +10,61 @@ require_once 'HTMLPurifier/HTMLModule.php';
* - Block Structural (div, p)
* - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
* - Inline Structural (br, span)
- * We have elected not to follow suite, but this may change.
+ * This module, functionally, does not distinguish between these
+ * sub-modules, but the code is internally structured to reflect
+ * these distinctions.
*/
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
var $name = 'Text';
-
- var $elements = array('abbr', 'acronym', 'address', 'blockquote',
- 'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3',
- 'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
- 'var');
-
var $content_sets = array(
- 'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
- 'Block' => 'address | blockquote | div | p | pre',
- 'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
'Flow' => 'Heading | Block | Inline'
);
function HTMLPurifier_HTMLModule_Text() {
- foreach ($this->elements as $element) {
- $this->info[$element] = new HTMLPurifier_ElementDef();
- // attributes
- if ($element == 'br') {
- $this->info[$element]->attr = array(0 => array('Core'));
- } elseif ($element == 'blockquote' || $element == 'q') {
- $this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI');
- } else {
- $this->info[$element]->attr = array(0 => array('Common'));
- }
- // content models
- if ($element == 'br') {
- $this->info[$element]->content_model_type = 'empty';
- } elseif ($element == 'blockquote') {
- $this->info[$element]->content_model = 'Heading | Block | List';
- $this->info[$element]->content_model_type = 'optional';
- } elseif ($element == 'div') {
- $this->info[$element]->content_model = '#PCDATA | Flow';
- $this->info[$element]->content_model_type = 'optional';
- } else {
- $this->info[$element]->content_model = '#PCDATA | Inline';
- $this->info[$element]->content_model_type = 'optional';
- }
- }
- // SGML permits exclusions for all descendants, but this is
- // not possible with DTDs or XML Schemas. W3C has elected to
- // use complicated compositions of content_models to simulate
- // exclusion for children, but we go the simpler, SGML-style
- // route of flat-out exclusions. Note that the Abstract Module
- // is blithely unaware of such distinctions.
- $this->info['pre']->excludes = array_flip(array(
- 'img', 'big', 'small',
- 'object', 'applet', 'font', 'basefont' // generally not allowed
- ));
- $this->info['p']->auto_close = array_flip(array(
+ // Elements are added in the sub-module groups named in the class comment
+ // Inline Phrasal -------------------------------------------------
+ $this->addElement('abbr', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('acronym', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('cite', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('code', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('dfn', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('em', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('kbd', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('q', true, 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
+ $this->addElement('samp', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('strong', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('var', true, 'Inline', 'Inline', 'Common');
+
+ // Inline Structural ----------------------------------------------
+ $this->addElement('span', true, 'Inline', 'Inline', 'Common');
+ $this->addElement('br', true, 'Inline', 'Empty', 'Core');
+
+ // Block Phrasal --------------------------------------------------
+ $this->addElement('address', true, 'Block', 'Inline', 'Common');
+ $this->addElement('blockquote', true, 'Block',
+ 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') );
+ $pre =& $this->addElement('pre', true, 'Block', 'Inline', 'Common');
+ $pre->excludes = $this->makeLookup( // tags barred from all descendants of pre
+ 'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' );
+ $this->addElement('h1', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h2', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h3', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h4', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h5', true, 'Heading', 'Inline', 'Common');
+ $this->addElement('h6', true, 'Heading', 'Inline', 'Common');
+
+ // Block Structural -----------------------------------------------
+ $p =& $this->addElement('p', true, 'Block', 'Inline', 'Common');
+ // NOTE: this hard-coded auto_close list is really ad hoc;
+ // implementing some general heuristics would probably be better
+ $p->auto_close = $this->makeLookup(
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
- 'table', 'ul'
- ));
+ 'table', 'ul' );
+ $this->addElement('div', true, 'Block', 'Flow', 'Common');
+
}
}
diff --git a/tests/HTMLPurifier/HTMLModule/BdoTest.php b/tests/HTMLPurifier/HTMLModule/BdoTest.php
new file mode 100644
index 00000000..c402fc29
--- /dev/null
+++ b/tests/HTMLPurifier/HTMLModule/BdoTest.php
@@ -0,0 +1,30 @@
+assertResult(
+ '
+
+ #PCDATA Inline
+
+ ', true, array('Attr.EnableID' => true)
+ );
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/HTMLModuleHarness.php b/tests/HTMLPurifier/HTMLModuleHarness.php
new file mode 100644
index 00000000..1f60f435
--- /dev/null
+++ b/tests/HTMLPurifier/HTMLModuleHarness.php
@@ -0,0 +1,14 @@
+obj = new HTMLPurifier_Strategy_Core();
+ }
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/HTMLModuleTest.php b/tests/HTMLPurifier/HTMLModuleTest.php
index 3aff9cc3..9e0b7100 100644
--- a/tests/HTMLPurifier/HTMLModuleTest.php
+++ b/tests/HTMLPurifier/HTMLModuleTest.php
@@ -121,6 +121,30 @@ class HTMLPurifier_HTMLModuleTest extends UnitTestCase
}
+ function test_makeLookup() {
+ // makeLookup() must return the same lookup for list and argument forms
+ $module = new HTMLPurifier_HTMLModule();
+ // single value: bare string argument vs. one-element array
+ $this->assertIdentical(
+ $module->makeLookup('foo'),
+ array('foo' => true)
+ );
+ $this->assertIdentical(
+ $module->makeLookup(array('foo')),
+ array('foo' => true)
+ );
+ // two values: multiple string arguments vs. two-element array
+ $this->assertIdentical(
+ $module->makeLookup('foo', 'two'),
+ array('foo' => true, 'two' => true)
+ );
+ $this->assertIdentical(
+ $module->makeLookup(array('foo', 'two')),
+ array('foo' => true, 'two' => true)
+ );
+
+ }
+
}
?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/Harness.php b/tests/HTMLPurifier/Harness.php
index 6e94e5ab..237105d1 100644
--- a/tests/HTMLPurifier/Harness.php
+++ b/tests/HTMLPurifier/Harness.php
@@ -73,12 +73,17 @@ class HTMLPurifier_Harness extends UnitTestCase
$context->loadArray($context_array);
if ($this->to_tokens && is_string($input)) {
- $input = $this->lexer->tokenizeHTML($input, $config, $context);
+ // $func may cause $input to change, so "clone" another copy
+ // to sacrifice
+ $input = $this->lexer->tokenizeHTML($s = $input, $config, $context);
+ $input_c = $this->lexer->tokenizeHTML($s, $config, $context);
+ } else {
+ $input_c = $input;
}
// call the function
$func = $this->func;
- $result = $this->obj->$func($input, $config, $context);
+ $result = $this->obj->$func($input_c, $config, $context);
// test a bool result
if (is_bool($result)) {
diff --git a/tests/test_files.php b/tests/test_files.php
index 06af15ee..0f989c75 100644
--- a/tests/test_files.php
+++ b/tests/test_files.php
@@ -62,6 +62,7 @@ $test_files[] = 'EntityParserTest.php';
$test_files[] = 'GeneratorTest.php';
$test_files[] = 'HTMLModuleManagerTest.php';
$test_files[] = 'HTMLModuleTest.php';
+$test_files[] = 'HTMLModule/BdoTest.php';
$test_files[] = 'IDAccumulatorTest.php';
$test_files[] = 'LanguageFactoryTest.php';
$test_files[] = 'LanguageTest.php';