diff --git a/NEWS b/NEWS index 63ee7bf8..ae82e27f 100644 --- a/NEWS +++ b/NEWS @@ -12,8 +12,21 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 1.5.0, unknown release date ! Added a rudimentary I18N and L10N system modeled off MediaWiki - Allow 'x' subtag in language codes +- Fixed buggy chameleon-support for ins and del . Added support for IDREF attributes (i.e. for) . Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens +. Removed context variable ParentType, replaced with IsInline, which + is false when you're not inline and an integer of the parent that + caused you to become inline when you are (so possibly zero) +. Removed ElementDef->type in favor of ElementDef->descendants_are_inline + and HTMLDefinition->content_sets +. StrictBlockquote now reports what elements it's supposed to allow, + rather than what it does allow +. Removed HTMLDefinition->info_flow_elements in favor of + HTMLDefinition->content_sets['Flow'] +. Removed redundant "exclusionary" definitions from DTD roster +. StrictBlockquote now requires a construction parameter as if it + were a Required ChildDef; this is the "real" set of allowed elements 1.4.2, unknown release date ! 
docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier diff --git a/library/HTMLPurifier/ChildDef/Chameleon.php b/library/HTMLPurifier/ChildDef/Chameleon.php index feb84a15..afe0299f 100644 --- a/library/HTMLPurifier/ChildDef/Chameleon.php +++ b/library/HTMLPurifier/ChildDef/Chameleon.php @@ -38,22 +38,13 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef } function validateChildren($tokens_of_children, $config, &$context) { - $parent_type = $context->get('ParentType'); - switch ($parent_type) { - case 'unknown': - case 'inline': - $result = $this->inline->validateChildren( - $tokens_of_children, $config, $context); - break; - case 'block': - $result = $this->block->validateChildren( - $tokens_of_children, $config, $context); - break; - default: - trigger_error('Invalid context', E_USER_ERROR); - return false; + if ($context->get('IsInline') === false) { + return $this->block->validateChildren( + $tokens_of_children, $config, $context); + } else { + return $this->inline->validateChildren( + $tokens_of_children, $config, $context); } - return $result; } } diff --git a/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/library/HTMLPurifier/ChildDef/StrictBlockquote.php index 980acac3..0352e488 100644 --- a/library/HTMLPurifier/ChildDef/StrictBlockquote.php +++ b/library/HTMLPurifier/ChildDef/StrictBlockquote.php @@ -4,27 +4,31 @@ require_once 'HTMLPurifier/ChildDef/Required.php'; /** * Takes the contents of blockquote when in strict and reformats for validation. 
- * - * From XHTML 1.0 Transitional to Strict, there is a notable change where */ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required { + var $real_elements; + var $fake_elements; var $allow_empty = true; var $type = 'strictblockquote'; var $init = false; - function HTMLPurifier_ChildDef_StrictBlockquote() {} function validateChildren($tokens_of_children, $config, &$context) { $def = $config->getHTMLDefinition(); if (!$this->init) { // allow all inline elements - $this->elements = $def->info_flow_elements; - $this->elements['#PCDATA'] = true; + $this->real_elements = $this->elements; + $this->fake_elements = $def->content_sets['Flow']; + $this->fake_elements['#PCDATA'] = true; $this->init = true; } + // trick the parent class into thinking it allows more + $this->elements = $this->fake_elements; $result = parent::validateChildren($tokens_of_children, $config, $context); + $this->elements = $this->real_elements; + if ($result === false) return array(); if ($result === true) $result = $tokens_of_children; @@ -40,8 +44,10 @@ extends HTMLPurifier_ChildDef_Required // ifs are nested for readability if (!$is_inline) { if (!$depth) { - if (($token->type == 'text') || - ($def->info[$token->name]->type == 'inline')) { + if ( + $token->type == 'text' || + !isset($this->elements[$token->name]) + ) { $is_inline = true; $ret[] = $block_wrap_start; } @@ -50,7 +56,7 @@ extends HTMLPurifier_ChildDef_Required if (!$depth) { // starting tokens have been inline text / empty if ($token->type == 'start' || $token->type == 'empty') { - if ($def->info[$token->name]->type == 'block') { + if (isset($this->elements[$token->name])) { // ended $ret[] = $block_wrap_end; $is_inline = false; diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 00bcface..ea2582c2 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -163,23 +163,19 @@ class HTMLPurifier_HTMLDefinition 
var $info_attr_transform_pre = array(); /** - * List of HTMLPurifier_AttrTransform to be performed after validation/ + * List of HTMLPurifier_AttrTransform to be performed after validation. * @public */ var $info_attr_transform_post = array(); - /** - * Lookup table of flow elements - * @public - */ - var $info_flow_elements = array(); - /** * Boolean is a strict definition? * @public */ var $strict; + var $content_sets = array(); + /** * Initializes the definition, the meat of the class. */ @@ -258,11 +254,6 @@ class HTMLPurifier_HTMLDefinition $e_Block = new HTMLPurifier_ChildDef_Optional($e_block); $e__flow = "#PCDATA | $e_block | form | $e_inline | $e_misc"; $e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow); - $e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA". - " | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline"); - $e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a". - " | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic | $e_inline_forms". - " | $e_misc_inline"); $e_form_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block | $e_inline | $e_misc");//unused $e_form_button_content = new HTMLPurifier_ChildDef_Optional( "#PCDATA | p | $e_heading | div | $e_lists | $e_blocktext |". 
@@ -278,7 +269,7 @@ class HTMLPurifier_HTMLDefinition $this->info['div']->child = $e_Flow; if ($this->strict) { - $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote(); + $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote($e_block); } else { $this->info['blockquote']->child = $e_Flow; } @@ -337,9 +328,9 @@ class HTMLPurifier_HTMLDefinition $this->info['br']->child = $this->info['hr']->child = new HTMLPurifier_ChildDef_Empty(); - $this->info['pre']->child = $e_pre_content; - - $this->info['a']->child = $e_a_content; + // exclusionary + $this->info['pre']->child = $e_Inline; + $this->info['a']->child = $e_Inline; $this->info['table']->child = new HTMLPurifier_ChildDef_Table(); @@ -355,27 +346,16 @@ class HTMLPurifier_HTMLDefinition $this->info['td']->child = $e_Flow; ////////////////////////////////////////////////////////////////////// - // info[]->type : defines the type of the element (block or inline) + // misc compat stuff with XHTMLDefinition - // unknown until proven inline/block - foreach ($this->info as $i => $x) { - $this->info[$i]->type = 'unknown'; - } - - // reuses $e_Inline and $e_Block - foreach ($e_Inline->elements as $name => $bool) { - if ($name == '#PCDATA') continue; - if (!isset($this->info[$name])) continue; - $this->info[$name]->type = 'inline'; - } - - foreach ($e_Block->elements as $name => $bool) { - if (!isset($this->info[$name])) continue; - $this->info[$name]->type = 'block'; + foreach ($this->info as $key => $def) { + if ($this->info[$key]->child == $e_Inline) { + $this->info[$key]->descendants_are_inline = true; + } } foreach ($e_Flow->elements as $name => $bool) { - $this->info_flow_elements[$name] = true; + $this->content_sets['Flow'][$name] = true; } ////////////////////////////////////////////////////////////////////// @@ -649,11 +629,7 @@ class HTMLPurifier_ElementDef var $content_model; var $content_model_type; - /** - * Type of the tag: inline or block or unknown? 
- * @public - */ - var $type; + var $descendants_are_inline; /** * Lookup table of tags excluded from all descendants of this tag. @@ -663,4 +639,4 @@ class HTMLPurifier_ElementDef } -?> \ No newline at end of file +?> diff --git a/library/HTMLPurifier/HTMLModule/Text.php b/library/HTMLPurifier/HTMLModule/Text.php index 12c91072..35c9d8ed 100644 --- a/library/HTMLPurifier/HTMLModule/Text.php +++ b/library/HTMLPurifier/HTMLModule/Text.php @@ -37,6 +37,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule if ($element == 'br') { $this->info[$element]->content_model_type = 'empty'; } elseif ($element == 'blockquote') { + $this->info[$element]->content_model = 'Heading | Block | List'; $this->info[$element]->content_model_type = 'strictblockquote'; } elseif ($element == 'div') { $this->info[$element]->content_model = '#PCDATA | Flow'; diff --git a/library/HTMLPurifier/Printer/HTMLDefinition.php b/library/HTMLPurifier/Printer/HTMLDefinition.php index 20671912..c7314bcd 100644 --- a/library/HTMLPurifier/Printer/HTMLDefinition.php +++ b/library/HTMLPurifier/Printer/HTMLDefinition.php @@ -16,8 +16,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer $this->config =& $config; if (isset($_GET['x'])) { // hidden settings - $this->def = new HTMLPurifier_XHTMLDefinition(); - $this->def->initialize($config); + $this->def = new HTMLPurifier_XHTMLDefinition($config); $this->def->setup($config); } else { $this->def = $config->getHTMLDefinition(); @@ -88,8 +87,8 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2)); $ret .= $this->end('tr'); $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Type'); - $ret .= $this->element('td', ucfirst($def->type)); + $ret .= $this->element('th', 'Inline content'); + $ret .= $this->element('td', $def->descendants_are_inline ? 
'Yes' : 'No'); $ret .= $this->end('tr'); if (!empty($def->excludes)) { $ret .= $this->start('tr'); diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php index dd5a920f..08f90756 100644 --- a/library/HTMLPurifier/Strategy/FixNesting.php +++ b/library/HTMLPurifier/Strategy/FixNesting.php @@ -49,8 +49,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy $tokens[] = new HTMLPurifier_Token_End($parent_name); // setup the context variables - $parent_type = 'unknown'; // reference var that we alter - $context->register('ParentType', $parent_type); + $is_inline = false; // reference var that we alter + $context->register('IsInline', $is_inline); //####################################################################// // Loop initialization @@ -115,11 +115,16 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy } // calculate context - if (isset($parent_def)) { - $parent_type = $parent_def->type; + if ($is_inline === false) { + // check if conditions make it inline + if (!empty($parent_def) && $parent_def->descendants_are_inline) { + $is_inline = $count - 1; + } } else { - // generally found in specialized elements like UL - $parent_type = 'unknown'; + // check if we're out of inline + if ($count === $is_inline) { + $is_inline = false; + } } //################################################################// @@ -273,7 +278,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy array_pop($tokens); // remove context variables - $context->destroy('ParentType'); + $context->destroy('IsInline'); //####################################################################// // Return diff --git a/library/HTMLPurifier/XHTMLDefinition.php b/library/HTMLPurifier/XHTMLDefinition.php index bc35bf0e..38efb64c 100644 --- a/library/HTMLPurifier/XHTMLDefinition.php +++ b/library/HTMLPurifier/XHTMLDefinition.php @@ -19,8 +19,9 @@ class HTMLPurifier_XHTMLDefinition extends 
HTMLPurifier_HTMLDefinition var $modules = array(); var $attr_types; var $attr_collection; + var $content_sets; - function initialize($config) { + function HTMLPurifier_XHTMLDefinition($config) { $this->modules['Text'] = new HTMLPurifier_HTMLModule_Text(); $this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext(); @@ -48,6 +49,7 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition } } } + // perform content_set expansions foreach ($content_sets as $i => $set) { // only performed once, so infinite recursion is not @@ -59,41 +61,48 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition array_values($content_sets), $set); } + // define convenient variables $content_sets_keys = array_keys($content_sets); $content_sets_values = array_values($content_sets); + foreach ($content_sets as $name => $set) { + $this->content_sets[$name] = $this->convertToLookup($set); + } foreach ($this->modules as $module_i => $module) { - foreach ($module->info as $element_i => $element) { - $element =& $this->modules[$module_i]->info[$element_i]; + foreach ($module->info as $name => $def) { + $def =& $this->modules[$module_i]->info[$name]; // attribute value expansions - $this->attr_collection->performInclusions($element->attr); + $this->attr_collection->performInclusions($def->attr); $this->attr_collection->expandStringIdentifiers( - $element->attr, $this->attr_types); + $def->attr, $this->attr_types); // perform content model expansions - $content_model = $element->content_model; + $content_model = $def->content_model; if (is_string($content_model)) { - $element->content_model = str_replace( + if (strpos($content_model, 'Inline') !== false) { + $def->descendants_are_inline = true; + } + $def->content_model = str_replace( $content_sets_keys, $content_sets_values, $content_model); } // get child def from content model - $element->child = $this->getChildDef($element); + $def->child = $this->getChildDef($def); // setup info - 
$this->info[$element_i] = $element; - if ($this->info_parent == $element_i) { - $this->info_parent_def = $this->info[$element_i]; + $this->info[$name] = $def; + if ($this->info_parent == $name) { + $this->info_parent_def = $this->info[$name]; } } } } - function getChildDef($element) { - $value = $element->content_model; - $type = $element->content_model_type; + function getChildDef($def) { + $value = $def->content_model; + $type = $def->content_model_type; switch ($type) { case 'required': return new HTMLPurifier_ChildDef_Required($value); @@ -102,7 +111,7 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition case 'empty': return new HTMLPurifier_ChildDef_Empty(); case 'strictblockquote': - return new HTMLPurifier_ChildDef_StrictBlockquote(); + return new HTMLPurifier_ChildDef_StrictBlockquote($value); case 'table': return new HTMLPurifier_ChildDef_Table(); case 'chameleon': @@ -114,6 +123,14 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition return HTMLPurifier_ChildDef_Empty(); } + function convertToLookup($string) { + $array = explode('|', str_replace(' ', '', $string)); + foreach ($array as $i => $k) { + $array[$i] = true; + } + return $array; + } + } -?> \ No newline at end of file +?> diff --git a/tests/HTMLPurifier/ChildDef/ChameleonTest.php b/tests/HTMLPurifier/ChildDef/ChameleonTest.php index b4181196..529d9193 100644 --- a/tests/HTMLPurifier/ChildDef/ChameleonTest.php +++ b/tests/HTMLPurifier/ChildDef/ChameleonTest.php @@ -15,17 +15,17 @@ class HTMLPurifier_ChildDef_ChameleonTest extends HTMLPurifier_ChildDefHarness $this->assertResult( 'Allowed.', true, - array(), array('ParentType' => 'inline') + array(), array('IsInline' => true) ); $this->assertResult( '
Not allowed.
', '', - array(), array('ParentType' => 'inline') + array(), array('IsInline' => true) ); $this->assertResult( '
Allowed.
', true, - array(), array('ParentType' => 'block') + array(), array('IsInline' => false) ); } diff --git a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php index 27aacc81..56405e91 100644 --- a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php +++ b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php @@ -9,7 +9,7 @@ extends HTMLPurifier_ChildDefHarness function test() { - $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote(); + $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p'); $this->assertResult(''); $this->assertResult('

Valid

'); diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php index 38bd996b..55fc108b 100644 --- a/tests/HTMLPurifier/Strategy/FixNestingTest.php +++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php @@ -70,19 +70,30 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness 'Not allowed!' ); - // block in inline ins not allowed - $this->assertResult( + $this->assertResult( // alt config '
Not allowed!
', '<div>Not allowed!</div>', array('Core.EscapeInvalidChildren' => true) ); + // test block element that has inline content + $this->assertResult( + '

Not allowed!

', + '

Not allowed!

' + ); + // test exclusions $this->assertResult( 'Not allowed', '' ); + // stacked ins/del + $this->assertResult( + '

Not allowed!

', + '

Not allowed!

' + ); + // test inline parent $this->assertResult( 'Bold', true, array('HTML.Parent' => 'span')