0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-10 15:48:42 +00:00

[1.5.0] Massive refactoring for Blockquote and Chameleon to be more extensible and accommodating of XHTMLDefinition.

- Fixed buggy chameleon-support for ins and del
. Removed context variable ParentType, replaced with IsInline, which
  is false when you're not inline and, when you are, an integer identifying
  the parent that caused you to become inline (so possibly zero)
. Removed ElementDef->type in favor of ElementDef->descendants_are_inline
  and HTMLDefinition->content_sets
. StrictBlockquote now reports what elements it's supposed to allow,
  rather than what it does allow
. Removed HTMLDefinition->info_flow_elements in favor of
  HTMLDefinition->content_sets['Flow']
. Removed redundant "exclusionary" definitions from DTD roster
. StrictBlockquote now requires a construction parameter as if it
  were a Required ChildDef; this is the "real" set of allowed elements

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@710 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-02-04 03:53:57 +00:00
parent 80243f377c
commit 77d9e05a07
11 changed files with 114 additions and 95 deletions

13
NEWS
View File

@ -12,8 +12,21 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
1.5.0, unknown release date 1.5.0, unknown release date
! Added a rudimentary I18N and L10N system modeled off MediaWiki ! Added a rudimentary I18N and L10N system modeled off MediaWiki
- Allow 'x' subtag in language codes - Allow 'x' subtag in language codes
- Fixed buggy chameleon-support for ins and del
. Added support for IDREF attributes (i.e. for) . Added support for IDREF attributes (i.e. for)
. Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens . Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens
. Removed context variable ParentType, replaced with IsInline, which
is false when you're not inline and an integer of the parent that
caused you to become inline when you are (so possibly zero)
. Removed ElementDef->type in favor of ElementDef->descendants_are_inline
and HTMLDefinition->content_sets
. StrictBlockquote now reports what elements its supposed to allow,
rather than what it does allow
. Removed HTMLDefinition->info_flow_elements in favor of
HTMLDefinition->content_sets['Flow']
. Removed redundant "exclusionary" definitions from DTD roster
. StrictBlockquote now requires a construction parameter as if it
were an Required ChildDef, this is the "real" set of allowed elements
1.4.2, unknown release date 1.4.2, unknown release date
! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier ! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier

View File

@ -38,22 +38,13 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
} }
function validateChildren($tokens_of_children, $config, &$context) { function validateChildren($tokens_of_children, $config, &$context) {
$parent_type = $context->get('ParentType'); if ($context->get('IsInline') === false) {
switch ($parent_type) { return $this->block->validateChildren(
case 'unknown': $tokens_of_children, $config, $context);
case 'inline': } else {
$result = $this->inline->validateChildren( return $this->inline->validateChildren(
$tokens_of_children, $config, $context); $tokens_of_children, $config, $context);
break;
case 'block':
$result = $this->block->validateChildren(
$tokens_of_children, $config, $context);
break;
default:
trigger_error('Invalid context', E_USER_ERROR);
return false;
} }
return $result;
} }
} }

View File

@ -4,27 +4,31 @@ require_once 'HTMLPurifier/ChildDef/Required.php';
/** /**
* Takes the contents of blockquote when in strict and reformats for validation. * Takes the contents of blockquote when in strict and reformats for validation.
*
* From XHTML 1.0 Transitional to Strict, there is a notable change where
*/ */
class HTMLPurifier_ChildDef_StrictBlockquote class HTMLPurifier_ChildDef_StrictBlockquote
extends HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef_Required
{ {
var $real_elements;
var $fake_elements;
var $allow_empty = true; var $allow_empty = true;
var $type = 'strictblockquote'; var $type = 'strictblockquote';
var $init = false; var $init = false;
function HTMLPurifier_ChildDef_StrictBlockquote() {}
function validateChildren($tokens_of_children, $config, &$context) { function validateChildren($tokens_of_children, $config, &$context) {
$def = $config->getHTMLDefinition(); $def = $config->getHTMLDefinition();
if (!$this->init) { if (!$this->init) {
// allow all inline elements // allow all inline elements
$this->elements = $def->info_flow_elements; $this->real_elements = $this->elements;
$this->elements['#PCDATA'] = true; $this->fake_elements = $def->content_sets['Flow'];
$this->fake_elements['#PCDATA'] = true;
$this->init = true; $this->init = true;
} }
// trick the parent class into thinking it allows more
$this->elements = $this->fake_elements;
$result = parent::validateChildren($tokens_of_children, $config, $context); $result = parent::validateChildren($tokens_of_children, $config, $context);
$this->elements = $this->real_elements;
if ($result === false) return array(); if ($result === false) return array();
if ($result === true) $result = $tokens_of_children; if ($result === true) $result = $tokens_of_children;
@ -40,8 +44,10 @@ extends HTMLPurifier_ChildDef_Required
// ifs are nested for readability // ifs are nested for readability
if (!$is_inline) { if (!$is_inline) {
if (!$depth) { if (!$depth) {
if (($token->type == 'text') || if (
($def->info[$token->name]->type == 'inline')) { $token->type == 'text' ||
!isset($this->elements[$token->name])
) {
$is_inline = true; $is_inline = true;
$ret[] = $block_wrap_start; $ret[] = $block_wrap_start;
} }
@ -50,7 +56,7 @@ extends HTMLPurifier_ChildDef_Required
if (!$depth) { if (!$depth) {
// starting tokens have been inline text / empty // starting tokens have been inline text / empty
if ($token->type == 'start' || $token->type == 'empty') { if ($token->type == 'start' || $token->type == 'empty') {
if ($def->info[$token->name]->type == 'block') { if (isset($this->elements[$token->name])) {
// ended // ended
$ret[] = $block_wrap_end; $ret[] = $block_wrap_end;
$is_inline = false; $is_inline = false;

View File

@ -163,23 +163,19 @@ class HTMLPurifier_HTMLDefinition
var $info_attr_transform_pre = array(); var $info_attr_transform_pre = array();
/** /**
* List of HTMLPurifier_AttrTransform to be performed after validation/ * List of HTMLPurifier_AttrTransform to be performed after validation.
* @public * @public
*/ */
var $info_attr_transform_post = array(); var $info_attr_transform_post = array();
/**
* Lookup table of flow elements
* @public
*/
var $info_flow_elements = array();
/** /**
* Boolean is a strict definition? * Boolean is a strict definition?
* @public * @public
*/ */
var $strict; var $strict;
var $content_sets = array();
/** /**
* Initializes the definition, the meat of the class. * Initializes the definition, the meat of the class.
*/ */
@ -258,11 +254,6 @@ class HTMLPurifier_HTMLDefinition
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block); $e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
$e__flow = "#PCDATA | $e_block | form | $e_inline | $e_misc"; $e__flow = "#PCDATA | $e_block | form | $e_inline | $e_misc";
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow); $e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
" | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline");
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic | $e_inline_forms".
" | $e_misc_inline");
$e_form_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block | $e_inline | $e_misc");//unused $e_form_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block | $e_inline | $e_misc");//unused
$e_form_button_content = new HTMLPurifier_ChildDef_Optional( $e_form_button_content = new HTMLPurifier_ChildDef_Optional(
"#PCDATA | p | $e_heading | div | $e_lists | $e_blocktext |". "#PCDATA | p | $e_heading | div | $e_lists | $e_blocktext |".
@ -278,7 +269,7 @@ class HTMLPurifier_HTMLDefinition
$this->info['div']->child = $e_Flow; $this->info['div']->child = $e_Flow;
if ($this->strict) { if ($this->strict) {
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote(); $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote($e_block);
} else { } else {
$this->info['blockquote']->child = $e_Flow; $this->info['blockquote']->child = $e_Flow;
} }
@ -337,9 +328,9 @@ class HTMLPurifier_HTMLDefinition
$this->info['br']->child = $this->info['br']->child =
$this->info['hr']->child = new HTMLPurifier_ChildDef_Empty(); $this->info['hr']->child = new HTMLPurifier_ChildDef_Empty();
$this->info['pre']->child = $e_pre_content; // exclusionary
$this->info['pre']->child = $e_Inline;
$this->info['a']->child = $e_a_content; $this->info['a']->child = $e_Inline;
$this->info['table']->child = new HTMLPurifier_ChildDef_Table(); $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
@ -355,27 +346,16 @@ class HTMLPurifier_HTMLDefinition
$this->info['td']->child = $e_Flow; $this->info['td']->child = $e_Flow;
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// info[]->type : defines the type of the element (block or inline) // misc compat stuff with XHTMLDefinition
// unknown until proven inline/block foreach ($this->info as $key => $def) {
foreach ($this->info as $i => $x) { if ($this->info[$key]->child == $e_Inline) {
$this->info[$i]->type = 'unknown'; $this->info[$key]->descendants_are_inline = true;
} }
// reuses $e_Inline and $e_Block
foreach ($e_Inline->elements as $name => $bool) {
if ($name == '#PCDATA') continue;
if (!isset($this->info[$name])) continue;
$this->info[$name]->type = 'inline';
}
foreach ($e_Block->elements as $name => $bool) {
if (!isset($this->info[$name])) continue;
$this->info[$name]->type = 'block';
} }
foreach ($e_Flow->elements as $name => $bool) { foreach ($e_Flow->elements as $name => $bool) {
$this->info_flow_elements[$name] = true; $this->content_sets['Flow'][$name] = true;
} }
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
@ -649,11 +629,7 @@ class HTMLPurifier_ElementDef
var $content_model; var $content_model;
var $content_model_type; var $content_model_type;
/** var $descendants_are_inline;
* Type of the tag: inline or block or unknown?
* @public
*/
var $type;
/** /**
* Lookup table of tags excluded from all descendants of this tag. * Lookup table of tags excluded from all descendants of this tag.
@ -663,4 +639,4 @@ class HTMLPurifier_ElementDef
} }
?> ?>

View File

@ -37,6 +37,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
if ($element == 'br') { if ($element == 'br') {
$this->info[$element]->content_model_type = 'empty'; $this->info[$element]->content_model_type = 'empty';
} elseif ($element == 'blockquote') { } elseif ($element == 'blockquote') {
$this->info[$element]->content_model = 'Heading | Block | List';
$this->info[$element]->content_model_type = 'strictblockquote'; $this->info[$element]->content_model_type = 'strictblockquote';
} elseif ($element == 'div') { } elseif ($element == 'div') {
$this->info[$element]->content_model = '#PCDATA | Flow'; $this->info[$element]->content_model = '#PCDATA | Flow';

View File

@ -16,8 +16,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$this->config =& $config; $this->config =& $config;
if (isset($_GET['x'])) { // hidden settings if (isset($_GET['x'])) { // hidden settings
$this->def = new HTMLPurifier_XHTMLDefinition(); $this->def = new HTMLPurifier_XHTMLDefinition($config);
$this->def->initialize($config);
$this->def->setup($config); $this->def->setup($config);
} else { } else {
$this->def = $config->getHTMLDefinition(); $this->def = $config->getHTMLDefinition();
@ -88,8 +87,8 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2)); $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
$ret .= $this->end('tr'); $ret .= $this->end('tr');
$ret .= $this->start('tr'); $ret .= $this->start('tr');
$ret .= $this->element('th', 'Type'); $ret .= $this->element('th', 'Inline content');
$ret .= $this->element('td', ucfirst($def->type)); $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No');
$ret .= $this->end('tr'); $ret .= $this->end('tr');
if (!empty($def->excludes)) { if (!empty($def->excludes)) {
$ret .= $this->start('tr'); $ret .= $this->start('tr');

View File

@ -49,8 +49,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
$tokens[] = new HTMLPurifier_Token_End($parent_name); $tokens[] = new HTMLPurifier_Token_End($parent_name);
// setup the context variables // setup the context variables
$parent_type = 'unknown'; // reference var that we alter $is_inline = false; // reference var that we alter
$context->register('ParentType', $parent_type); $context->register('IsInline', $is_inline);
//####################################################################// //####################################################################//
// Loop initialization // Loop initialization
@ -115,11 +115,16 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
} }
// calculate context // calculate context
if (isset($parent_def)) { if ($is_inline === false) {
$parent_type = $parent_def->type; // check if conditions make it inline
if (!empty($parent_def) && $parent_def->descendants_are_inline) {
$is_inline = $count - 1;
}
} else { } else {
// generally found in specialized elements like UL // check if we're out of inline
$parent_type = 'unknown'; if ($count === $is_inline) {
$is_inline = false;
}
} }
//################################################################// //################################################################//
@ -273,7 +278,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
array_pop($tokens); array_pop($tokens);
// remove context variables // remove context variables
$context->destroy('ParentType'); $context->destroy('IsInline');
//####################################################################// //####################################################################//
// Return // Return

View File

@ -19,8 +19,9 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
var $modules = array(); var $modules = array();
var $attr_types; var $attr_types;
var $attr_collection; var $attr_collection;
var $content_sets;
function initialize($config) { function HTMLPurifier_XHTMLDefinition($config) {
$this->modules['Text'] = new HTMLPurifier_HTMLModule_Text(); $this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
$this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext(); $this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext();
@ -48,6 +49,7 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
} }
} }
} }
// perform content_set expansions // perform content_set expansions
foreach ($content_sets as $i => $set) { foreach ($content_sets as $i => $set) {
// only performed once, so infinite recursion is not // only performed once, so infinite recursion is not
@ -59,41 +61,48 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
array_values($content_sets), array_values($content_sets),
$set); $set);
} }
// define convenient variables
$content_sets_keys = array_keys($content_sets); $content_sets_keys = array_keys($content_sets);
$content_sets_values = array_values($content_sets); $content_sets_values = array_values($content_sets);
foreach ($content_sets as $name => $set) {
$this->content_sets[$name] = $this->convertToLookup($set);
}
foreach ($this->modules as $module_i => $module) { foreach ($this->modules as $module_i => $module) {
foreach ($module->info as $element_i => $element) { foreach ($module->info as $name => $def) {
$element =& $this->modules[$module_i]->info[$element_i]; $def =& $this->modules[$module_i]->info[$name];
// attribute value expansions // attribute value expansions
$this->attr_collection->performInclusions($element->attr); $this->attr_collection->performInclusions($def->attr);
$this->attr_collection->expandStringIdentifiers( $this->attr_collection->expandStringIdentifiers(
$element->attr, $this->attr_types); $def->attr, $this->attr_types);
// perform content model expansions // perform content model expansions
$content_model = $element->content_model; $content_model = $def->content_model;
if (is_string($content_model)) { if (is_string($content_model)) {
$element->content_model = str_replace( if (strpos($content_model, 'Inline') !== false) {
$def->descendants_are_inline = true;
}
$def->content_model = str_replace(
$content_sets_keys, $content_sets_values, $content_model); $content_sets_keys, $content_sets_values, $content_model);
} }
// get child def from content model // get child def from content model
$element->child = $this->getChildDef($element); $def->child = $this->getChildDef($def);
// setup info // setup info
$this->info[$element_i] = $element; $this->info[$name] = $def;
if ($this->info_parent == $element_i) { if ($this->info_parent == $name) {
$this->info_parent_def = $this->info[$element_i]; $this->info_parent_def = $this->info[$name];
} }
} }
} }
} }
function getChildDef($element) { function getChildDef($def) {
$value = $element->content_model; $value = $def->content_model;
$type = $element->content_model_type; $type = $def->content_model_type;
switch ($type) { switch ($type) {
case 'required': case 'required':
return new HTMLPurifier_ChildDef_Required($value); return new HTMLPurifier_ChildDef_Required($value);
@ -102,7 +111,7 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
case 'empty': case 'empty':
return new HTMLPurifier_ChildDef_Empty(); return new HTMLPurifier_ChildDef_Empty();
case 'strictblockquote': case 'strictblockquote':
return new HTMLPurifier_ChildDef_StrictBlockquote(); return new HTMLPurifier_ChildDef_StrictBlockquote($value);
case 'table': case 'table':
return new HTMLPurifier_ChildDef_Table(); return new HTMLPurifier_ChildDef_Table();
case 'chameleon': case 'chameleon':
@ -114,6 +123,14 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
return HTMLPurifier_ChildDef_Empty(); return HTMLPurifier_ChildDef_Empty();
} }
function convertToLookup($string) {
$array = explode('|', str_replace(' ', '', $string));
foreach ($array as $i => $k) {
$array[$i] = true;
}
return $array;
}
} }
?> ?>

View File

@ -15,17 +15,17 @@ class HTMLPurifier_ChildDef_ChameleonTest extends HTMLPurifier_ChildDefHarness
$this->assertResult( $this->assertResult(
'<b>Allowed.</b>', true, '<b>Allowed.</b>', true,
array(), array('ParentType' => 'inline') array(), array('IsInline' => true)
); );
$this->assertResult( $this->assertResult(
'<div>Not allowed.</div>', '', '<div>Not allowed.</div>', '',
array(), array('ParentType' => 'inline') array(), array('IsInline' => true)
); );
$this->assertResult( $this->assertResult(
'<div>Allowed.</div>', true, '<div>Allowed.</div>', true,
array(), array('ParentType' => 'block') array(), array('IsInline' => false)
); );
} }

View File

@ -9,7 +9,7 @@ extends HTMLPurifier_ChildDefHarness
function test() { function test() {
$this->obj = new HTMLPurifier_ChildDef_StrictBlockquote(); $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p');
$this->assertResult(''); $this->assertResult('');
$this->assertResult('<p>Valid</p>'); $this->assertResult('<p>Valid</p>');

View File

@ -70,19 +70,30 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
'<span><ins>Not allowed!</ins></span>' '<span><ins>Not allowed!</ins></span>'
); );
// block in inline ins not allowed $this->assertResult( // alt config
$this->assertResult(
'<span><ins><div>Not allowed!</div></ins></span>', '<span><ins><div>Not allowed!</div></ins></span>',
'<span><ins>&lt;div&gt;Not allowed!&lt;/div&gt;</ins></span>', '<span><ins>&lt;div&gt;Not allowed!&lt;/div&gt;</ins></span>',
array('Core.EscapeInvalidChildren' => true) array('Core.EscapeInvalidChildren' => true)
); );
// test block element that has inline content
$this->assertResult(
'<h1><ins><div>Not allowed!</div></ins></h1>',
'<h1><ins>Not allowed!</ins></h1>'
);
// test exclusions // test exclusions
$this->assertResult( $this->assertResult(
'<a><span><a>Not allowed</a></span></a>', '<a><span><a>Not allowed</a></span></a>',
'<a><span></span></a>' '<a><span></span></a>'
); );
// stacked ins/del
$this->assertResult(
'<h1><ins><del><div>Not allowed!</div></del></ins></h1>',
'<h1><ins><del>Not allowed!</del></ins></h1>'
);
// test inline parent // test inline parent
$this->assertResult( $this->assertResult(
'<b>Bold</b>', true, array('HTML.Parent' => 'span') '<b>Bold</b>', true, array('HTML.Parent' => 'span')