mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-03 13:21:51 +00:00
[1.5.0] Massive refactoring for Blockquote and Chameleon to be more extensible and accommodating of XHTMLDefinition.
- Fixed buggy chameleon-support for ins and del . Removed context variable ParentType, replaced with IsInline, which is false when you're not inline and an integer of the parent that caused you to become inline when you are (so possibly zero) . Removed ElementDef->type in favor of ElementDef->descendants_are_inline and HTMLDefinition->content_sets . StrictBlockquote now reports what elements it's supposed to allow, rather than what it does allow . Removed HTMLDefinition->info_flow_elements in favor of HTMLDefinition->content_sets['Flow'] . Removed redundant "exclusionary" definitions from DTD roster . StrictBlockquote now requires a construction parameter as if it were a Required ChildDef, this is the "real" set of allowed elements git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@710 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
80243f377c
commit
77d9e05a07
13
NEWS
13
NEWS
@ -12,8 +12,21 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
1.5.0, unknown release date
|
||||
! Added a rudimentary I18N and L10N system modeled off MediaWiki
|
||||
- Allow 'x' subtag in language codes
|
||||
- Fixed buggy chameleon-support for ins and del
|
||||
. Added support for IDREF attributes (i.e. for)
|
||||
. Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens
|
||||
. Removed context variable ParentType, replaced with IsInline, which
|
||||
is false when you're not inline and an integer of the parent that
|
||||
caused you to become inline when you are (so possibly zero)
|
||||
. Removed ElementDef->type in favor of ElementDef->descendants_are_inline
|
||||
and HTMLDefinition->content_sets
|
||||
. StrictBlockquote now reports what elements it's supposed to allow,
|
||||
rather than what it does allow
|
||||
. Removed HTMLDefinition->info_flow_elements in favor of
|
||||
HTMLDefinition->content_sets['Flow']
|
||||
. Removed redundant "exclusionary" definitions from DTD roster
|
||||
. StrictBlockquote now requires a construction parameter as if it
|
||||
were a Required ChildDef; this is the "real" set of allowed elements
|
||||
|
||||
1.4.2, unknown release date
|
||||
! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier
|
||||
|
@ -38,22 +38,13 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
}
|
||||
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$parent_type = $context->get('ParentType');
|
||||
switch ($parent_type) {
|
||||
case 'unknown':
|
||||
case 'inline':
|
||||
$result = $this->inline->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
break;
|
||||
case 'block':
|
||||
$result = $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
break;
|
||||
default:
|
||||
trigger_error('Invalid context', E_USER_ERROR);
|
||||
return false;
|
||||
if ($context->get('IsInline') === false) {
|
||||
return $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
} else {
|
||||
return $this->inline->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,27 +4,31 @@ require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
|
||||
* Takes the contents of blockquote when in strict and reformats for validation.
|
||||
*
|
||||
* From XHTML 1.0 Transitional to Strict, there is a notable change where
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_StrictBlockquote
|
||||
extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $real_elements;
|
||||
var $fake_elements;
|
||||
var $allow_empty = true;
|
||||
var $type = 'strictblockquote';
|
||||
var $init = false;
|
||||
function HTMLPurifier_ChildDef_StrictBlockquote() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
if (!$this->init) {
|
||||
// allow all inline elements
|
||||
$this->elements = $def->info_flow_elements;
|
||||
$this->elements['#PCDATA'] = true;
|
||||
$this->real_elements = $this->elements;
|
||||
$this->fake_elements = $def->content_sets['Flow'];
|
||||
$this->fake_elements['#PCDATA'] = true;
|
||||
$this->init = true;
|
||||
}
|
||||
|
||||
// trick the parent class into thinking it allows more
|
||||
$this->elements = $this->fake_elements;
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
$this->elements = $this->real_elements;
|
||||
|
||||
if ($result === false) return array();
|
||||
if ($result === true) $result = $tokens_of_children;
|
||||
|
||||
@ -40,8 +44,10 @@ extends HTMLPurifier_ChildDef_Required
|
||||
// ifs are nested for readability
|
||||
if (!$is_inline) {
|
||||
if (!$depth) {
|
||||
if (($token->type == 'text') ||
|
||||
($def->info[$token->name]->type == 'inline')) {
|
||||
if (
|
||||
$token->type == 'text' ||
|
||||
!isset($this->elements[$token->name])
|
||||
) {
|
||||
$is_inline = true;
|
||||
$ret[] = $block_wrap_start;
|
||||
}
|
||||
@ -50,7 +56,7 @@ extends HTMLPurifier_ChildDef_Required
|
||||
if (!$depth) {
|
||||
// starting tokens have been inline text / empty
|
||||
if ($token->type == 'start' || $token->type == 'empty') {
|
||||
if ($def->info[$token->name]->type == 'block') {
|
||||
if (isset($this->elements[$token->name])) {
|
||||
// ended
|
||||
$ret[] = $block_wrap_end;
|
||||
$is_inline = false;
|
||||
|
@ -163,23 +163,19 @@ class HTMLPurifier_HTMLDefinition
|
||||
var $info_attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrTransform to be performed after validation/
|
||||
* List of HTMLPurifier_AttrTransform to be performed after validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_post = array();
|
||||
|
||||
/**
|
||||
* Lookup table of flow elements
|
||||
* @public
|
||||
*/
|
||||
var $info_flow_elements = array();
|
||||
|
||||
/**
|
||||
* Boolean is a strict definition?
|
||||
* @public
|
||||
*/
|
||||
var $strict;
|
||||
|
||||
var $content_sets = array();
|
||||
|
||||
/**
|
||||
* Initializes the definition, the meat of the class.
|
||||
*/
|
||||
@ -258,11 +254,6 @@ class HTMLPurifier_HTMLDefinition
|
||||
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
|
||||
$e__flow = "#PCDATA | $e_block | form | $e_inline | $e_misc";
|
||||
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
||||
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
|
||||
" | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline");
|
||||
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
||||
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic | $e_inline_forms".
|
||||
" | $e_misc_inline");
|
||||
$e_form_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block | $e_inline | $e_misc");//unused
|
||||
$e_form_button_content = new HTMLPurifier_ChildDef_Optional(
|
||||
"#PCDATA | p | $e_heading | div | $e_lists | $e_blocktext |".
|
||||
@ -278,7 +269,7 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info['div']->child = $e_Flow;
|
||||
|
||||
if ($this->strict) {
|
||||
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
|
||||
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote($e_block);
|
||||
} else {
|
||||
$this->info['blockquote']->child = $e_Flow;
|
||||
}
|
||||
@ -337,9 +328,9 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info['br']->child =
|
||||
$this->info['hr']->child = new HTMLPurifier_ChildDef_Empty();
|
||||
|
||||
$this->info['pre']->child = $e_pre_content;
|
||||
|
||||
$this->info['a']->child = $e_a_content;
|
||||
// exclusionary
|
||||
$this->info['pre']->child = $e_Inline;
|
||||
$this->info['a']->child = $e_Inline;
|
||||
|
||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
||||
|
||||
@ -355,27 +346,16 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info['td']->child = $e_Flow;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->type : defines the type of the element (block or inline)
|
||||
// misc compat stuff with XHTMLDefinition
|
||||
|
||||
// unknown until proven inline/block
|
||||
foreach ($this->info as $i => $x) {
|
||||
$this->info[$i]->type = 'unknown';
|
||||
}
|
||||
|
||||
// reuses $e_Inline and $e_Block
|
||||
foreach ($e_Inline->elements as $name => $bool) {
|
||||
if ($name == '#PCDATA') continue;
|
||||
if (!isset($this->info[$name])) continue;
|
||||
$this->info[$name]->type = 'inline';
|
||||
}
|
||||
|
||||
foreach ($e_Block->elements as $name => $bool) {
|
||||
if (!isset($this->info[$name])) continue;
|
||||
$this->info[$name]->type = 'block';
|
||||
foreach ($this->info as $key => $def) {
|
||||
if ($this->info[$key]->child == $e_Inline) {
|
||||
$this->info[$key]->descendants_are_inline = true;
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($e_Flow->elements as $name => $bool) {
|
||||
$this->info_flow_elements[$name] = true;
|
||||
$this->content_sets['Flow'][$name] = true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
@ -649,11 +629,7 @@ class HTMLPurifier_ElementDef
|
||||
var $content_model;
|
||||
var $content_model_type;
|
||||
|
||||
/**
|
||||
* Type of the tag: inline or block or unknown?
|
||||
* @public
|
||||
*/
|
||||
var $type;
|
||||
var $descendants_are_inline;
|
||||
|
||||
/**
|
||||
* Lookup table of tags excluded from all descendants of this tag.
|
||||
|
@ -37,6 +37,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
|
||||
if ($element == 'br') {
|
||||
$this->info[$element]->content_model_type = 'empty';
|
||||
} elseif ($element == 'blockquote') {
|
||||
$this->info[$element]->content_model = 'Heading | Block | List';
|
||||
$this->info[$element]->content_model_type = 'strictblockquote';
|
||||
} elseif ($element == 'div') {
|
||||
$this->info[$element]->content_model = '#PCDATA | Flow';
|
||||
|
@ -16,8 +16,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
||||
$this->config =& $config;
|
||||
|
||||
if (isset($_GET['x'])) { // hidden settings
|
||||
$this->def = new HTMLPurifier_XHTMLDefinition();
|
||||
$this->def->initialize($config);
|
||||
$this->def = new HTMLPurifier_XHTMLDefinition($config);
|
||||
$this->def->setup($config);
|
||||
} else {
|
||||
$this->def = $config->getHTMLDefinition();
|
||||
@ -88,8 +87,8 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
||||
$ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
|
||||
$ret .= $this->end('tr');
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Type');
|
||||
$ret .= $this->element('td', ucfirst($def->type));
|
||||
$ret .= $this->element('th', 'Inline content');
|
||||
$ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No');
|
||||
$ret .= $this->end('tr');
|
||||
if (!empty($def->excludes)) {
|
||||
$ret .= $this->start('tr');
|
||||
|
@ -49,8 +49,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
$tokens[] = new HTMLPurifier_Token_End($parent_name);
|
||||
|
||||
// setup the context variables
|
||||
$parent_type = 'unknown'; // reference var that we alter
|
||||
$context->register('ParentType', $parent_type);
|
||||
$is_inline = false; // reference var that we alter
|
||||
$context->register('IsInline', $is_inline);
|
||||
|
||||
//####################################################################//
|
||||
// Loop initialization
|
||||
@ -115,11 +115,16 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
}
|
||||
|
||||
// calculate context
|
||||
if (isset($parent_def)) {
|
||||
$parent_type = $parent_def->type;
|
||||
if ($is_inline === false) {
|
||||
// check if conditions make it inline
|
||||
if (!empty($parent_def) && $parent_def->descendants_are_inline) {
|
||||
$is_inline = $count - 1;
|
||||
}
|
||||
} else {
|
||||
// generally found in specialized elements like UL
|
||||
$parent_type = 'unknown';
|
||||
// check if we're out of inline
|
||||
if ($count === $is_inline) {
|
||||
$is_inline = false;
|
||||
}
|
||||
}
|
||||
|
||||
//################################################################//
|
||||
@ -273,7 +278,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
array_pop($tokens);
|
||||
|
||||
// remove context variables
|
||||
$context->destroy('ParentType');
|
||||
$context->destroy('IsInline');
|
||||
|
||||
//####################################################################//
|
||||
// Return
|
||||
|
@ -19,8 +19,9 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
|
||||
var $modules = array();
|
||||
var $attr_types;
|
||||
var $attr_collection;
|
||||
var $content_sets;
|
||||
|
||||
function initialize($config) {
|
||||
function HTMLPurifier_XHTMLDefinition($config) {
|
||||
|
||||
$this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
|
||||
$this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext();
|
||||
@ -48,6 +49,7 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// perform content_set expansions
|
||||
foreach ($content_sets as $i => $set) {
|
||||
// only performed once, so infinite recursion is not
|
||||
@ -59,41 +61,48 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
|
||||
array_values($content_sets),
|
||||
$set);
|
||||
}
|
||||
// define convenient variables
|
||||
$content_sets_keys = array_keys($content_sets);
|
||||
$content_sets_values = array_values($content_sets);
|
||||
foreach ($content_sets as $name => $set) {
|
||||
$this->content_sets[$name] = $this->convertToLookup($set);
|
||||
}
|
||||
|
||||
foreach ($this->modules as $module_i => $module) {
|
||||
foreach ($module->info as $element_i => $element) {
|
||||
$element =& $this->modules[$module_i]->info[$element_i];
|
||||
foreach ($module->info as $name => $def) {
|
||||
$def =& $this->modules[$module_i]->info[$name];
|
||||
|
||||
// attribute value expansions
|
||||
$this->attr_collection->performInclusions($element->attr);
|
||||
$this->attr_collection->performInclusions($def->attr);
|
||||
$this->attr_collection->expandStringIdentifiers(
|
||||
$element->attr, $this->attr_types);
|
||||
$def->attr, $this->attr_types);
|
||||
|
||||
// perform content model expansions
|
||||
$content_model = $element->content_model;
|
||||
$content_model = $def->content_model;
|
||||
if (is_string($content_model)) {
|
||||
$element->content_model = str_replace(
|
||||
if (strpos($content_model, 'Inline') !== false) {
|
||||
$def->descendants_are_inline = true;
|
||||
}
|
||||
$def->content_model = str_replace(
|
||||
$content_sets_keys, $content_sets_values, $content_model);
|
||||
}
|
||||
|
||||
// get child def from content model
|
||||
$element->child = $this->getChildDef($element);
|
||||
$def->child = $this->getChildDef($def);
|
||||
|
||||
// setup info
|
||||
$this->info[$element_i] = $element;
|
||||
if ($this->info_parent == $element_i) {
|
||||
$this->info_parent_def = $this->info[$element_i];
|
||||
$this->info[$name] = $def;
|
||||
if ($this->info_parent == $name) {
|
||||
$this->info_parent_def = $this->info[$name];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
function getChildDef($element) {
|
||||
$value = $element->content_model;
|
||||
$type = $element->content_model_type;
|
||||
function getChildDef($def) {
|
||||
$value = $def->content_model;
|
||||
$type = $def->content_model_type;
|
||||
switch ($type) {
|
||||
case 'required':
|
||||
return new HTMLPurifier_ChildDef_Required($value);
|
||||
@ -102,7 +111,7 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
|
||||
case 'empty':
|
||||
return new HTMLPurifier_ChildDef_Empty();
|
||||
case 'strictblockquote':
|
||||
return new HTMLPurifier_ChildDef_StrictBlockquote();
|
||||
return new HTMLPurifier_ChildDef_StrictBlockquote($value);
|
||||
case 'table':
|
||||
return new HTMLPurifier_ChildDef_Table();
|
||||
case 'chameleon':
|
||||
@ -114,6 +123,14 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
|
||||
return HTMLPurifier_ChildDef_Empty();
|
||||
}
|
||||
|
||||
/**
 * Converts a pipe-delimited list of names into a lookup table.
 *
 * Spaces are stripped before splitting, so 'div | p' and 'div|p' are
 * equivalent. The result is keyed by NAME (not by position) so callers
 * can test membership with isset($lookup[$name]).
 *
 * @param $string String of names separated by '|', e.g. 'div | p'
 * @return Associative array of name => true; empty array if the string
 *         contains no names
 */
function convertToLookup($string) {
    $names = explode('|', str_replace(' ', '', $string));
    $lookup = array();
    foreach ($names as $name) {
        // skip empty segments produced by '' or stray delimiters ('a||b')
        if ($name === '') continue;
        // key by the name itself: keying by the numeric index (as the
        // previous implementation did) makes isset($lookup['div']) fail
        $lookup[$name] = true;
    }
    return $lookup;
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@ -15,17 +15,17 @@ class HTMLPurifier_ChildDef_ChameleonTest extends HTMLPurifier_ChildDefHarness
|
||||
|
||||
$this->assertResult(
|
||||
'<b>Allowed.</b>', true,
|
||||
array(), array('ParentType' => 'inline')
|
||||
array(), array('IsInline' => true)
|
||||
);
|
||||
|
||||
$this->assertResult(
|
||||
'<div>Not allowed.</div>', '',
|
||||
array(), array('ParentType' => 'inline')
|
||||
array(), array('IsInline' => true)
|
||||
);
|
||||
|
||||
$this->assertResult(
|
||||
'<div>Allowed.</div>', true,
|
||||
array(), array('ParentType' => 'block')
|
||||
array(), array('IsInline' => false)
|
||||
);
|
||||
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ extends HTMLPurifier_ChildDefHarness
|
||||
|
||||
function test() {
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_StrictBlockquote();
|
||||
$this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p');
|
||||
|
||||
$this->assertResult('');
|
||||
$this->assertResult('<p>Valid</p>');
|
||||
|
@ -70,19 +70,30 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
|
||||
'<span><ins>Not allowed!</ins></span>'
|
||||
);
|
||||
|
||||
// block in inline ins not allowed
|
||||
$this->assertResult(
|
||||
$this->assertResult( // alt config
|
||||
'<span><ins><div>Not allowed!</div></ins></span>',
|
||||
'<span><ins><div>Not allowed!</div></ins></span>',
|
||||
array('Core.EscapeInvalidChildren' => true)
|
||||
);
|
||||
|
||||
// test block element that has inline content
|
||||
$this->assertResult(
|
||||
'<h1><ins><div>Not allowed!</div></ins></h1>',
|
||||
'<h1><ins>Not allowed!</ins></h1>'
|
||||
);
|
||||
|
||||
// test exclusions
|
||||
$this->assertResult(
|
||||
'<a><span><a>Not allowed</a></span></a>',
|
||||
'<a><span></span></a>'
|
||||
);
|
||||
|
||||
// stacked ins/del
|
||||
$this->assertResult(
|
||||
'<h1><ins><del><div>Not allowed!</div></del></ins></h1>',
|
||||
'<h1><ins><del>Not allowed!</del></ins></h1>'
|
||||
);
|
||||
|
||||
// test inline parent
|
||||
$this->assertResult(
|
||||
'<b>Bold</b>', true, array('HTML.Parent' => 'span')
|
||||
|
Loading…
Reference in New Issue
Block a user