0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-19 10:45:18 +00:00

[2.0.1] Scrap auto_close in favor of ChildDef->elements heuristic.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1213 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-23 20:52:57 +00:00
parent 5d0a992579
commit e5191b3ada
16 changed files with 36 additions and 40 deletions

2
NEWS
View File

@ -10,6 +10,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
==========================
2.0.1, unknown release date
! Tag auto-closing now based on a ChildDef heuristic rather than a
manually set auto_close array; some behavior may change
- Clean up special case code for <script> tags
- Reorder includes for DefinitionCache decorators, fixes a possible
missing class error

View File

@ -36,6 +36,11 @@ class HTMLPurifier_ChildDef
*/
var $allow_empty;
/**
* Lookup array of all elements that this definition could possibly allow
*/
var $elements = array();
/**
* Validates nodes according to definition and returns modification.
*

View File

@ -35,6 +35,7 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
    // $inline and $block are handed unchanged to
    // HTMLPurifier_ChildDef_Optional; one definition is built for each
    // context.
    $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
    $this->block = new HTMLPurifier_ChildDef_Optional($block);
    // $elements is consulted as the lookup of every element this definition
    // could possibly allow, so it must cover both contexts. Exposing only
    // the inline lookup makes block-level children look disallowed even
    // when the element is used in a block context.
    // NOTE(review): assumes both ->elements values are name => true lookup
    // arrays, as the Required child definition builds them -- confirm.
    $this->elements = $this->inline->elements + $this->block->elements;
}
function validateChildren($tokens_of_children, $config, &$context) {

View File

@ -44,6 +44,12 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
// DOING! Seriously: if there are problems, please report them.
// collect all elements into the $elements array
preg_match_all("/$el/", $reg, $matches);
foreach ($matches[0] as $match) {
$this->elements[$match] = true;
}
// setup all elements as parentheticals with leading commas
$reg = preg_replace("/$el/", '(,\\0)', $reg);

View File

@ -25,7 +25,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$elements = array_flip($elements);
foreach ($elements as $i => $x) {
$elements[$i] = true;
if (empty($i)) unset($elements[$i]);
if (empty($i)) unset($elements[$i]); // remove blank
}
}
$this->elements = $elements;

View File

@ -9,6 +9,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
var $allow_empty = false;
var $type = 'table';
var $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
function HTMLPurifier_ChildDef_Table() {}
function validateChildren($tokens_of_children, $config, &$context) {
if (empty($tokens_of_children)) return false;

View File

@ -71,13 +71,6 @@ class HTMLPurifier_ElementDef
/**
* Lookup table of tags that close this tag. Used during parsing
* to make sure we don't attempt to nest unclosed tags.
* @public
*/
var $auto_close = array();
/**
* Does the element have a content model (#PCDATA | Inline)*? This
* is important for chameleon ins and del processing in
@ -151,7 +144,6 @@ class HTMLPurifier_ElementDef
}
$this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre);
$this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post);
$this->_mergeAssocArray($this->auto_close, $def->auto_close);
$this->_mergeAssocArray($this->excludes, $def->excludes);
if(!empty($def->content_model)) {

View File

@ -26,8 +26,7 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
$this->addElement('ul', true, 'List', 'Required: li', 'Common');
$this->addElement('dl', true, 'List', 'Required: dt | dd', 'Common');
$li =& $this->addElement('li', true, false, 'Flow', 'Common');
$li->auto_close = array('li' => true);
$this->addElement('li', true, false, 'Flow', 'Common');
$this->addElement('dd', true, false, 'Flow', 'Common');
$this->addElement('dt', true, false, 'Inline', 'Common');

View File

@ -56,10 +56,7 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
$cell_align
);
$this->addElement('col', true, false, 'Empty', 'Common', $cell_col);
$colgroup =& $this->addElement('colgroup', true, false, 'Optional: col', 'Common', $cell_col);
$colgroup->auto_close = $this->makeLookup(
'thead', 'tbody', 'tfoot', 'tr'
);
$this->addElement('colgroup', true, false, 'Optional: col', 'Common', $cell_col);
$this->addElement('tbody', true, false, 'Required: tr', 'Common', $cell_align);
$this->addElement('thead', true, false, 'Required: tr', 'Common', $cell_align);

View File

@ -55,13 +55,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
$this->addElement('h6', true, 'Heading', 'Inline', 'Common');
// Block Structural -----------------------------------------------
$p =& $this->addElement('p', true, 'Block', 'Inline', 'Common');
// this seems really ad hoc: implementing some general
// heuristics would probably be better
$p->auto_close = $this->makeLookup(
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
'table', 'ul' );
$this->addElement('p', true, 'Block', 'Inline', 'Common');
$this->addElement('div', true, 'Block', 'Flow', 'Common');
}

View File

@ -116,7 +116,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
*/
function _isInline($token, $config) {
$definition = $config->getHTMLDefinition();
return !isset($definition->info['p']->auto_close[$token->name]);
return isset($definition->info['p']->child->elements[$token->name]);
}
}

View File

@ -105,8 +105,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// general auto-close heuristic: if the token is not allowed by
// the parent's child definition, auto-close the parent before
// appending the token
if (isset($parent_info->auto_close[$token->name])) {
// close th e parent, then append the token
if (!isset($parent_info->child->elements[$token->name])) {
// close the parent, then append the token
$result[] = new HTMLPurifier_Token_End($parent->name);
$result[] = $token;
$current_nesting[] = $token;

View File

@ -10,6 +10,9 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
$this->assertEqual($this->obj->elements, array('a' => true,
'b' => true, 'c' => true, 'd' => true));
$this->assertResult('', false);
$this->assertResult('<a /><a />', false);
@ -21,6 +24,8 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
function testNesting() {
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b,(c|d))+');
$this->assertEqual($this->obj->elements, array('a' => true,
'b' => true, 'c' => true, 'd' => true));
$this->assertResult('', false);
$this->assertResult('<a /><b /><c /><a /><b /><d />');
$this->assertResult('<a /><b /><c /><d />', false);
@ -28,6 +33,8 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
function testNestedEitherOr() {
$this->obj = new HTMLPurifier_ChildDef_Custom('b,(a|(c|d))+');
$this->assertEqual($this->obj->elements, array('a' => true,
'b' => true, 'c' => true, 'd' => true));
$this->assertResult('', false);
$this->assertResult('<b /><a /><c /><d />');
$this->assertResult('<b /><d /><a /><a />');
@ -37,6 +44,7 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
function testNestedQuantifier() {
$this->obj = new HTMLPurifier_ChildDef_Custom('(b,c+)*');
$this->assertEqual($this->obj->elements, array('b' => true, 'c' => true));
$this->assertResult('');
$this->assertResult('<b /><c />');
$this->assertResult('<b /><c /><c /><c />');
@ -47,6 +55,7 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
function testEitherOr() {
$this->obj = new HTMLPurifier_ChildDef_Custom('a|b');
$this->assertEqual($this->obj->elements, array('a' => true, 'b' => true));
$this->assertResult('', false);
$this->assertResult('<a />');
$this->assertResult('<b />');
@ -57,6 +66,7 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
function testCommafication() {
$this->obj = new HTMLPurifier_ChildDef_Custom('a,b');
$this->assertEqual($this->obj->elements, array('a' => true, 'b' => true));
$this->assertResult('<a /><b />');
$this->assertResult('<ab />', false);

View File

@ -33,10 +33,6 @@ class HTMLPurifier_ElementDefTest extends UnitTestCase
$def1->child = $overloaded_old;
$def1->content_model = 'old';
$def1->content_model_type = $overloaded_old;
$def1->auto_close = array(
'old' => true,
'removed-old' => true
);
$def1->descendants_are_inline = false;
$def1->excludes = array(
'old' => true,
@ -60,10 +56,6 @@ class HTMLPurifier_ElementDefTest extends UnitTestCase
$def2->child = $new;
$def2->content_model = 'new';
$def2->content_model_type = $overloaded_new;
$def2->auto_close = array(
'new' => true,
'removed-old' => false
);
$def2->descendants_are_inline = true;
$def2->excludes = array(
'new' => true,
@ -90,10 +82,6 @@ class HTMLPurifier_ElementDefTest extends UnitTestCase
$this->assertIdentical($def1->child, $new);
$this->assertIdentical($def1->content_model, 'old | new');
$this->assertIdentical($def1->content_model_type, $overloaded_new);
$this->assertIdentical($def1->auto_close, array(
'old' => true,
'new' => true
));
$this->assertIdentical($def1->descendants_are_inline, true);
$this->assertIdentical($def1->excludes, array(
'old' => true,

View File

@ -20,7 +20,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
);
$this->assertResult(
'<b><div>Fix nesting.</div></b>',
'<b>Fix nesting.</b>'
'<b></b><div>Fix nesting.</div>'
);
$this->assertResult(
'<asdf>Foreign element removal.</asdf>',
@ -28,7 +28,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
);
$this->assertResult(
'<foo><b><div>All three.</div></b>',
'<b>All three.</b>'
'<b></b><div>All three.</div>'
);
}