mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-08 06:48:42 +00:00
[1.1] Table child definition made more flexible, will fix up poorly ordered elements
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@417 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
665e80d223
commit
e440f25bce
1
NEWS
1
NEWS
@ -6,6 +6,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
quotes, apostrophes and less than or greater than signs.
|
||||
- Enforce alphanumeric namespace and directive names for configuration.
|
||||
- Directive documentation generation using XSLT
|
||||
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||
|
||||
1.0.2, unknown release date
|
||||
(bugfix release may be dropped if no bugs found)
|
||||
|
1
TODO
1
TODO
@ -6,7 +6,6 @@ Ongoing
|
||||
- Plugins for major CMSes (very tricky issue)
|
||||
|
||||
1.1 release
|
||||
- Rewrite table's child definition to be faster, smart, and regexp free
|
||||
- Allow HTML 4.01 output (cosmetic changes to the generator)
|
||||
- Formatters for plaintext
|
||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||
|
@ -5,13 +5,6 @@
|
||||
// false = delete parent node and all children
|
||||
// array(...) = replace children nodes with these
|
||||
|
||||
// this is the hardest one to implement. We'll use fancy regexp tricks
|
||||
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
||||
// to fix the tree)
|
||||
|
||||
// we may end up writing custom code for each HTML case
|
||||
// in order to make it self correcting
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
'Core', 'EscapeInvalidChildren', false, 'bool',
|
||||
'When true, a child is found that is not allowed in the context of the '.
|
||||
@ -62,9 +55,7 @@ class HTMLPurifier_ChildDef
|
||||
* Custom validation class, accepts DTD child definitions
|
||||
*
|
||||
* @warning Currently this class is an all or nothing proposition, that is,
|
||||
* it will only give a bool return value. Table is the only
|
||||
* child definition that uses this class, and we ought to give
|
||||
* it a dedicated one.
|
||||
* it will only give a bool return value.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
{
|
||||
@ -307,4 +298,129 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Definition for tables.
 *
 * Rather than only accepting or rejecting a table's children, this
 * definition regroups the recognized top-level children into the order
 *
 *     caption, col/colgroup, thead, tfoot, tbody/tr
 *
 * keeping the first caption/thead/tfoot it sees, discarding later captions,
 * and turning later thead/tfoot groups into tbody groups. Top-level tokens
 * it does not recognize are dropped.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    var $allow_empty = false;
    var $type = 'table';

    function HTMLPurifier_ChildDef_Table() {}

    /**
     * Validates, and if necessary reorders, the children of a table.
     *
     * @param $tokens_of_children Array of token objects; each is read
     *        through its ->type ('start', 'end', anything else leaves the
     *        depth untouched) and ->name members.
     * @param $config  Not used by this method.
     * @param $context Not used by this method.
     * @return false if there are no children or no tr/tbody content was
     *         found; true if the regrouped tokens are identical to the
     *         input; otherwise the regrouped array of tokens.
     */
    function validateChildren($tokens_of_children, $config, $context) {
        if (empty($tokens_of_children)) return false;

        // Sentinel: guarantees the loop body runs once more after the last
        // real token so that a pending collection gets flushed. It is
        // removed again before the final comparison.
        $tokens_of_children[] = false;

        // only one of each of these is kept per table
        $caption = false;
        $thead   = false;
        $tfoot   = false;

        // any number of these may appear
        $cols    = array();
        $content = array();

        $nesting       = 0;       // depth relative to the table's children
        $is_collecting = false;   // currently gathering tokens of one child?
        $collection    = array(); // tokens of the child being gathered

        foreach ($tokens_of_children as $token) {
            // Depth is sampled *before* this token adjusts it, so a start
            // tag at depth 0 counts as a direct child while its matching
            // end tag (seen at depth 1) does not.
            $is_child = ($nesting == 0);

            if ($token === false) {
                // sentinel reached, nothing to count
            } elseif ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_collecting) {
                if ($is_child) {
                    // Back at the top level: the previous child is complete.
                    // Its first token tells us which bucket it belongs in.
                    switch ($collection[0]->name) {
                        case 'tr':
                        case 'tbody':
                            $content[] = $collection;
                            break;
                        case 'caption':
                            // first caption wins, later ones are discarded
                            if ($caption !== false) break;
                            $caption = $collection;
                            break;
                        case 'thead':
                        case 'tfoot':
                            // variable-variable: selects $thead or $tfoot
                            $var = $collection[0]->name;
                            if ($$var === false) {
                                $$var = $collection;
                            } else {
                                // duplicate: rename the first and last
                                // tokens to tbody and treat it as content
                                $collection[0]->name = 'tbody';
                                $collection[count($collection)-1]->name = 'tbody';
                                $content[] = $collection;
                            }
                            break;
                        case 'colgroup':
                            $cols[] = $collection;
                            break;
                    }
                    $collection    = array();
                    $is_collecting = false;
                } else {
                    // still inside the child, keep gathering
                    $collection[] = $token;
                }
            }

            // the sentinel has done its job
            if ($token === false) break;

            if ($is_child) {
                if ($token->name == 'col') {
                    // col is the only element of the posse that has no
                    // content, so it can be stored right away
                    $cols[] = array($token);
                    continue;
                }
                switch ($token->name) {
                    case 'caption':
                    case 'colgroup':
                    case 'thead':
                    case 'tfoot':
                    case 'tbody':
                    case 'tr':
                        $is_collecting = true;
                        $collection[] = $token;
                        // "break", not "continue": inside a switch PHP
                        // treats continue as break anyway (and warns about
                        // it from 7.3 on). Nothing follows the switch in
                        // the loop body, so the flow is unchanged.
                        break;
                    default:
                        // unrecognized, drop silently
                        break;
                }
            }
        }

        // a table without any tr/tbody content is not salvageable
        if (empty($content)) return false;

        // reassemble in canonical order
        $ret = array();
        if ($caption !== false) $ret = array_merge($ret, $caption);
        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
        if ($thead !== false) $ret = array_merge($ret, $thead);
        if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);

        array_pop($tokens_of_children); // remove the sentinel

        return ($ret === $tokens_of_children) ? true : $ret;
    }
}
|
||||
|
||||
?>
|
@ -209,8 +209,7 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
$this->info['a']->child = $e_a_content;
|
||||
|
||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Custom(
|
||||
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
||||
|
||||
// not a real entity, watch the double underscore
|
||||
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
||||
|
@ -187,6 +187,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
if (!$parent_def->child->allow_empty) {
|
||||
// we need to do a double-check
|
||||
$i = $parent_index;
|
||||
array_pop($stack);
|
||||
}
|
||||
|
||||
// PROJECTED OPTIMIZATION: Process all children elements before
|
||||
@ -255,4 +256,4 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
?>
|
||||
|
@ -59,7 +59,7 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
function atest_table() {
|
||||
function test_table() {
|
||||
|
||||
// currently inactive, awaiting augmentation
|
||||
|
||||
@ -71,19 +71,33 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||
$inputs[0] = '';
|
||||
$expect[0] = false;
|
||||
|
||||
// we really don't care what's inside, because if it turns out
|
||||
// this tr is illegal, we'll end up re-evaluating the parent node
|
||||
// anyway.
|
||||
$inputs[1] = '<tr></tr>';
|
||||
// we're using empty tags to compact the tests: under real circumstances
|
||||
// there would be contents in them
|
||||
|
||||
$inputs[1] = '<tr />';
|
||||
$expect[1] = true;
|
||||
|
||||
$inputs[2] = '<caption></caption><col></col><thead></thead>' .
|
||||
'<tfoot></tfoot><tbody></tbody>';
|
||||
$inputs[2] = '<caption /><col /><thead /><tfoot /><tbody>'.
|
||||
'<tr><td>asdf</td></tr></tbody>';
|
||||
$expect[2] = true;
|
||||
|
||||
$inputs[3] = '<col></col><col></col><col></col><tr></tr>';
|
||||
$inputs[3] = '<col /><col /><col /><tr />';
|
||||
$expect[3] = true;
|
||||
|
||||
// mixed up order
|
||||
$inputs[4] = '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />';
|
||||
$expect[4] = '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />';
|
||||
|
||||
// duplicates of singles
|
||||
// - first caption serves
|
||||
// - trailing tfoots/theads get turned into tbodys
|
||||
$inputs[5] = '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />';
|
||||
$expect[5] = '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />';
|
||||
|
||||
// errant text dropped (until bubbling is implemented)
|
||||
$inputs[6] = 'foo';
|
||||
$expect[6] = false;
|
||||
|
||||
$this->assertSeries($inputs, $expect, $config);
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user