mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 16:31:53 +00:00
[1.1] Table child definition made more flexible, will fix up poorly ordered elements
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@417 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
665e80d223
commit
e440f25bce
1
NEWS
1
NEWS
@ -6,6 +6,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
quotes, apostrophes and less than or greater than signs.
|
quotes, apostrophes and less than or greater than signs.
|
||||||
- Enforce alphanumeric namespace and directive names for configuration.
|
- Enforce alphanumeric namespace and directive names for configuration.
|
||||||
- Directive documentation generation using XSLT
|
- Directive documentation generation using XSLT
|
||||||
|
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||||
|
|
||||||
1.0.2, unknown release date
|
1.0.2, unknown release date
|
||||||
(bugfix release may be dropped if no bugs found)
|
(bugfix release may be dropped if no bugs found)
|
||||||
|
1
TODO
1
TODO
@ -6,7 +6,6 @@ Ongoing
|
|||||||
- Plugins for major CMSes (very tricky issue)
|
- Plugins for major CMSes (very tricky issue)
|
||||||
|
|
||||||
1.1 release
|
1.1 release
|
||||||
- Rewrite table's child definition to be faster, smart, and regexp free
|
|
||||||
- Allow HTML 4.01 output (cosmetic changes to the generator)
|
- Allow HTML 4.01 output (cosmetic changes to the generator)
|
||||||
- Formatters for plaintext
|
- Formatters for plaintext
|
||||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||||
|
@ -5,13 +5,6 @@
|
|||||||
// false = delete parent node and all children
|
// false = delete parent node and all children
|
||||||
// array(...) = replace children nodes with these
|
// array(...) = replace children nodes with these
|
||||||
|
|
||||||
// this is the hardest one to implement. We'll use fancy regexp tricks
|
|
||||||
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
|
||||||
// to fix the tree)
|
|
||||||
|
|
||||||
// we may end up writing custom code for each HTML case
|
|
||||||
// in order to make it self correcting
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigDef::define(
|
||||||
'Core', 'EscapeInvalidChildren', false, 'bool',
|
'Core', 'EscapeInvalidChildren', false, 'bool',
|
||||||
'When true, a child is found that is not allowed in the context of the '.
|
'When true, a child is found that is not allowed in the context of the '.
|
||||||
@ -62,9 +55,7 @@ class HTMLPurifier_ChildDef
|
|||||||
* Custom validation class, accepts DTD child definitions
|
* Custom validation class, accepts DTD child definitions
|
||||||
*
|
*
|
||||||
* @warning Currently this class is an all or nothing proposition, that is,
|
* @warning Currently this class is an all or nothing proposition, that is,
|
||||||
* it will only give a bool return value. Table is the only
|
* it will only give a bool return value.
|
||||||
* child definition that uses this class, and we ought to give
|
|
||||||
* it a dedicated one.
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||||
{
|
{
|
||||||
@ -307,4 +298,129 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Definition for tables.
 *
 * Instead of merely accepting or rejecting a table's children, this
 * definition sorts the top-level children into buckets and re-emits them
 * in the order: caption, col/colgroup, thead, tfoot, then tbody/tr.
 * Only the first caption is kept; second and later thead/tfoot elements
 * are renamed to tbody and treated as content. Anything else that is a
 * direct child of the table (including its descendants) is dropped.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    var $allow_empty = false;
    var $type = 'table';

    function HTMLPurifier_ChildDef_Table() {}

    /**
     * Validates, and if necessary reorders, the children of a table.
     *
     * @param $tokens_of_children Array of token objects; each token is
     *        read for its ->type ('start' and 'end' adjust the nesting
     *        depth) and its ->name.
     * @param $config  Not used by this method.
     * @param $context Not used by this method.
     * @return false if there are no children or no tr/tbody content was
     *         found; true if the rearranged children are identical to the
     *         input; otherwise the array of replacement tokens.
     */
    function validateChildren($tokens_of_children, $config, $context) {
        if (empty($tokens_of_children)) return false;

        // Sentinel: guarantees one extra loop iteration so that a node
        // still being collected when the real tokens run out gets stashed.
        // It is popped off again before the final comparison.
        $tokens_of_children[] = false;

        // only one of each of these elements is allowed in a table
        $caption = false;
        $thead   = false;
        $tfoot   = false;

        // as many of these as you want
        $cols    = array();
        $content = array();

        $nesting = 0;           // current depth so we can determine nodes
        $is_collecting = false; // are we globbing together tokens to
                                // package into one of the collectors?
        $collection = array();  // collected tokens of the current node

        foreach ($tokens_of_children as $token) {
            // depth is sampled BEFORE this token adjusts it, so a node's
            // own end tag is still seen as "inside" the node
            $is_child = ($nesting == 0);

            if ($token === false) {
                // terminating sequence started
            } elseif ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            // handle node collection
            if ($is_collecting) {
                if ($is_child) {
                    // we are back at the top level, so the node is
                    // complete; its first token tells us which bucket
                    // the collection belongs in
                    switch ($collection[0]->name) {
                        case 'tr':
                        case 'tbody':
                            $content[] = $collection;
                            break;
                        case 'caption':
                            // first caption wins, later ones are dropped
                            if ($caption !== false) break;
                            $caption = $collection;
                            break;
                        case 'thead':
                        case 'tfoot':
                            // access the appropriate variable, $thead or
                            // $tfoot, via a variable variable
                            $var = $collection[0]->name;
                            if ($$var === false) {
                                $$var = $collection;
                            } else {
                                // transmute the first and last tokens
                                // into tbody tags, then treat the node
                                // as ordinary content
                                $collection[0]->name = 'tbody';
                                $collection[count($collection)-1]->name = 'tbody';
                                $content[] = $collection;
                            }
                            break;
                        case 'col':
                        case 'colgroup':
                            $cols[] = $collection;
                            break;
                    }
                    $collection = array();
                    $is_collecting = false;
                } else {
                    // still inside the node: add the token to it
                    $collection[] = $token;
                }
            }

            // terminate
            if ($token === false) break;

            // tokens nested inside another node were handled above
            if (!$is_child) continue;

            // a token without a name cannot be a table element; drop it
            if (!isset($token->name)) continue;

            if ($token->name == 'col' && $token->type != 'start') {
                // the only empty tag in the bunch, so there is nothing
                // further to collect: handle it immediately
                $cols[] = array($token);
                continue;
            }

            switch ($token->name) {
                case 'col':
                    // only reached for a col written as a start/end pair:
                    // collect it like any other node so the end tag is
                    // kept together with its start tag
                case 'caption':
                case 'colgroup':
                case 'thead':
                case 'tfoot':
                case 'tbody':
                case 'tr':
                    $is_collecting = true;
                    $collection[] = $token;
                    break;
                default:
                    // unrecognized, drop silently
                    break;
            }
        }

        // a table needs at least one tr/tbody to be worth keeping
        if (empty($content)) return false;

        // reassemble the children in canonical order
        $ret = array();
        if ($caption !== false) $ret = array_merge($ret, $caption);
        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
        if ($thead !== false) $ret = array_merge($ret, $thead);
        if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);

        array_pop($tokens_of_children); // remove phantom token

        return ($ret === $tokens_of_children) ? true : $ret;

    }
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -209,8 +209,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info['a']->child = $e_a_content;
|
$this->info['a']->child = $e_a_content;
|
||||||
|
|
||||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Custom(
|
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
||||||
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
|
||||||
|
|
||||||
// not a real entity, watch the double underscore
|
// not a real entity, watch the double underscore
|
||||||
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
||||||
|
@ -187,6 +187,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
if (!$parent_def->child->allow_empty) {
|
if (!$parent_def->child->allow_empty) {
|
||||||
// we need to do a double-check
|
// we need to do a double-check
|
||||||
$i = $parent_index;
|
$i = $parent_index;
|
||||||
|
array_pop($stack);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PROJECTED OPTIMIZATION: Process all children elements before
|
// PROJECTED OPTIMIZATION: Process all children elements before
|
||||||
|
@ -59,7 +59,7 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function atest_table() {
|
function test_table() {
|
||||||
|
|
||||||
// currently inactive, awaiting augmentation
|
// currently inactive, awaiting augmentation
|
||||||
|
|
||||||
@ -71,19 +71,33 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
$inputs[0] = '';
|
$inputs[0] = '';
|
||||||
$expect[0] = false;
|
$expect[0] = false;
|
||||||
|
|
||||||
// we really don't care what's inside, because if it turns out
|
// we're using empty tags to compact the tests: under real circumstances
|
||||||
// this tr is illegal, we'll end up re-evaluating the parent node
|
// there would be contents in them
|
||||||
// anyway.
|
|
||||||
$inputs[1] = '<tr></tr>';
|
$inputs[1] = '<tr />';
|
||||||
$expect[1] = true;
|
$expect[1] = true;
|
||||||
|
|
||||||
$inputs[2] = '<caption></caption><col></col><thead></thead>' .
|
$inputs[2] = '<caption /><col /><thead /><tfoot /><tbody>'.
|
||||||
'<tfoot></tfoot><tbody></tbody>';
|
'<tr><td>asdf</td></tr></tbody>';
|
||||||
$expect[2] = true;
|
$expect[2] = true;
|
||||||
|
|
||||||
$inputs[3] = '<col></col><col></col><col></col><tr></tr>';
|
$inputs[3] = '<col /><col /><col /><tr />';
|
||||||
$expect[3] = true;
|
$expect[3] = true;
|
||||||
|
|
||||||
|
// mixed up order
|
||||||
|
$inputs[4] = '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />';
|
||||||
|
$expect[4] = '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />';
|
||||||
|
|
||||||
|
// duplicates of singles
|
||||||
|
// - first caption serves
|
||||||
|
// - trailing tfoots/theads get turned into tbodys
|
||||||
|
$inputs[5] = '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />';
|
||||||
|
$expect[5] = '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />';
|
||||||
|
|
||||||
|
// errant text dropped (until bubbling is implemented)
|
||||||
|
$inputs[6] = 'foo';
|
||||||
|
$expect[6] = false;
|
||||||
|
|
||||||
$this->assertSeries($inputs, $expect, $config);
|
$this->assertSeries($inputs, $expect, $config);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user