mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-03 05:11:52 +00:00
Finish implementing fixNesting(). Removed security-in-depth check for optimization reasons, since the info array will never cause such a condition.
git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@58 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
3e6bcb7a0f
commit
ff8f24458d
@ -44,6 +44,10 @@ class PureHTMLDefinition
|
||||
|
||||
// transforms: font, menu, dir, center
|
||||
|
||||
// DON'T MONKEY AROUND THIS unless you know what you are doing
|
||||
// and also know the assumptions the code makes about what this
|
||||
// contains for optimization purposes (see fixNesting)
|
||||
|
||||
$e_special_extra = 'img';
|
||||
$e_special_basic = 'br | span | bdo';
|
||||
$e_special = "$e_special_basic | $e_special_extra";
|
||||
@ -338,14 +342,19 @@ class PureHTMLDefinition
|
||||
for ($j = $i, $depth = 0; ; $j++) {
|
||||
if ($tokens[$j]->type == 'start') {
|
||||
$depth++;
|
||||
// skip token assignment on first iteration
|
||||
if ($depth == 1) continue;
|
||||
} elseif ($tokens[$j]->type == 'end') {
|
||||
$depth--;
|
||||
// skip token assignment on last iteration
|
||||
if ($depth == 0) break;
|
||||
}
|
||||
$child_tokens[] = $tokens[$j];
|
||||
}
|
||||
|
||||
// $i is index of start token
|
||||
// $j is index of end token
|
||||
|
||||
// have DTD child def validate children
|
||||
$element_def = $this->info[$tokens[$i]->name];
|
||||
$result = $element_def->child_def->validateChildren($child_tokens);
|
||||
@ -353,14 +362,48 @@ class PureHTMLDefinition
|
||||
// process result
|
||||
if ($result === true) {
|
||||
|
||||
// leave the nodes as is, scroll to next node
|
||||
$i++;
|
||||
while ($i < $size and $tokens[$i]->type != 'start') {
|
||||
$i++;
|
||||
}
|
||||
// leave the nodes as is
|
||||
|
||||
} elseif($result === false) {
|
||||
|
||||
// WARNING WARNING WARNING!!!
|
||||
// While for the original DTD, there will never be
|
||||
// cascading removal, more complex ones may have such
|
||||
// a problem.
|
||||
|
||||
// If you modify the info array such that an element
|
||||
// that requires children may contain a child that requires
|
||||
// children, you need to also scroll back and re-check that
|
||||
// elements parent node
|
||||
|
||||
$length = $j - $i + 1;
|
||||
|
||||
// remove entire node
|
||||
array_splice($tokens, $i, $length);
|
||||
|
||||
// change size
|
||||
$size -= $length;
|
||||
|
||||
// ensure that we scroll to the next node
|
||||
$i--;
|
||||
|
||||
} else {
|
||||
|
||||
$length = $j - $i - 1;
|
||||
|
||||
// replace node with $result
|
||||
array_splice($tokens, $i + 1, $length, $result);
|
||||
|
||||
// change size
|
||||
$size -= $length;
|
||||
$size += count($result);
|
||||
|
||||
}
|
||||
|
||||
// scroll to next node
|
||||
$i++;
|
||||
while ($i < $size and $tokens[$i]->type != 'start') $i++;
|
||||
|
||||
}
|
||||
|
||||
// remove implicit divs
|
||||
@ -404,6 +447,7 @@ class HTMLDTD_Element
|
||||
// in order to make it self correcting
|
||||
class HTMLDTD_ChildDef
|
||||
{
|
||||
var $type = 'custom';
|
||||
var $dtd_regex;
|
||||
var $_pcre_regex;
|
||||
function HTMLDTD_ChildDef($dtd_regex) {
|
||||
|
@ -155,12 +155,18 @@ The way, I suppose, one would check for it, is whenever a node is removed,
|
||||
scroll to it's parent start, and re-evaluate it. Make sure you're able to do
|
||||
that with minimal code repetition.
|
||||
|
||||
EDITOR'S NOTE: this behavior is not implemented by default, because the
|
||||
default configuration has a setup that ensures that cascading node removals
|
||||
will never happen. However, there will be warning signs in case someone tries
|
||||
to hack it further.
|
||||
|
||||
The most complex case can probably be done by using some fancy regexp
|
||||
expressions and transformations. However, it doesn't seem right that, say,
|
||||
a stray <b> in a <table> can cause the entire table to be removed. Fixing it,
|
||||
however, may be too difficult.
|
||||
however, may be too difficult (or not, see below).
|
||||
|
||||
This code was ripped from the PEAR class XML_DTD. It implements regexp checking.
|
||||
This code was excerpted from the PEAR class XML_DTD. It implements regexp
|
||||
checking.
|
||||
|
||||
--
|
||||
|
||||
@ -259,6 +265,11 @@ So... I say delete the node when PCDATA isn't allowed (or the regex is too
|
||||
complicated to determine where PCDATA could be inserted), and translate the node
|
||||
to text when PCDATA is allowed.
|
||||
|
||||
--
|
||||
|
||||
Note that generic child definitions are not usually desirable: we should
|
||||
implement custom handlers for each one that specify the stuff correctly.
|
||||
|
||||
== STAGE 4 - check attributes ==
|
||||
|
||||
While we're doing all this nesting hocus-pocus, attributes are also being
|
||||
|
@ -413,14 +413,29 @@ class Test_PureHTMLDefinition extends UnitTestCase
|
||||
new MF_EndTag('b'),
|
||||
);
|
||||
|
||||
// need test of empty set that's required, resulting in removal of node
|
||||
// test of empty set that's required, resulting in removal of node
|
||||
$inputs[3] = array(
|
||||
new MF_StartTag('ul'),
|
||||
new MF_EndTag('ul')
|
||||
);
|
||||
$expect[3] = array();
|
||||
|
||||
// need test of cascading removal (if possible)
|
||||
|
||||
// ! cover all child element conditions
|
||||
|
||||
// execute only one test at a time:
|
||||
$inputs = array( $inputs[0] );
|
||||
// test illegal text which gets removed
|
||||
$inputs[4] = array(
|
||||
new MF_StartTag('ul'),
|
||||
new MF_Text('Illegal Text'),
|
||||
new MF_StartTag('li'),
|
||||
new MF_Text('Legal item'),
|
||||
new MF_EndTag('li'),
|
||||
new MF_EndTag('ul')
|
||||
);
|
||||
$expect[4] = array(
|
||||
new MF_StartTag('ul'),
|
||||
new MF_StartTag('li'),
|
||||
new MF_Text('Legal item'),
|
||||
new MF_EndTag('li'),
|
||||
new MF_EndTag('ul')
|
||||
);
|
||||
|
||||
foreach ($inputs as $i => $input) {
|
||||
$result = $this->def->fixNesting($input);
|
||||
|
Loading…
Reference in New Issue
Block a user