mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-03 05:11:52 +00:00
Finish implementing fixNesting(). Removed security-in-depth check for optimization reasons, since the info array will never cause such a condition.
git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@58 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
3e6bcb7a0f
commit
ff8f24458d
@ -44,6 +44,10 @@ class PureHTMLDefinition
|
|||||||
|
|
||||||
// transforms: font, menu, dir, center
|
// transforms: font, menu, dir, center
|
||||||
|
|
||||||
|
// DON'T MONKEY AROUND THIS unless you know what you are doing
|
||||||
|
// and also know the assumptions the code makes about what this
|
||||||
|
// contains for optimization purposes (see fixNesting)
|
||||||
|
|
||||||
$e_special_extra = 'img';
|
$e_special_extra = 'img';
|
||||||
$e_special_basic = 'br | span | bdo';
|
$e_special_basic = 'br | span | bdo';
|
||||||
$e_special = "$e_special_basic | $e_special_extra";
|
$e_special = "$e_special_basic | $e_special_extra";
|
||||||
@ -338,14 +342,19 @@ class PureHTMLDefinition
|
|||||||
for ($j = $i, $depth = 0; ; $j++) {
|
for ($j = $i, $depth = 0; ; $j++) {
|
||||||
if ($tokens[$j]->type == 'start') {
|
if ($tokens[$j]->type == 'start') {
|
||||||
$depth++;
|
$depth++;
|
||||||
|
// skip token assignment on first iteration
|
||||||
if ($depth == 1) continue;
|
if ($depth == 1) continue;
|
||||||
} elseif ($tokens[$j]->type == 'end') {
|
} elseif ($tokens[$j]->type == 'end') {
|
||||||
$depth--;
|
$depth--;
|
||||||
|
// skip token assignment on last iteration
|
||||||
if ($depth == 0) break;
|
if ($depth == 0) break;
|
||||||
}
|
}
|
||||||
$child_tokens[] = $tokens[$j];
|
$child_tokens[] = $tokens[$j];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// $i is index of start token
|
||||||
|
// $j is index of end token
|
||||||
|
|
||||||
// have DTD child def validate children
|
// have DTD child def validate children
|
||||||
$element_def = $this->info[$tokens[$i]->name];
|
$element_def = $this->info[$tokens[$i]->name];
|
||||||
$result = $element_def->child_def->validateChildren($child_tokens);
|
$result = $element_def->child_def->validateChildren($child_tokens);
|
||||||
@ -353,14 +362,48 @@ class PureHTMLDefinition
|
|||||||
// process result
|
// process result
|
||||||
if ($result === true) {
|
if ($result === true) {
|
||||||
|
|
||||||
// leave the nodes as is, scroll to next node
|
// leave the nodes as is
|
||||||
$i++;
|
|
||||||
while ($i < $size and $tokens[$i]->type != 'start') {
|
} elseif($result === false) {
|
||||||
$i++;
|
|
||||||
}
|
// WARNING WARNING WARNING!!!
|
||||||
|
// While for the original DTD, there will never be
|
||||||
|
// cascading removal, more complex ones may have such
|
||||||
|
// a problem.
|
||||||
|
|
||||||
|
// If you modify the info array such that an element
|
||||||
|
// that requires children may contain a child that requires
|
||||||
|
// children, you need to also scroll back and re-check that
|
||||||
|
// element's parent node
|
||||||
|
|
||||||
|
$length = $j - $i + 1;
|
||||||
|
|
||||||
|
// remove entire node
|
||||||
|
array_splice($tokens, $i, $length);
|
||||||
|
|
||||||
|
// change size
|
||||||
|
$size -= $length;
|
||||||
|
|
||||||
|
// ensure that we scroll to the next node
|
||||||
|
$i--;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
$length = $j - $i - 1;
|
||||||
|
|
||||||
|
// replace node with $result
|
||||||
|
array_splice($tokens, $i + 1, $length, $result);
|
||||||
|
|
||||||
|
// change size
|
||||||
|
$size -= $length;
|
||||||
|
$size += count($result);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// scroll to next node
|
||||||
|
$i++;
|
||||||
|
while ($i < $size and $tokens[$i]->type != 'start') $i++;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove implicit divs
|
// remove implicit divs
|
||||||
@ -404,6 +447,7 @@ class HTMLDTD_Element
|
|||||||
// in order to make it self correcting
|
// in order to make it self correcting
|
||||||
class HTMLDTD_ChildDef
|
class HTMLDTD_ChildDef
|
||||||
{
|
{
|
||||||
|
var $type = 'custom';
|
||||||
var $dtd_regex;
|
var $dtd_regex;
|
||||||
var $_pcre_regex;
|
var $_pcre_regex;
|
||||||
function HTMLDTD_ChildDef($dtd_regex) {
|
function HTMLDTD_ChildDef($dtd_regex) {
|
||||||
|
@ -155,12 +155,18 @@ The way, I suppose, one would check for it, is whenever a node is removed,
|
|||||||
scroll to its parent start, and re-evaluate it. Make sure you're able to do
|
scroll to its parent start, and re-evaluate it. Make sure you're able to do
|
||||||
that with minimal code repetition.
|
that with minimal code repetition.
|
||||||
|
|
||||||
|
EDITOR'S NOTE: this behavior is not implemented by default, because the
|
||||||
|
default configuration has a setup that ensures that cascading node removals
|
||||||
|
will never happen. However, there will be warning signs in case someone tries
|
||||||
|
to hack it further.
|
||||||
|
|
||||||
The most complex case can probably be done by using some fancy regexp
|
The most complex case can probably be done by using some fancy regexp
|
||||||
expressions and transformations. However, it doesn't seem right that, say,
|
expressions and transformations. However, it doesn't seem right that, say,
|
||||||
a stray <b> in a <table> can cause the entire table to be removed. Fixing it,
|
a stray <b> in a <table> can cause the entire table to be removed. Fixing it,
|
||||||
however, may be too difficult.
|
however, may be too difficult (or not, see below).
|
||||||
|
|
||||||
This code was ripped from the PEAR class XML_DTD. It implements regexp checking.
|
This code was excerpted from the PEAR class XML_DTD. It implements regexp
|
||||||
|
checking.
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
||||||
@ -259,6 +265,11 @@ So... I say delete the node when PCDATA isn't allowed (or the regex is too
|
|||||||
complicated to determine where PCDATA could be inserted), and translate the node
|
complicated to determine where PCDATA could be inserted), and translate the node
|
||||||
to text when PCDATA is allowed.
|
to text when PCDATA is allowed.
|
||||||
|
|
||||||
|
--
|
||||||
|
|
||||||
|
Note that generic child definitions are not usually desirable: we should
|
||||||
|
implement custom handlers for each one that specify the stuff correctly.
|
||||||
|
|
||||||
== STAGE 4 - check attributes ==
|
== STAGE 4 - check attributes ==
|
||||||
|
|
||||||
While we're doing all this nesting hocus-pocus, attributes are also being
|
While we're doing all this nesting hocus-pocus, attributes are also being
|
||||||
|
@ -413,14 +413,29 @@ class Test_PureHTMLDefinition extends UnitTestCase
|
|||||||
new MF_EndTag('b'),
|
new MF_EndTag('b'),
|
||||||
);
|
);
|
||||||
|
|
||||||
// need test of empty set that's required, resulting in removal of node
|
// test of empty set that's required, resulting in removal of node
|
||||||
|
$inputs[3] = array(
|
||||||
|
new MF_StartTag('ul'),
|
||||||
|
new MF_EndTag('ul')
|
||||||
|
);
|
||||||
|
$expect[3] = array();
|
||||||
|
|
||||||
// need test of cascading removal (if possible)
|
// test illegal text which gets removed
|
||||||
|
$inputs[4] = array(
|
||||||
// ! cover all child element conditions
|
new MF_StartTag('ul'),
|
||||||
|
new MF_Text('Illegal Text'),
|
||||||
// execute only one test at a time:
|
new MF_StartTag('li'),
|
||||||
$inputs = array( $inputs[0] );
|
new MF_Text('Legal item'),
|
||||||
|
new MF_EndTag('li'),
|
||||||
|
new MF_EndTag('ul')
|
||||||
|
);
|
||||||
|
$expect[4] = array(
|
||||||
|
new MF_StartTag('ul'),
|
||||||
|
new MF_StartTag('li'),
|
||||||
|
new MF_Text('Legal item'),
|
||||||
|
new MF_EndTag('li'),
|
||||||
|
new MF_EndTag('ul')
|
||||||
|
);
|
||||||
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
$result = $this->def->fixNesting($input);
|
$result = $this->def->fixNesting($input);
|
||||||
|
Loading…
Reference in New Issue
Block a user