0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-03-11 17:18:44 +00:00

[1.7.0] ChildDef_Custom's regex generation has been improved, removing several false positives

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1173 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-20 15:54:50 +00:00
parent cf7a50163c
commit 8bbb73e47d
4 changed files with 64 additions and 6 deletions

2
NEWS
View File

@ -46,6 +46,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
- StrictBlockquote child definition refrains from wrapping whitespace - StrictBlockquote child definition refrains from wrapping whitespace
in tags now. in tags now.
- Bug resulting from tag transforms to non-allowed elements fixed - Bug resulting from tag transforms to non-allowed elements fixed
- ChildDef_Custom's regex generation has been improved, removing several
false positives
. Unit test for ElementDef created, ElementDef behavior modified to . Unit test for ElementDef created, ElementDef behavior modified to
be more flexible be more flexible
. Added convenience functions for HTMLModule constructors . Added convenience functions for HTMLModule constructors

View File

@ -3,5 +3,5 @@ The first is Tidy, which enables HTML Purifier to both natively support
deprecated elements and also convert them to standards-compliant deprecated elements and also convert them to standards-compliant
alternatives. The second is the Advanced API, which enables users to alternatives. The second is the Advanced API, which enables users to
create new elements and attributes with ease. Keeping in line with a create new elements and attributes with ease. Keeping in line with a
commitment to high quality, there are also four esoteric bug-fixes and a commitment to high quality, there are also five esoteric bug-fixes and a
plethora of subtle improvements that enhance the library. plethora of subtle improvements that enhance the library.

View File

@ -38,8 +38,21 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
if ($raw{0} != '(') { if ($raw{0} != '(') {
$raw = "($raw)"; $raw = "($raw)";
} }
$reg = str_replace(',', ',?', $raw); $el = '[#a-zA-Z0-9_.-]+';
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg); $reg = $raw;
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
// DOING! Seriously: if there are problems, please report them.
// setup all elements as parentheticals with leading commas
$reg = preg_replace("/$el/", '(,\\0)', $reg);
// remove commas when they were not solicited
$reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
// remove all non-parenthetical commas: they are handled by first regex
$reg = preg_replace("/,\(/", '(', $reg);
$this->_pcre_regex = $reg; $this->_pcre_regex = $reg;
} }
function validateChildren($tokens_of_children, $config, &$context) { function validateChildren($tokens_of_children, $config, &$context) {
@ -60,11 +73,11 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
$list_of_children .= $token->name . ','; $list_of_children .= $token->name . ',';
} }
} }
$list_of_children = rtrim($list_of_children, ','); // add leading comma to deal with stray comma declarations
$list_of_children = ',' . rtrim($list_of_children, ',');
$okay = $okay =
preg_match( preg_match(
'/^'.$this->_pcre_regex.'$/', '/^,?'.$this->_pcre_regex.'$/',
$list_of_children $list_of_children
); );

View File

@ -19,6 +19,49 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
} }
// Exercises a repeated sequence group that ends in a nested choice:
// one or more repetitions of "a, then b, then either c or d".
// NOTE(review): assertResult() comes from the test harness, which is not
// visible here; a second argument of false presumably means "these children
// are rejected" and a single argument "accepted" -- confirm against the harness.
function testNesting() {
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b,(c|d))+');
// no children: the group carries '+', so at least one repetition is needed
$this->assertResult('', false);
// two full repetitions, the first ending in c and the second in d
$this->assertResult('<a /><b /><c /><a /><b /><d />');
// c followed directly by d: the d is not preceded by its own a,b
$this->assertResult('<a /><b /><c /><d />', false);
}
// Exercises a leading element followed by a repeated choice that itself
// contains a nested choice: b, then one or more of a, c or d.
// NOTE(review): assertResult() comes from the test harness, which is not
// visible here; a second argument of false presumably means "these children
// are rejected" and a single argument "accepted" -- confirm against the harness.
function testNestedEitherOr() {
$this->obj = new HTMLPurifier_ChildDef_Custom('b,(a|(c|d))+');
// no children at all
$this->assertResult('', false);
// b followed by one of each alternative
$this->assertResult('<b /><a /><c /><d />');
// alternatives in a different order, with a repeated
$this->assertResult('<b /><d /><a /><a />');
// b followed by a single alternative
$this->assertResult('<b /><a />');
// a single element literally named "acd", not the elements a, c and d
$this->assertResult('<acd />', false);
}
// Exercises a quantifier nested inside a quantified group:
// zero or more repetitions of "b followed by one or more c".
// NOTE(review): assertResult() comes from the test harness, which is not
// visible here; a second argument of false presumably means "these children
// are rejected" and a single argument "accepted" -- confirm against the harness.
function testNestedQuantifier() {
$this->obj = new HTMLPurifier_ChildDef_Custom('(b,c+)*');
// no children: the outer '*' permits zero repetitions
$this->assertResult('');
// one repetition with a single c
$this->assertResult('<b /><c />');
// one repetition with several c elements
$this->assertResult('<b /><c /><c /><c />');
// two repetitions
$this->assertResult('<b /><c /><b /><c />');
// trailing b without the c that must follow it
$this->assertResult('<b /><c /><b />', false);
}
// Exercises a bare top-level choice written without surrounding parentheses:
// exactly one a or exactly one b.
// NOTE(review): assertResult() comes from the test harness, which is not
// visible here; a second argument of false presumably means "these children
// are rejected" and a single argument "accepted" -- confirm against the harness.
function testEitherOr() {
$this->obj = new HTMLPurifier_ChildDef_Custom('a|b');
// no children at all
$this->assertResult('', false);
// either alternative on its own
$this->assertResult('<a />');
$this->assertResult('<b />');
// both alternatives together: the choice has no repetition quantifier
$this->assertResult('<a /><b />', false);
}
// Exercises how the comma separator in a plain sequence is handled: a then b.
// NOTE(review): assertResult() comes from the test harness, which is not
// visible here; a second argument of false presumably means "these children
// are rejected" and a single argument "accepted" -- confirm against the harness.
function testCommafication() {
$this->obj = new HTMLPurifier_ChildDef_Custom('a,b');
// element a followed by element b
$this->assertResult('<a /><b />');
// a single element literally named "ab", not a followed by b
$this->assertResult('<ab />', false);
}
} }
?> ?>