0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 16:31:53 +00:00

- Fixed lots of bugs

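- Defined new directive %Core.EscapeInvalidChildren, which re-enables the previously commented-out behavior of escaping invalid child elements into text (off by default, so invalid children are now dropped)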
- Removed convenience configuration generation from the strategies: you *have* to pass a configuration object to execute() yourself unless you're going through the HTMLPurifier class, which passes it for you
- Homogenized function parameters even when only a few of them are used
- Rewrote unit tests that expected previous behavior
- Introduced configuration object to ChildDef tests

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@243 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-08-14 02:46:34 +00:00
parent 0170bb2120
commit 238678871e
11 changed files with 104 additions and 58 deletions

View File

@ -59,7 +59,7 @@ class HTMLPurifier
$generator = new HTMLPurifier_Generator();
return $generator->generateFromTokens(
$strategy->execute(
$lexer->tokenizeHTML($html)
$lexer->tokenizeHTML($html), $config
)
);
}

View File

@ -12,6 +12,13 @@
// we may end up writing custom code for each HTML case
// in order to make it self correcting
// Registers the Core.EscapeInvalidChildren directive (default: false).
// The description string is user-facing documentation for the directive,
// so it is kept as a grammatical, self-contained explanation of both states.
HTMLPurifier_ConfigDef::define(
    'Core', 'EscapeInvalidChildren', false,
    'When true, a child that is not allowed in the context of the '.
    'parent element will be transformed into text as if it were ASCII. When '.
    'false, that element (and all its descendants) will be silently dropped.'
);
class HTMLPurifier_ChildDef
{
var $type;
@ -40,7 +47,7 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
$this->_pcre_regex = $reg;
}
function validateChildren($tokens_of_children) {
function validateChildren($tokens_of_children, $config, $context) {
$list_of_children = '';
$nesting = 0; // depth into the nest
foreach ($tokens_of_children as $token) {
@ -85,7 +92,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
}
var $allow_empty = false;
var $type = 'required';
function validateChildren($tokens_of_children) {
function validateChildren($tokens_of_children, $config, $context) {
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
@ -106,6 +113,9 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
// some configuration
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
@ -125,21 +135,21 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$is_deleting = false;
if (!isset($this->elements[$token->name])) {
$is_deleting = true;
if ($pcdata_allowed) {
//$result[] = new HTMLPurifier_Token_Text(
// $this->gen->generateFromToken($token)
//);
if ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$this->gen->generateFromToken($token)
);
}
continue;
}
}
if (!$is_deleting) {
$result[] = $token;
} elseif ($pcdata_allowed) {
//$result[] =
// new HTMLPurifier_Token_Text(
// $this->gen->generateFromToken( $token )
// );
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$this->gen->generateFromToken( $token )
);
} else {
// drop silently
}
@ -157,8 +167,8 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
var $allow_empty = true;
var $type = 'optional';
function validateChildren($tokens_of_children) {
$result = parent::validateChildren($tokens_of_children);
function validateChildren($tokens_of_children, $config, $context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
if ($result === false) return array();
return $result;
}
@ -170,7 +180,7 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
var $allow_empty = true;
var $type = 'empty';
function HTMLPurifier_ChildDef_Empty() {}
function validateChildren() {
function validateChildren($tokens_of_children, $config, $context) {
return false;
}
}
@ -186,14 +196,16 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
$this->block = new HTMLPurifier_ChildDef_Optional($block);
}
function validateChildren($tokens_of_children, $context) {
function validateChildren($tokens_of_children, $config, $context) {
switch ($context) {
case 'unknown':
case 'inline':
$result = $this->inline->validateChildren($tokens_of_children);
$result = $this->inline->validateChildren(
$tokens_of_children, $config, $context);
break;
case 'block':
$result = $this->block->validateChildren($tokens_of_children);
$result = $this->block->validateChildren(
$tokens_of_children, $config, $context);
break;
default:
trigger_error('Invalid context', E_USER_ERROR);

View File

@ -12,8 +12,7 @@ class HTMLPurifier_Strategy_Composite
trigger_error('Attempt to instantiate abstract object', E_USER_ERROR);
}
function execute($tokens, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
function execute($tokens, $config) {
foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config);
}

View File

@ -40,7 +40,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
$this->definition = HTMLPurifier_Definition::instance();
}
function execute($tokens) {
function execute($tokens, $config) {
//####################################################################//
// Pre-processing
@ -147,7 +147,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
$child_def = $def->child;
// have DTD child def validate children
$result = $child_def->validateChildren($child_tokens, $context);
$result = $child_def->validateChildren(
$child_tokens, $config,$context);
// determine whether or not this element has any exclusions
$excludes = $def->excludes;

View File

@ -15,7 +15,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->definition = HTMLPurifier_Definition::instance();
}
function execute($tokens) {
function execute($tokens, $config) {
$result = array();
$current_nesting = array();
foreach ($tokens as $token) {

View File

@ -24,7 +24,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$this->definition = HTMLPurifier_Definition::instance();
}
function execute($tokens) {
function execute($tokens, $config) {
$result = array();
foreach($tokens as $token) {
if (!empty( $token->is_tag )) {

View File

@ -23,10 +23,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
$this->definition = HTMLPurifier_Definition::instance();
}
function execute($tokens, $config = null) {
// load default configuration object if none passed
if (!$config) $config = HTMLPurifier_Config::createDefault();
function execute($tokens, $config) {
// setup StrategyContext
$context = new HTMLPurifier_AttrContext();

View File

@ -7,6 +7,7 @@ require_once 'HTMLPurifier/Generator.php';
class HTMLPurifier_ChildDefTest extends UnitTestCase
{
var $def;
var $lex;
var $gen;
@ -16,21 +17,24 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
parent::UnitTestCase();
}
function assertSeries($inputs, $expect, $def, $context = array()) {
function assertSeries($inputs, $expect, $config, $context = array()) {
foreach ($inputs as $i => $input) {
$tokens = $this->lex->tokenizeHTML($input);
if (isset($context[$i])) {
$result = $def->validateChildren($tokens, $context[$i]);
} else {
$result = $def->validateChildren($tokens);
if (!isset($context[$i])) {
$context[$i] = null;
}
if (!isset($config[$i])) {
$config[$i] = HTMLPurifier_Config::createDefault();
}
$result = $this->def->validateChildren($tokens, $config[$i], $context[$i]);
if (is_bool($expect[$i])) {
$this->assertIdentical($expect[$i], $result);
$this->assertIdentical($expect[$i], $result, "Test $i: %s");
} else {
$result_html = $this->gen->generateFromTokens($result);
$this->assertEqual($expect[$i], $result_html, "Test $i: %s");
$this->assertIdentical($expect[$i], $result_html, "Test $i: %s");
paintIf($result_html, $result_html != $expect[$i]);
}
}
@ -39,9 +43,11 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
function test_custom() {
// the table definition
$def = new HTMLPurifier_ChildDef_Custom(
$this->def = new HTMLPurifier_ChildDef_Custom(
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
$inputs = $expect = $config = array();
$inputs[0] = '';
$expect[0] = false;
@ -58,7 +64,7 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
$inputs[3] = '<col></col><col></col><col></col><tr></tr>';
$expect[3] = true;
$this->assertSeries($inputs, $expect, $def);
$this->assertSeries($inputs, $expect, $config);
}
@ -82,7 +88,8 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
function test_required_pcdata_forbidden() {
$def = new HTMLPurifier_ChildDef_Required('dt | dd');
$this->def = new HTMLPurifier_ChildDef_Required('dt | dd');
$inputs = $expect = $config = array();
$inputs[0] = '';
$expect[0] = false;
@ -105,21 +112,31 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
$inputs[5] = "\t ";
$expect[5] = false;
$this->assertSeries($inputs, $expect, $def);
$this->assertSeries($inputs, $expect, $config);
}
function test_required_pcdata_allowed() {
$def = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
$this->def = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
$inputs = $expect = $config = array();
$inputs[0] = '<b>Bold text</b><img />';
$expect[0] = '<b>Bold text</b>&lt;img /&gt;';
$expect[0] = '<b>Bold text</b>';
$this->assertSeries($inputs, $expect, $def);
// with child escaping on
$inputs[1] = '<b>Bold text</b><img />';
$expect[1] = '<b>Bold text</b>&lt;img /&gt;';
$config[1] = HTMLPurifier_Config::createDefault();
$config[1]->set('Core', 'EscapeInvalidChildren', true);
$this->assertSeries($inputs, $expect, $config);
}
function test_optional() {
$def = new HTMLPurifier_ChildDef_Optional('b | i');
$this->def = new HTMLPurifier_ChildDef_Optional('b | i');
$inputs = $expect = $config = array();
$inputs[0] = '<b>Bold text</b><img />';
$expect[0] = '<b>Bold text</b>';
@ -127,16 +144,19 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
$inputs[1] = 'Not allowed text';
$expect[1] = '';
$this->assertSeries($inputs, $expect, $def);
$this->assertSeries($inputs, $expect, $config);
}
function test_chameleon() {
$def = new HTMLPurifier_ChildDef_Chameleon(
$this->def = new HTMLPurifier_ChildDef_Chameleon(
'b | i', // allowed only when in inline context
'b | i | div' // allowed only when in block context
);
$inputs = $expect = $config = array();
$context = array();
$inputs[0] = '<b>Allowed.</b>';
$expect[0] = true;
$context[0] = 'inline';
@ -149,7 +169,7 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
$expect[2] = true;
$context[2] = 'block';
$this->assertSeries($inputs, $expect, $def, $context);
$this->assertSeries($inputs, $expect, $config, $context);
}

View File

@ -12,6 +12,10 @@ class HTMLPurifier_Strategy_CoreTest
$inputs = array();
$expect = array();
$config = array();
$config_escape = HTMLPurifier_Config::createDefault();
$config_escape->set('Core', 'EscapeInvalidChildren', true);
$inputs[0] = '';
$expect[0] = '';
@ -19,19 +23,17 @@ class HTMLPurifier_Strategy_CoreTest
$inputs[1] = '<b>Make well formed.';
$expect[1] = '<b>Make well formed.</b>';
// behavior may change
$inputs[2] = '<b><div>Fix nesting.</div></b>';
$expect[2] = '<b>&lt;div&gt;Fix nesting.&lt;/div&gt;</b>';
$expect[2] = '<b></b>';
// behavior may change
$inputs[3] = '<asdf>Foreign element removal.</asdf>';
$expect[3] = '&lt;asdf&gt;Foreign element removal.&lt;/asdf&gt;';
// behavior may change
$inputs[4] = '<foo><b><div>All three.</div></b>';
$expect[4] = '&lt;foo&gt;<b>&lt;div&gt;All three.&lt;/div&gt;</b>';
$expect[4] = '&lt;foo&gt;<b></b>';
$this->assertStrategyWorks($strategy, $inputs, $expect);
$this->assertStrategyWorks($strategy, $inputs, $expect, $config);
}
}

View File

@ -13,6 +13,10 @@ class HTMLPurifier_Strategy_FixNestingTest
$inputs = array();
$expect = array();
$config = array();
$config_escape = HTMLPurifier_Config::createDefault();
$config_escape->set('Core', 'EscapeInvalidChildren', true);
// next id = 4
@ -27,7 +31,12 @@ class HTMLPurifier_Strategy_FixNestingTest
// illegal block in inline, element -> text
$inputs[2] = '<b><div>Illegal div.</div></b>';
$expect[2] = '<b>&lt;div&gt;Illegal div.&lt;/div&gt;</b>';
$expect[2] = '<b></b>';
// same test with different configuration (fragile)
$inputs[13] = '<b><div>Illegal div.</div></b>';
$expect[13] = '<b>&lt;div&gt;Illegal div.&lt;/div&gt;</b>';
$config[13] = $config_escape;
// test of empty set that's required, resulting in removal of node
$inputs[3] = '<ul></ul>';
@ -63,13 +72,20 @@ class HTMLPurifier_Strategy_FixNestingTest
// block in inline ins not allowed
$inputs[11] = '<span><ins><div>Not allowed!</div></ins></span>';
$expect[11] = '<span><ins>&lt;div&gt;Not allowed!&lt;/div&gt;</ins></span>';
$expect[11] = '<span><ins></ins></span>';
// block in inline ins not allowed
$inputs[14] = '<span><ins><div>Not allowed!</div></ins></span>';
$expect[14] = '<span><ins>&lt;div&gt;Not allowed!&lt;/div&gt;</ins></span>';
$config[14] = $config_escape;
// test exclusions
$inputs[12] = '<a><span><a>Not allowed</a></span></a>';
$expect[12] = '<a><span></span></a>';
$this->assertStrategyWorks($strategy, $inputs, $expect);
// next test is *15*
$this->assertStrategyWorks($strategy, $inputs, $expect, $config);
}
}

View File

@ -28,11 +28,10 @@ class HTMLPurifier_StrategyHarness extends UnitTestCase
function assertStrategyWorks($strategy, $inputs, $expect, $config = array()) {
foreach ($inputs as $i => $input) {
$tokens = $this->lex->tokenizeHTML($input);
if (isset($config[$i])) {
$result_tokens = $strategy->execute($tokens, $config[$i]);
} else {
$result_tokens = $strategy->execute($tokens);
if (!isset($config[$i])) {
$config[$i] = HTMLPurifier_Config::createDefault();
}
$result_tokens = $strategy->execute($tokens, $config[$i]);
$result = $this->gen->generateFromTokens($result_tokens);
$this->assertEqual($expect[$i], $result, "Test $i: %s");
paintIf($result, $result != $expect[$i]);