0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-03 05:11:52 +00:00
- Partially finished migrating to new Context object (done in r485).
- Created HTMLPurifier_Harness to assist with testing, ChildDefTest migrated to that framework.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@484 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-10-01 20:47:07 +00:00
parent 58be73fcf7
commit 8f515b9cda
21 changed files with 261 additions and 203 deletions

View File

@ -10,12 +10,9 @@ Directives are divided into namespaces, indicating the major portion of
functionality they cover (although there may be overlaps). Please consult functionality they cover (although there may be overlaps). Please consult
the documentation in ConfigDef for more information on these namespaces. the documentation in ConfigDef for more information on these namespaces.
Since configuration is dependent on context, most of the internal classes Since configuration is dependent on context, internal classes require a
require a configuration object to be passed as a parameter. However, a few configuration object to be passed as a parameter. (They also require a
make this optional: they will supply a default configuration object if none Context object).
are passed. These classes are: HTMLPurifier::*, Generator::generateFromTokens
and Lexer::tokenizeHTML. However, whenever a valid configuration object
is defined, that object should be used.
In relation to HTMLDefinition and CSSDefinition, there is a special class In relation to HTMLDefinition and CSSDefinition, there is a special class
of directives that influence the *construction* of the Definition object. of directives that influence the *construction* of the Definition object.

View File

@ -44,6 +44,7 @@
// they get included // they get included
require_once 'HTMLPurifier/ConfigSchema.php'; require_once 'HTMLPurifier/ConfigSchema.php';
require_once 'HTMLPurifier/Config.php'; require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Context.php';
require_once 'HTMLPurifier/Lexer.php'; require_once 'HTMLPurifier/Lexer.php';
require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Generator.php';
@ -95,16 +96,17 @@ class HTMLPurifier
*/ */
function purify($html, $config = null) { function purify($html, $config = null) {
$config = $config ? $config : $this->config; $config = $config ? $config : $this->config;
$html = $this->encoder->convertToUTF8($html, $config); $context =& new HTMLPurifier_Context();
$html = $this->encoder->convertToUTF8($html, $config, $context);
$html = $html =
$this->generator->generateFromTokens( $this->generator->generateFromTokens(
$this->strategy->execute( $this->strategy->execute(
$this->lexer->tokenizeHTML($html, $config), $this->lexer->tokenizeHTML($html, $config, $context),
$config $config, $context
), ),
$config $config, $context
); );
$html = $this->encoder->convertFromUTF8($html, $config); $html = $this->encoder->convertFromUTF8($html, $config, $context);
return $html; return $html;
} }

View File

@ -38,15 +38,14 @@ class HTMLPurifier_ChildDef
/** /**
* Validates nodes according to definition and returns modification. * Validates nodes according to definition and returns modification.
* *
* @warning $context is NOT HTMLPurifier_AttrContext
* @param $tokens_of_children Array of HTMLPurifier_Token * @param $tokens_of_children Array of HTMLPurifier_Token
* @param $config HTMLPurifier_Config object * @param $config HTMLPurifier_Config object
* @param $context String context indicating inline, block or unknown * @param $context HTMLPurifier_Context object
* @return bool true to leave nodes as is * @return bool true to leave nodes as is
* @return bool false to remove parent node * @return bool false to remove parent node
* @return array of replacement child tokens * @return array of replacement child tokens
*/ */
function validateChildren($tokens_of_children, $config, $context) { function validateChildren($tokens_of_children, $config, &$context) {
trigger_error('Call to abstract function', E_USER_ERROR); trigger_error('Call to abstract function', E_USER_ERROR);
} }
} }
@ -91,7 +90,7 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg); $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
$this->_pcre_regex = $reg; $this->_pcre_regex = $reg;
} }
function validateChildren($tokens_of_children, $config, $context) { function validateChildren($tokens_of_children, $config, &$context) {
$list_of_children = ''; $list_of_children = '';
$nesting = 0; // depth into the nest $nesting = 0; // depth into the nest
foreach ($tokens_of_children as $token) { foreach ($tokens_of_children as $token) {
@ -145,7 +144,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
} }
var $allow_empty = false; var $allow_empty = false;
var $type = 'required'; var $type = 'required';
function validateChildren($tokens_of_children, $config, $context) { function validateChildren($tokens_of_children, $config, &$context) {
// if there are no tokens, delete parent node // if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false; if (empty($tokens_of_children)) return false;
@ -227,7 +226,7 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{ {
var $allow_empty = true; var $allow_empty = true;
var $type = 'optional'; var $type = 'optional';
function validateChildren($tokens_of_children, $config, $context) { function validateChildren($tokens_of_children, $config, &$context) {
$result = parent::validateChildren($tokens_of_children, $config, $context); $result = parent::validateChildren($tokens_of_children, $config, $context);
if ($result === false) return array(); if ($result === false) return array();
return $result; return $result;
@ -246,7 +245,7 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
var $allow_empty = true; var $allow_empty = true;
var $type = 'empty'; var $type = 'empty';
function HTMLPurifier_ChildDef_Empty() {} function HTMLPurifier_ChildDef_Empty() {}
function validateChildren($tokens_of_children, $config, $context) { function validateChildren($tokens_of_children, $config, &$context) {
return array(); return array();
} }
} }
@ -281,8 +280,9 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
$this->block = new HTMLPurifier_ChildDef_Optional($block); $this->block = new HTMLPurifier_ChildDef_Optional($block);
} }
function validateChildren($tokens_of_children, $config, $context) { function validateChildren($tokens_of_children, $config, &$context) {
switch ($context) { $parent_type = $context->get('ParentType');
switch ($parent_type) {
case 'unknown': case 'unknown':
case 'inline': case 'inline':
$result = $this->inline->validateChildren( $result = $this->inline->validateChildren(
@ -308,7 +308,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
var $allow_empty = false; var $allow_empty = false;
var $type = 'table'; var $type = 'table';
function HTMLPurifier_ChildDef_Table() {} function HTMLPurifier_ChildDef_Table() {}
function validateChildren($tokens_of_children, $config, $context) { function validateChildren($tokens_of_children, $config, &$context) {
if (empty($tokens_of_children)) return false; if (empty($tokens_of_children)) return false;
// this ensures that the loop gets run one last time before closing // this ensures that the loop gets run one last time before closing

View File

@ -266,7 +266,7 @@ class HTMLPurifier_Encoder
/** /**
* Converts a string to UTF-8 based on configuration. * Converts a string to UTF-8 based on configuration.
*/ */
function convertToUTF8($str, $config) { function convertToUTF8($str, $config, &$context) {
static $iconv = null; static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv'); if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding'); $encoding = $config->get('Core', 'Encoding');
@ -283,7 +283,7 @@ class HTMLPurifier_Encoder
* @note Currently, this is a lossy conversion, with unexpressable * @note Currently, this is a lossy conversion, with unexpressable
* characters being omitted. * characters being omitted.
*/ */
function convertFromUTF8($str, $config) { function convertFromUTF8($str, $config, &$context) {
static $iconv = null; static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv'); if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding'); $encoding = $config->get('Core', 'Encoding');

View File

@ -122,7 +122,7 @@ class HTMLPurifier_Lexer
* @param $string String HTML. * @param $string String HTML.
* @return HTMLPurifier_Token array representation of HTML. * @return HTMLPurifier_Token array representation of HTML.
*/ */
function tokenizeHTML($string, $config = null) { function tokenizeHTML($string, $config, &$context) {
trigger_error('Call to abstract class', E_USER_ERROR); trigger_error('Call to abstract class', E_USER_ERROR);
} }
@ -196,7 +196,7 @@ class HTMLPurifier_Lexer
* Takes a piece of HTML and normalizes it by converting entities, fixing * Takes a piece of HTML and normalizes it by converting entities, fixing
* encoding, extracting bits, and other good stuff. * encoding, extracting bits, and other good stuff.
*/ */
function normalize($html, $config) { function normalize($html, $config, &$context) {
// extract body from document if applicable // extract body from document if applicable
if ($config->get('Core', 'AcceptFullDocuments')) { if ($config->get('Core', 'AcceptFullDocuments')) {

View File

@ -38,10 +38,9 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$this->factory = new HTMLPurifier_TokenFactory(); $this->factory = new HTMLPurifier_TokenFactory();
} }
public function tokenizeHTML($string, $config = null) { public function tokenizeHTML($string, $config, &$context) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
$string = $this->normalize($string, $config); $string = $this->normalize($string, $config, $context);
// preprocess string, essential for UTF-8 // preprocess string, essential for UTF-8
$string = $string =

View File

@ -24,11 +24,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
*/ */
var $_whitespace = "\x20\x09\x0D\x0A"; var $_whitespace = "\x20\x09\x0D\x0A";
function tokenizeHTML($html, $config = null) { function tokenizeHTML($html, $config, &$context) {
if (!$config) $config = HTMLPurifier_Config::createDefault(); $html = $this->normalize($html, $config, $context);
$html = $this->normalize($html, $config);
$cursor = 0; // our location in the text $cursor = 0; // our location in the text
$inside_tag = false; // whether or not we're parsing the inside of a tag $inside_tag = false; // whether or not we're parsing the inside of a tag
@ -147,6 +145,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
if ($attribute_string) { if ($attribute_string) {
$attributes = $this->parseAttributeString( $attributes = $this->parseAttributeString(
$attribute_string $attribute_string
, $config, $context
); );
} else { } else {
$attributes = array(); $attributes = array();
@ -181,7 +180,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
* @param $string Inside of tag excluding name. * @param $string Inside of tag excluding name.
* @returns Assoc array of attributes. * @returns Assoc array of attributes.
*/ */
function parseAttributeString($string) { function parseAttributeString($string, $config, &$context) {
$string = (string) $string; // quick typecast $string = (string) $string; // quick typecast
if ($string == '') return array(); // no attributes if ($string == '') return array(); // no attributes

View File

@ -31,12 +31,11 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
*/ */
var $tokens = array(); var $tokens = array();
function tokenizeHTML($string, $config = null) { function tokenizeHTML($string, $config, &$context) {
$this->tokens = array(); $this->tokens = array();
if (!$config) $config = HTMLPurifier_Config::createDefault(); $string = $this->normalize($string, $config, $context);
$string = $this->normalize($string, $config);
$parser=& new XML_HTMLSax3(); $parser=& new XML_HTMLSax3();
$parser->set_object($this); $parser->set_object($this);

View File

@ -24,7 +24,7 @@ class HTMLPurifier_Strategy
* @param $config Configuration options * @param $config Configuration options
* @returns Processed array of token objects. * @returns Processed array of token objects.
*/ */
function execute($tokens, $config = null) { function execute($tokens, $config, &$context) {
trigger_error('Cannot call abstract function', E_USER_ERROR); trigger_error('Cannot call abstract function', E_USER_ERROR);
} }

View File

@ -18,9 +18,9 @@ class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
trigger_error('Attempt to instantiate abstract object', E_USER_ERROR); trigger_error('Attempt to instantiate abstract object', E_USER_ERROR);
} }
function execute($tokens, $config) { function execute($tokens, $config, &$context) {
foreach ($this->strategies as $strategy) { foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config); $tokens = $strategy->execute($tokens, $config, $context);
} }
return $tokens; return $tokens;
} }

View File

@ -34,8 +34,7 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{ {
function execute($tokens, $config) { function execute($tokens, $config, &$context) {
//####################################################################// //####################################################################//
// Pre-processing // Pre-processing
@ -49,6 +48,10 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name)); array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
$tokens[] = new HTMLPurifier_Token_End($parent_name); $tokens[] = new HTMLPurifier_Token_End($parent_name);
// setup the context variables
$parent_type = 'unknown'; // reference var that we alter
$context->register('ParentType', $parent_type);
//####################################################################// //####################################################################//
// Loop initialization // Loop initialization
@ -109,10 +112,10 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// calculate context // calculate context
if (isset($parent_def)) { if (isset($parent_def)) {
$context = $parent_def->type; $parent_type = $parent_def->type;
} else { } else {
// generally found in specialized elements like UL // generally found in specialized elements like UL
$context = 'unknown'; $parent_type = 'unknown';
} }
//################################################################// //################################################################//
@ -145,7 +148,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// have DTD child def validate children // have DTD child def validate children
$result = $child_def->validateChildren( $result = $child_def->validateChildren(
$child_tokens, $config,$context); $child_tokens, $config, $context);
// determine whether or not this element has any exclusions // determine whether or not this element has any exclusions
$excludes = $def->excludes; $excludes = $def->excludes;
@ -247,6 +250,9 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
array_shift($tokens); array_shift($tokens);
array_pop($tokens); array_pop($tokens);
// remove context variables
$context->destroy('ParentType');
//####################################################################// //####################################################################//
// Return // Return

View File

@ -10,7 +10,7 @@ require_once 'HTMLPurifier/Generator.php';
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{ {
function execute($tokens, $config) { function execute($tokens, $config, &$context) {
$definition = $config->getHTMLDefinition(); $definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator(); $generator = new HTMLPurifier_Generator();
$result = array(); $result = array();
@ -86,7 +86,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if (empty($current_nesting)) { if (empty($current_nesting)) {
if ($escape_invalid_tags) { if ($escape_invalid_tags) {
$result[] = new HTMLPurifier_Token_Text( $result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config) $generator->generateFromToken($token, $config, $context)
); );
} }
continue; continue;
@ -123,7 +123,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($skipped_tags === false) { if ($skipped_tags === false) {
if ($escape_invalid_tags) { if ($escape_invalid_tags) {
$result[] = new HTMLPurifier_Token_Text( $result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config) $generator->generateFromToken($token, $config, $context)
); );
} }
continue; continue;

View File

@ -16,7 +16,7 @@ require_once 'HTMLPurifier/TagTransform.php';
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{ {
function execute($tokens, $config) { function execute($tokens, $config, &$context) {
$definition = $config->getHTMLDefinition(); $definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator(); $generator = new HTMLPurifier_Generator();
$result = array(); $result = array();
@ -37,7 +37,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} elseif ($escape_invalid_tags) { } elseif ($escape_invalid_tags) {
// invalid tag, generate HTML and insert in // invalid tag, generate HTML and insert in
$token = new HTMLPurifier_Token_Text( $token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config) $generator->generateFromToken($token, $config, $context)
); );
} else { } else {
continue; continue;

View File

@ -17,18 +17,18 @@ HTMLPurifier_ConfigSchema::define(
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{ {
function execute($tokens, $config) { function execute($tokens, $config, &$context) {
$definition = $config->getHTMLDefinition(); $definition = $config->getHTMLDefinition();
// setup StrategyContext // setup StrategyContext
$context = new HTMLPurifier_AttrContext(); $attr_context = new HTMLPurifier_AttrContext();
// setup ID accumulator and load it with blacklisted IDs // setup ID accumulator and load it with blacklisted IDs
// eventually, we'll have a dedicated context object to hold // eventually, we'll have a dedicated context object to hold
// all these accumulators and caches. For now, just an IDAccumulator // all these accumulators and caches. For now, just an IDAccumulator
$context->id_accumulator = new HTMLPurifier_IDAccumulator(); $attr_context->id_accumulator = new HTMLPurifier_IDAccumulator();
$context->id_accumulator->load($config->get('Attr', 'IDBlacklist')); $attr_context->id_accumulator->load($config->get('Attr', 'IDBlacklist'));
// create alias to global definition array, see also $defs // create alias to global definition array, see also $defs
// DEFINITION CALL // DEFINITION CALL
@ -81,14 +81,14 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
} else { } else {
// validate according to the element's definition // validate according to the element's definition
$result = $defs[$attr_key]->validate( $result = $defs[$attr_key]->validate(
$value, $config, $context $value, $config, $attr_context
); );
} }
} elseif ( isset($d_defs[$attr_key]) ) { } elseif ( isset($d_defs[$attr_key]) ) {
// there is a global definition defined, validate according // there is a global definition defined, validate according
// to the global definition // to the global definition
$result = $d_defs[$attr_key]->validate( $result = $d_defs[$attr_key]->validate(
$value, $config, $context $value, $config, $attr_context
); );
} else { } else {
// system never heard of the attribute? DELETE! // system never heard of the attribute? DELETE!

View File

@ -1,122 +1,78 @@
<?php <?php
require_once 'HTMLPurifier/Harness.php';
require_once 'HTMLPurifier/ChildDef.php'; require_once 'HTMLPurifier/ChildDef.php';
require_once 'HTMLPurifier/Lexer/DirectLex.php'; require_once 'HTMLPurifier/Lexer/DirectLex.php';
require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Generator.php';
class HTMLPurifier_ChildDefTest extends UnitTestCase class HTMLPurifier_ChildDefTest extends HTMLPurifier_Harness
{ {
var $def; function setUp() {
var $lex; $this->obj = null;
var $gen; $this->func = 'validateChildren';
$this->to_tokens = true;
function HTMLPurifier_ChildDefTest() { $this->to_html = true;
// it is vital that the tags be treated as literally as possible
$this->lex = new HTMLPurifier_Lexer_DirectLex();
$this->gen = new HTMLPurifier_Generator();
parent::UnitTestCase();
}
function assertSeries($inputs, $expect, $config, $context = array()) {
foreach ($inputs as $i => $input) {
if (!isset($context[$i])) {
$context[$i] = null;
}
if (!isset($config[$i])) {
$config[$i] = HTMLPurifier_Config::createDefault();
}
$tokens = $this->lex->tokenizeHTML($input, $config[$i]);
$result = $this->def->validateChildren($tokens, $config[$i], $context[$i]);
if (is_bool($expect[$i])) {
$this->assertIdentical($expect[$i], $result, "Test $i: %s");
} else {
$result_html = $this->gen->generateFromTokens($result, $config[$i]);
$this->assertIdentical($expect[$i], $result_html, "Test $i: %s");
paintIf($result_html, $result_html != $expect[$i]);
}
}
} }
function test_custom() { function test_custom() {
$this->def = new HTMLPurifier_ChildDef_Custom( $this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
'(a, b?, c*, d+, (a, b)*)');
$inputs = array(); $this->assertResult('', false);
$expect = array(); $this->assertResult('<a /><a />', false);
$config = array();
$inputs[0] = ''; $this->assertResult('<a /><b /><c /><d /><a /><b />');
$expect[0] = false; $this->assertResult('<a /><d>Dob</d><a /><b>foo</b>'.
'<a href="moo" /><b>foo</b>');
$inputs[1] = '<a /><b /><c /><d /><a /><b />';
$expect[1] = true;
$inputs[2] = '<a /><d>Dob</d><a /><b>foo</b><a href="moo" /><b>foo</b>';
$expect[2] = true;
$inputs[3] = '<a /><a />';
$expect[3] = false;
$this->assertSeries($inputs, $expect, $config);
} }
function test_table() { function test_table() {
// currently inactive, awaiting augmentation
// the table definition // the table definition
$this->def = new HTMLPurifier_ChildDef_Table(); $this->obj = new HTMLPurifier_ChildDef_Table();
$inputs = $expect = $config = array(); $inputs = $expect = $config = array();
$inputs[0] = ''; $this->assertResult('', false);
$expect[0] = false;
// we're using empty tags to compact the tests: under real circumstances // we're using empty tags to compact the tests: under real circumstances
// there would be contents in them // there would be contents in them
$inputs[1] = '<tr />'; $this->assertResult('<tr />');
$expect[1] = true; $this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
'<tr><td>asdf</td></tr></tbody>');
$inputs[2] = '<caption /><col /><thead /><tfoot /><tbody>'. $this->assertResult('<col /><col /><col /><tr />');
'<tr><td>asdf</td></tr></tbody>';
$expect[2] = true;
$inputs[3] = '<col /><col /><col /><tr />';
$expect[3] = true;
// mixed up order // mixed up order
$inputs[4] = '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />'; $this->assertResult(
$expect[4] = '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />'; '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
'<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');
// duplicates of singles // duplicates of singles
// - first caption serves // - first caption serves
// - trailing tfoots/theads get turned into tbodys // - trailing tfoots/theads get turned into tbodys
$inputs[5] = '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />'; $this->assertResult(
$expect[5] = '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'; '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
'<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
);
// errant text dropped (until bubbling is implemented) // errant text dropped (until bubbling is implemented)
$inputs[6] = 'foo'; $this->assertResult('foo', false);
$expect[6] = false;
// whitespace sticks to the previous element, last whitespace is // whitespace sticks to the previous element, last whitespace is
// stationary // stationary
$inputs[7] = "\n <tr />\n <tr />\n "; $this->assertResult("\n <tr />\n <tr />\n ");
$expect[7] = true; $this->assertResult(
"\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
$inputs[8] = "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t"; "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
$expect[8] = "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"; );
$this->assertSeries($inputs, $expect, $config);
} }
function test_parsing() { function testParsing() {
$def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo'); $def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
$this->assertEqual($def->elements, $this->assertEqual($def->elements,
@ -132,92 +88,78 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
'href' => true 'href' => true
,'src' => true ,'src' => true
)); ));
} }
function test_required_pcdata_forbidden() { function test_required_pcdata_forbidden() {
$this->def = new HTMLPurifier_ChildDef_Required('dt | dd'); $this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');
$inputs = $expect = $config = array();
$inputs[0] = ''; $this->assertResult('', false);
$expect[0] = false; $this->assertResult(
'<dt>Term</dt>Text in an illegal location'.
$inputs[1] = '<dt>Term</dt>Text in an illegal location'. '<dd>Definition</dd><b>Illegal tag</b>',
'<dd>Definition</dd><b>Illegal tag</b>'; '<dt>Term</dt><dd>Definition</dd>');
$this->assertResult('How do you do!', false);
$expect[1] = '<dt>Term</dt><dd>Definition</dd>';
$inputs[2] = 'How do you do!';
$expect[2] = false;
// whitespace shouldn't trigger it // whitespace shouldn't trigger it
$inputs[3] = "\n<dd>Definition</dd> "; $this->assertResult("\n<dd>Definition</dd> ");
$expect[3] = true;
$inputs[4] ='<dd>Definition</dd> <b></b> '; $this->assertResult(
$expect[4] = '<dd>Definition</dd> '; '<dd>Definition</dd> <b></b> ',
'<dd>Definition</dd> '
$inputs[5] = "\t "; );
$expect[5] = false; $this->assertResult("\t ", false);
$this->assertSeries($inputs, $expect, $config);
} }
function test_required_pcdata_allowed() { function test_required_pcdata_allowed() {
$this->def = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
$inputs = $expect = $config = array(); $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
$inputs[0] = '<b>Bold text</b><img />'; $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
$expect[0] = '<b>Bold text</b>';
// with child escaping on // with child escaping on
$inputs[1] = '<b>Bold text</b><img />'; $this->assertResult(
$expect[1] = '<b>Bold text</b>&lt;img /&gt;'; '<b>Bold text</b><img />',
$config[1] = HTMLPurifier_Config::createDefault(); '<b>Bold text</b>&lt;img /&gt;',
$config[1]->set('Core', 'EscapeInvalidChildren', true); array(
'Core.EscapeInvalidChildren' => true
)
);
$this->assertSeries($inputs, $expect, $config);
} }
function test_optional() { function test_optional() {
$this->def = new HTMLPurifier_ChildDef_Optional('b | i');
$inputs = $expect = $config = array(); $this->obj = new HTMLPurifier_ChildDef_Optional('b | i');
$inputs[0] = '<b>Bold text</b><img />'; $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
$expect[0] = '<b>Bold text</b>'; $this->assertResult('Not allowed text', '');
$inputs[1] = 'Not allowed text';
$expect[1] = '';
$this->assertSeries($inputs, $expect, $config);
} }
function test_chameleon() { function test_chameleon() {
$this->def = new HTMLPurifier_ChildDef_Chameleon( $this->obj = new HTMLPurifier_ChildDef_Chameleon(
'b | i', // allowed only when in inline context 'b | i', // allowed only when in inline context
'b | i | div' // allowed only when in block context 'b | i | div' // allowed only when in block context
); );
$inputs = $expect = $config = array(); $this->assertResult(
$context = array(); '<b>Allowed.</b>', true,
array(), array('ParentType' => 'inline')
);
$inputs[0] = '<b>Allowed.</b>'; $this->assertResult(
$expect[0] = true; '<div>Not allowed.</div>', '',
$context[0] = 'inline'; array(), array('ParentType' => 'inline')
);
$inputs[1] = '<div>Not allowed.</div>'; $this->assertResult(
$expect[1] = ''; '<div>Allowed.</div>', true,
$context[1] = 'inline'; array(), array('ParentType' => 'block')
);
$inputs[2] = '<div>Allowed.</div>';
$expect[2] = true;
$context[2] = 'block';
$this->assertSeries($inputs, $expect, $config, $context);
} }

View File

@ -31,10 +31,11 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
function test_convertToUTF8() { function test_convertToUTF8() {
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
// UTF-8 means that we don't touch it // UTF-8 means that we don't touch it
$this->assertIdentical( $this->assertIdentical(
$this->Encoder->convertToUTF8("\xF6", $config), $this->Encoder->convertToUTF8("\xF6", $config, $context),
"\xF6" // this is invalid "\xF6" // this is invalid
); );
$this->assertNoErrors(); $this->assertNoErrors();
@ -43,14 +44,14 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
// Now it gets converted // Now it gets converted
$this->assertIdentical( $this->assertIdentical(
$this->Encoder->convertToUTF8("\xF6", $config), $this->Encoder->convertToUTF8("\xF6", $config, $context),
"\xC3\xB6" "\xC3\xB6"
); );
$config->set('Test', 'ForceNoIconv', true); $config->set('Test', 'ForceNoIconv', true);
$this->assertIdentical( $this->assertIdentical(
$this->Encoder->convertToUTF8("\xF6", $config), $this->Encoder->convertToUTF8("\xF6", $config, $context),
"\xC3\xB6" "\xC3\xB6"
); );
@ -58,10 +59,11 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
function test_convertFromUTF8() { function test_convertFromUTF8() {
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
// UTF-8 means that we don't touch it // UTF-8 means that we don't touch it
$this->assertIdentical( $this->assertIdentical(
$this->Encoder->convertFromUTF8("\xC3\xB6", $config), $this->Encoder->convertFromUTF8("\xC3\xB6", $config, $context),
"\xC3\xB6" "\xC3\xB6"
); );
@ -69,14 +71,14 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
// Now it gets converted // Now it gets converted
$this->assertIdentical( $this->assertIdentical(
$this->Encoder->convertFromUTF8("\xC3\xB6", $config), $this->Encoder->convertFromUTF8("\xC3\xB6", $config, $context),
"\xF6" "\xF6"
); );
$config->set('Test', 'ForceNoIconv', true); $config->set('Test', 'ForceNoIconv', true);
$this->assertIdentical( $this->assertIdentical(
$this->Encoder->convertFromUTF8("\xC3\xB6", $config), $this->Encoder->convertFromUTF8("\xC3\xB6", $config, $context),
"\xF6" "\xF6"
); );

View File

@ -0,0 +1,107 @@
<?php
/**
 * General-purpose test-harness that makes testing functions that require
 * configuration and context objects easier when those two parameters are
 * meaningless. See HTMLPurifier_ChildDefTest for a good example of usage.
 *
 * Typical usage: subclass, assign $obj and $func (and optionally $to_tokens
 * and $to_html), then call assertResult() once per test case.
 */
class HTMLPurifier_Harness extends UnitTestCase
{
    
    /**
     * Instance of the object that will execute the method
     */
    var $obj;
    
    /**
     * Name of the function to be executed
     */
    var $func;
    
    /**
     * Whether or not the method deals in tokens. If set to true, assertResult()
     * will tokenize string input with $lexer before calling the method.
     * Converting the result back into HTML is controlled by $to_html.
     */
    var $to_tokens = false;
    
    /**
     * Whether or not to convert tokens back into HTML before performing
     * equality check, has no effect on bools.
     */
    var $to_html = false;
    
    /**
     * Instance of an HTMLPurifier_Lexer implementation.
     */
    var $lexer;
    
    /**
     * Instance of HTMLPurifier_Generator
     */
    var $generator;
    
    /**
     * Sets up the lexer and generator used for token/HTML conversion, then
     * defers to the UnitTestCase constructor.
     */
    function HTMLPurifier_Harness() {
        $this->lexer     = new HTMLPurifier_Lexer_DirectLex();
        $this->generator = new HTMLPurifier_Generator();
        parent::UnitTestCase();
    }
    
    /**
     * Asserts a specific result from a one parameter + config/context function
     * @param $input Input parameter
     * @param $expect Expectation. When the function returns a bool, the result
     *                must be identical to this value. When the function
     *                returns a non-bool and this is a bool, the (possibly
     *                tokenized) input itself is used as the expectation,
     *                i.e. the function is expected to leave it unchanged.
     * @param $config_array Configuration array in form of
     *                      Namespace.Directive => Value or an actual config
     *                      object.
     * @param $context_array Context array in form of Key => Value or an actual
     *                       context object.
     */
    function assertResult($input, $expect = true,
        $config_array = array(), $context_array = array()
    ) {
        
        // setup config object: a ready-made object is used as-is, an array
        // is applied on top of the default configuration
        if (is_object($config_array)) {
            $config = $config_array;
        } else {
            $config = HTMLPurifier_Config::createDefault();
            foreach ($config_array as $key => $value) {
                list($namespace, $directive) = explode('.', $key);
                $config->set($namespace, $directive, $value);
            }
        }
        
        // setup context object: same convention as for the config
        if (is_object($context_array)) {
            $context = $context_array;
        } else {
            $context = new HTMLPurifier_Context();
            foreach ($context_array as $key => $value) {
                $context->register($key, $value);
            }
        }
        
        // string input is turned into tokens only when the method under
        // test works on tokens
        if ($this->to_tokens && is_string($input)) {
            $input = $this->lexer->tokenizeHTML($input, $config, $context);
        }
        
        // call the function
        $func = $this->func;
        $result = $this->obj->$func($input, $config, $context);
        
        // test a bool result
        if (is_bool($result)) {
            $this->assertIdentical($expect, $result);
            return;
        } elseif (is_bool($expect)) {
            // non-bool result with a bool expectation: compare against input
            $expect = $input;
        }
        
        if ($this->to_html) {
            $result = $this->generator->
              generateFromTokens($result, $config, $context);
            // only token arrays need converting; a string expectation is
            // already HTML
            if (is_array($expect)) {
                $expect = $this->generator->
                  generateFromTokens($expect, $config, $context);
            }
        }
        
        $this->assertEqual($expect, $result);
        
    }
    
}
?>

View File

@ -53,9 +53,11 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
$input[10] = 'name="input" selected'; $input[10] = 'name="input" selected';
$expect[10] = array('name' => 'input', 'selected' => 'selected'); $expect[10] = array('name' => 'input', 'selected' => 'selected');
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
$size = count($input); $size = count($input);
for($i = 0; $i < $size; $i++) { for($i = 0; $i < $size; $i++) {
$result = $this->DirectLex->parseAttributeString($input[$i]); $result = $this->DirectLex->parseAttributeString($input[$i], $config, $context);
$this->assertEqual($expect[$i], $result, 'Test ' . $i . ': %s'); $this->assertEqual($expect[$i], $result, 'Test ' . $i . ': %s');
paintIf($result, $expect[$i] != $result); paintIf($result, $expect[$i] != $result);
} }

View File

@ -279,16 +279,17 @@ class HTMLPurifier_LexerTest extends UnitTestCase
$expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) ); $expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
$default_config = HTMLPurifier_Config::createDefault(); $default_config = HTMLPurifier_Config::createDefault();
$default_context = new HTMLPurifier_Context();
foreach($input as $i => $discard) { foreach($input as $i => $discard) {
if (!isset($config[$i])) $config[$i] = $default_config; if (!isset($config[$i])) $config[$i] = $default_config;
$result = $this->DirectLex->tokenizeHTML($input[$i], $config[$i]); $result = $this->DirectLex->tokenizeHTML($input[$i], $config[$i], $default_context);
$this->assertEqual($expect[$i], $result, 'DirectLexTest '.$i.': %s'); $this->assertEqual($expect[$i], $result, 'DirectLexTest '.$i.': %s');
paintIf($result, $expect[$i] != $result); paintIf($result, $expect[$i] != $result);
if ($this->_has_pear) { if ($this->_has_pear) {
// assert unless I say otherwise // assert unless I say otherwise
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i], $config[$i]); $sax_result = $this->PEARSax3->tokenizeHTML($input[$i], $config[$i], $default_context);
if (!isset($sax_expect[$i])) { if (!isset($sax_expect[$i])) {
// by default, assert with normal result // by default, assert with normal result
$this->assertEqual($expect[$i], $sax_result, 'PEARSax3Test '.$i.': %s'); $this->assertEqual($expect[$i], $sax_result, 'PEARSax3Test '.$i.': %s');
@ -304,7 +305,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
} }
if ($this->_has_dom) { if ($this->_has_dom) {
$dom_result = $this->DOMLex->tokenizeHTML($input[$i], $config[$i]); $dom_result = $this->DOMLex->tokenizeHTML($input[$i], $config[$i], $default_context);
// same structure as SAX // same structure as SAX
if (!isset($dom_expect[$i])) { if (!isset($dom_expect[$i])) {
$this->assertEqual($expect[$i], $dom_result, 'DOMLexTest '.$i.': %s'); $this->assertEqual($expect[$i], $dom_result, 'DOMLexTest '.$i.': %s');

View File

@ -28,6 +28,7 @@ class HTMLPurifier_Strategy_CompositeTest extends UnitTestCase
$mock_1 = new HTMLPurifier_StrategyMock($this); $mock_1 = new HTMLPurifier_StrategyMock($this);
$mock_2 = new HTMLPurifier_StrategyMock($this); $mock_2 = new HTMLPurifier_StrategyMock($this);
$mock_3 = new HTMLPurifier_StrategyMock($this); $mock_3 = new HTMLPurifier_StrategyMock($this);
$context = new HTMLPurifier_Context();
// setup the object // setup the object
@ -43,9 +44,9 @@ class HTMLPurifier_Strategy_CompositeTest extends UnitTestCase
$config = new HTMLPurifier_ConfigMock(); $config = new HTMLPurifier_ConfigMock();
$params_1 = array($input_1, $config); $params_1 = array($input_1, $config, $context);
$params_2 = array($input_2, $config); $params_2 = array($input_2, $config, $context);
$params_3 = array($input_3, $config); $params_3 = array($input_3, $config, $context);
$mock_1->expectOnce('execute', $params_1); $mock_1->expectOnce('execute', $params_1);
$mock_1->setReturnValue('execute', $input_2, $params_1); $mock_1->setReturnValue('execute', $input_2, $params_1);
@ -58,7 +59,7 @@ class HTMLPurifier_Strategy_CompositeTest extends UnitTestCase
// perform test // perform test
$output = $composite->execute($input_1, $config); $output = $composite->execute($input_1, $config, $context);
$this->assertIdentical($input_4, $output); $this->assertIdentical($input_4, $output);
// tally the calls // tally the calls

View File

@ -25,12 +25,13 @@ class HTMLPurifier_StrategyHarness extends UnitTestCase
} }
function assertStrategyWorks($strategy, $inputs, $expect, $config = array()) { function assertStrategyWorks($strategy, $inputs, $expect, $config = array()) {
$context = new HTMLPurifier_Context();
foreach ($inputs as $i => $input) { foreach ($inputs as $i => $input) {
$tokens = $this->lex->tokenizeHTML($input);
if (!isset($config[$i])) { if (!isset($config[$i])) {
$config[$i] = HTMLPurifier_Config::createDefault(); $config[$i] = HTMLPurifier_Config::createDefault();
} }
$result_tokens = $strategy->execute($tokens, $config[$i]); $tokens = $this->lex->tokenizeHTML($input, $config[$i], $context);
$result_tokens = $strategy->execute($tokens, $config[$i], $context);
$result = $this->gen->generateFromTokens($result_tokens, $config[$i]); $result = $this->gen->generateFromTokens($result_tokens, $config[$i]);
$this->assertEqual($expect[$i], $result, "Test $i: %s"); $this->assertEqual($expect[$i], $result, "Test $i: %s");
paintIf($result, $result != $expect[$i]); paintIf($result, $result != $expect[$i]);