mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-03 13:21:51 +00:00
[1.7.0] Fix bug in HTMLPurifier_Harness that causes certain aspects of $input to change after parsing
- Add makeLookup() convenience function to HTMLModule - Relocate SGML exclusion comment - Add preliminary Bdo module test git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1049 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
bda9167423
commit
da92cb9ff4
@ -83,6 +83,14 @@ class HTMLPurifier_ElementDef
|
||||
|
||||
/**
|
||||
* Lookup table of tags excluded from all descendants of this tag.
|
||||
* @note SGML permits exclusions for all descendants, but this is
|
||||
* not possible with DTDs or XML Schemas. W3C has elected to
|
||||
* use complicated compositions of content_models to simulate
|
||||
* exclusion for children, but we go the simpler, SGML-style
|
||||
* route of flat-out exclusions, which correctly apply to
|
||||
* all descendants and not just children. Note that the XHTML
|
||||
* Modularization Abstract Modules are blithely unaware of such
|
||||
* distinctions.
|
||||
* @public
|
||||
*/
|
||||
var $excludes = array();
|
||||
|
@ -223,6 +223,24 @@ class HTMLPurifier_HTMLModule
|
||||
}
|
||||
$attr[0] = $attr_includes;
|
||||
}
|
||||
|
||||
/**
 * Convenience function that generates a lookup table with boolean
 * true as value.
 * @param $list List of values to turn into a lookup
 * @note You can also pass an arbitrary number of arguments in
 *       place of the regular argument; this calling style is
 *       assumed whenever the first argument is a scalar or null
 * @note Null values are skipped and never become keys
 * @return Lookup array equivalent of list
 */
function makeLookup($list) {
    // A scalar or null first argument cannot be iterated, so it can only
    // mean the values were passed as separate arguments. Checking for
    // strings alone made makeLookup(1, 2) and makeLookup(null, 'a')
    // fall through to foreach() with a non-array.
    if (is_scalar($list) || is_null($list)) $list = func_get_args();
    $ret = array();
    foreach ($list as $value) {
        if (is_null($value)) continue;
        $ret[$value] = true;
    }
    return $ret;
}
|
||||
}
|
||||
|
||||
?>
|
@ -10,65 +10,61 @@ require_once 'HTMLPurifier/HTMLModule.php';
|
||||
* - Block Structural (div, p)
|
||||
* - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
|
||||
* - Inline Structural (br, span)
|
||||
* We have elected not to follow suit, but this may change.
|
||||
* This module, functionally, does not distinguish between these
|
||||
* sub-modules, but the code is internally structured to reflect
|
||||
* these distinctions.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Text';
|
||||
|
||||
var $elements = array('abbr', 'acronym', 'address', 'blockquote',
|
||||
'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3',
|
||||
'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
|
||||
'var');
|
||||
|
||||
var $content_sets = array(
|
||||
'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
|
||||
'Block' => 'address | blockquote | div | p | pre',
|
||||
'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
|
||||
'Flow' => 'Heading | Block | Inline'
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_Text() {
|
||||
foreach ($this->elements as $element) {
|
||||
$this->info[$element] = new HTMLPurifier_ElementDef();
|
||||
// attributes
|
||||
if ($element == 'br') {
|
||||
$this->info[$element]->attr = array(0 => array('Core'));
|
||||
} elseif ($element == 'blockquote' || $element == 'q') {
|
||||
$this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI');
|
||||
} else {
|
||||
$this->info[$element]->attr = array(0 => array('Common'));
|
||||
}
|
||||
// content models
|
||||
if ($element == 'br') {
|
||||
$this->info[$element]->content_model_type = 'empty';
|
||||
} elseif ($element == 'blockquote') {
|
||||
$this->info[$element]->content_model = 'Heading | Block | List';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
} elseif ($element == 'div') {
|
||||
$this->info[$element]->content_model = '#PCDATA | Flow';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
} else {
|
||||
$this->info[$element]->content_model = '#PCDATA | Inline';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
}
|
||||
}
|
||||
// SGML permits exclusions for all descendants, but this is
|
||||
// not possible with DTDs or XML Schemas. W3C has elected to
|
||||
// use complicated compositions of content_models to simulate
|
||||
// exclusion for children, but we go the simpler, SGML-style
|
||||
// route of flat-out exclusions. Note that the Abstract Module
|
||||
// is blithely unaware of such distinctions.
|
||||
$this->info['pre']->excludes = array_flip(array(
|
||||
'img', 'big', 'small',
|
||||
'object', 'applet', 'font', 'basefont' // generally not allowed
|
||||
));
|
||||
$this->info['p']->auto_close = array_flip(array(
|
||||
|
||||
// Inline Phrasal -------------------------------------------------
|
||||
$this->addElement('abbr', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('acronym', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('cite', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('code', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('dfn', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('em', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('kbd', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('q', true, 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
|
||||
$this->addElement('samp', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('strong', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('var', true, 'Inline', 'Inline', 'Common');
|
||||
|
||||
// Inline Structural ----------------------------------------------
|
||||
$this->addElement('span', true, 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('br', true, 'Inline', 'Empty', 'Core');
|
||||
|
||||
// Block Phrasal --------------------------------------------------
|
||||
$this->addElement('address', true, 'Block', 'Inline', 'Common');
|
||||
$this->addElement('blockquote', true, 'Block',
|
||||
'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') );
|
||||
$pre =& $this->addElement('pre', true, 'Block', 'Inline', 'Common');
|
||||
$pre->excludes = $this->makeLookup(
|
||||
'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' );
|
||||
$this->addElement('h1', true, 'Heading', 'Inline', 'Common');
|
||||
$this->addElement('h2', true, 'Heading', 'Inline', 'Common');
|
||||
$this->addElement('h3', true, 'Heading', 'Inline', 'Common');
|
||||
$this->addElement('h4', true, 'Heading', 'Inline', 'Common');
|
||||
$this->addElement('h5', true, 'Heading', 'Inline', 'Common');
|
||||
$this->addElement('h6', true, 'Heading', 'Inline', 'Common');
|
||||
|
||||
// Block Structural -----------------------------------------------
|
||||
$p =& $this->addElement('p', true, 'Block', 'Inline', 'Common');
|
||||
// this seems really ad hoc: implementing some general
|
||||
// heuristics would probably be better
|
||||
$p->auto_close = $this->makeLookup(
|
||||
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
|
||||
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
|
||||
'table', 'ul'
|
||||
));
|
||||
'table', 'ul' );
|
||||
$this->addElement('div', true, 'Block', 'Flow', 'Common');
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
30
tests/HTMLPurifier/HTMLModule/BdoTest.php
Normal file
30
tests/HTMLPurifier/HTMLModule/BdoTest.php
Normal file
@ -0,0 +1,30 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModuleHarness.php';
|
||||
|
||||
/**
 * Preliminary test case for the Bdo HTML module, built on
 * HTMLPurifier_HTMLModuleHarness.
 */
class HTMLPurifier_HTMLModule_BdoTest extends HTMLPurifier_HTMLModuleHarness
|
||||
{
|
||||
|
||||
/**
 * Passes a <bdo> element nested in a <span> to assertResult(). The
 * element carries id, class, style, title, lang, xml:lang and dir
 * attributes and contains text plus an inline <span>. Attr.EnableID is
 * switched on in the config array passed as the third argument.
 * NOTE(review): the expected value `true` presumably means "output must
 * equal the input" - confirm against assertResult() in the parent harness.
 */
function test() {
|
||||
|
||||
$this->assertResult(
|
||||
'<span>
|
||||
<bdo
|
||||
id="test-id"
|
||||
class="class-name"
|
||||
style="font-weight:bold;"
|
||||
title="Title of tag"
|
||||
lang="en"
|
||||
xml:lang="en"
|
||||
dir="rtl"
|
||||
>
|
||||
#PCDATA <span>Inline</span>
|
||||
</bdo>
|
||||
</span>', true, array('Attr.EnableID' => true)
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
14
tests/HTMLPurifier/HTMLModuleHarness.php
Normal file
14
tests/HTMLPurifier/HTMLModuleHarness.php
Normal file
@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/StrategyHarness.php';
|
||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||
|
||||
/**
 * Base harness for HTML module test cases. It extends the strategy
 * harness and installs HTMLPurifier_Strategy_Core as the object under
 * test.
 */
class HTMLPurifier_HTMLModuleHarness extends HTMLPurifier_StrategyHarness
{

    /**
     * Runs the parent harness setup first, then replaces the object
     * under test with a fresh HTMLPurifier_Strategy_Core instance.
     */
    function setup() {
        parent::setup();
        $this->obj = new HTMLPurifier_Strategy_Core();
    }

}
|
||||
|
||||
?>
|
@ -121,6 +121,30 @@ class HTMLPurifier_HTMLModuleTest extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
/**
 * makeLookup() must produce the same lookup table whether the values
 * arrive as separate string arguments or as a single array.
 */
function test_makeLookup() {

    $module = new HTMLPurifier_HTMLModule();

    $one_entry   = array('foo' => true);
    $two_entries = array('foo' => true, 'two' => true);

    // single value: separate-argument form, then array form
    $this->assertIdentical($module->makeLookup('foo'), $one_entry);
    $this->assertIdentical($module->makeLookup(array('foo')), $one_entry);

    // two values: separate-argument form, then array form
    $this->assertIdentical($module->makeLookup('foo', 'two'), $two_entries);
    $this->assertIdentical($module->makeLookup(array('foo', 'two')), $two_entries);

}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@ -73,12 +73,17 @@ class HTMLPurifier_Harness extends UnitTestCase
|
||||
$context->loadArray($context_array);
|
||||
|
||||
if ($this->to_tokens && is_string($input)) {
|
||||
$input = $this->lexer->tokenizeHTML($input, $config, $context);
|
||||
// $func may cause $input to change, so "clone" another copy
|
||||
// to sacrifice
|
||||
$input = $this->lexer->tokenizeHTML($s = $input, $config, $context);
|
||||
$input_c = $this->lexer->tokenizeHTML($s, $config, $context);
|
||||
} else {
|
||||
$input_c = $input;
|
||||
}
|
||||
|
||||
// call the function
|
||||
$func = $this->func;
|
||||
$result = $this->obj->$func($input, $config, $context);
|
||||
$result = $this->obj->$func($input_c, $config, $context);
|
||||
|
||||
// test a bool result
|
||||
if (is_bool($result)) {
|
||||
|
@ -62,6 +62,7 @@ $test_files[] = 'EntityParserTest.php';
|
||||
$test_files[] = 'GeneratorTest.php';
|
||||
$test_files[] = 'HTMLModuleManagerTest.php';
|
||||
$test_files[] = 'HTMLModuleTest.php';
|
||||
$test_files[] = 'HTMLModule/BdoTest.php';
|
||||
$test_files[] = 'IDAccumulatorTest.php';
|
||||
$test_files[] = 'LanguageFactoryTest.php';
|
||||
$test_files[] = 'LanguageTest.php';
|
||||
|
Loading…
Reference in New Issue
Block a user