mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-05 06:01:52 +00:00
Finish HTMLDTD_ChildDef_Required.
Fix a bug in HTML_Generator that caused attribute-less empty elements to be generated with an extra space in them. Add a whitespace designation to MF_Text. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@44 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
20c53d6017
commit
83f735ea7e
@ -21,7 +21,7 @@ class HTML_Generator
|
|||||||
|
|
||||||
} elseif (is_a($token, 'MF_EmptyTag')) {
|
} elseif (is_a($token, 'MF_EmptyTag')) {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attributes);
|
||||||
return '<' . $token->name . ' ' . $attr . ' />';
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
||||||
|
|
||||||
} elseif (is_a($token, 'MF_Text')) {
|
} elseif (is_a($token, 'MF_Text')) {
|
||||||
return htmlentities($token->data, ENT_COMPAT, 'UTF-8');
|
return htmlentities($token->data, ENT_COMPAT, 'UTF-8');
|
||||||
|
@ -31,9 +31,10 @@ class MF_Text extends MF
|
|||||||
{
|
{
|
||||||
var $name = '#PCDATA';
|
var $name = '#PCDATA';
|
||||||
var $data;
|
var $data;
|
||||||
|
var $is_whitespace = false;
|
||||||
function MF_Text($data) {
|
function MF_Text($data) {
|
||||||
$this->data = trim($data); // fairly certain trimming it's okay
|
$this->data = $data;
|
||||||
// but it's not default SAX behavior
|
if (trim($data, " \n\r\t") === '') $this->is_whitespace = true;
|
||||||
}
|
}
|
||||||
function append($mf_text) {
|
function append($mf_text) {
|
||||||
return new MF_Text($this->data . $mf_text->data);
|
return new MF_Text($this->data . $mf_text->data);
|
||||||
|
@ -342,6 +342,10 @@ class HTMLDTD_Element
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HTMLDTD_ChildDef and inheritance have three types of output:
|
||||||
|
// true = leave nodes as is
|
||||||
|
// false = delete parent node and all children
|
||||||
|
// array(...) = replace children nodes with these
|
||||||
class HTMLDTD_ChildDef
|
class HTMLDTD_ChildDef
|
||||||
{
|
{
|
||||||
var $dtd_regex;
|
var $dtd_regex;
|
||||||
@ -354,13 +358,76 @@ class HTMLDTD_ChildDef_Simple extends HTMLDTD_ChildDef
|
|||||||
{
|
{
|
||||||
var $elements = array();
|
var $elements = array();
|
||||||
function HTMLDTD_ChildDef_Simple($elements) {
|
function HTMLDTD_ChildDef_Simple($elements) {
|
||||||
|
if (is_string($elements)) {
|
||||||
|
$elements = str_replace(' ', '', $elements);
|
||||||
|
$elements = explode('|', $elements);
|
||||||
|
}
|
||||||
|
$elements = array_flip($elements);
|
||||||
|
foreach ($elements as $i => $x) $elements[$i] = true;
|
||||||
$this->elements = $elements;
|
$this->elements = $elements;
|
||||||
|
$this->gen = new HTML_Generator();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple
|
class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple
|
||||||
{
|
{
|
||||||
function validateChildren($tokens_of_children) {
|
function validateChildren($tokens_of_children) {
|
||||||
|
// if there are no tokens, delete parent node
|
||||||
|
if (empty($tokens_of_children)) return false;
|
||||||
|
|
||||||
|
// the new set of children
|
||||||
|
$result = array();
|
||||||
|
|
||||||
|
// current depth into the nest
|
||||||
|
$nesting = 0;
|
||||||
|
|
||||||
|
// whether or not we're deleting a node
|
||||||
|
$is_deleting = false;
|
||||||
|
|
||||||
|
// whether or not parsed character data is allowed
|
||||||
|
// this controls whether or not we silently drop a tag
|
||||||
|
// or generate escaped HTML from it
|
||||||
|
$pcdata_allowed = isset($this->elements['#PCDATA']);
|
||||||
|
|
||||||
|
// a little sanity check to make sure it's not ALL whitespace
|
||||||
|
$all_whitespace = true;
|
||||||
|
|
||||||
|
foreach ($tokens_of_children as $token) {
|
||||||
|
if (!empty($token->is_whitespace)) {
|
||||||
|
$result[] = $token;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||||
|
|
||||||
|
$is_child = ($nesting == 0);
|
||||||
|
|
||||||
|
if (is_a($token, 'MF_StartTag')) {
|
||||||
|
$nesting++;
|
||||||
|
} elseif (is_a($token, 'MF_EndTag')) {
|
||||||
|
$nesting--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($is_child) {
|
||||||
|
$is_deleting = false;
|
||||||
|
if (!isset($this->elements[$token->name])) {
|
||||||
|
$is_deleting = true;
|
||||||
|
if ($pcdata_allowed) {
|
||||||
|
$result[] = new MF_Text($this->gen->generateFromToken($token));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!$is_deleting) {
|
||||||
|
$result[] = $token;
|
||||||
|
} elseif ($pcdata_allowed) {
|
||||||
|
$result[] = new MF_Text($this->gen->generateFromToken($token));
|
||||||
|
} else {
|
||||||
|
// drop silently
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (empty($result)) return false;
|
||||||
|
if ($all_whitespace) return false;
|
||||||
|
if ($tokens_of_children == $result) return true;
|
||||||
|
return $result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
class HTMLDTD_ChildDef_Optional extends HTMLDTD_ChildDef_Simple
|
class HTMLDTD_ChildDef_Optional extends HTMLDTD_ChildDef_Simple
|
||||||
|
@ -30,6 +30,9 @@ class Test_HTML_Generator extends UnitTestCase
|
|||||||
$inputs[4] = new MF_StartTag('asdf');
|
$inputs[4] = new MF_StartTag('asdf');
|
||||||
$expect[4] = '<asdf>';
|
$expect[4] = '<asdf>';
|
||||||
|
|
||||||
|
$inputs[5] = new MF_EmptyTag('br');
|
||||||
|
$expect[5] = '<br />';
|
||||||
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
$result = $this->gen->generateFromToken($input);
|
$result = $this->gen->generateFromToken($input);
|
||||||
$this->assertEqual($result, $expect[$i]);
|
$this->assertEqual($result, $expect[$i]);
|
||||||
|
@ -154,20 +154,26 @@ class Test_PureHTMLDefinition extends UnitTestCase
|
|||||||
|
|
||||||
$inputs[9] = array(
|
$inputs[9] = array(
|
||||||
new MF_StartTag('ol')
|
new MF_StartTag('ol')
|
||||||
|
|
||||||
,new MF_StartTag('li')
|
,new MF_StartTag('li')
|
||||||
,new MF_Text('Item 1')
|
,new MF_Text('Item 1')
|
||||||
|
|
||||||
,new MF_StartTag('li')
|
,new MF_StartTag('li')
|
||||||
,new MF_Text('Item 2')
|
,new MF_Text('Item 2')
|
||||||
|
|
||||||
,new MF_EndTag('ol')
|
,new MF_EndTag('ol')
|
||||||
);
|
);
|
||||||
$expect[9] = array(
|
$expect[9] = array(
|
||||||
new MF_StartTag('ol')
|
new MF_StartTag('ol')
|
||||||
|
|
||||||
,new MF_StartTag('li')
|
,new MF_StartTag('li')
|
||||||
,new MF_Text('Item 1')
|
,new MF_Text('Item 1')
|
||||||
,new MF_EndTag('li')
|
,new MF_EndTag('li')
|
||||||
|
|
||||||
,new MF_StartTag('li')
|
,new MF_StartTag('li')
|
||||||
,new MF_Text('Item 2')
|
,new MF_Text('Item 2')
|
||||||
,new MF_EndTag('li')
|
,new MF_EndTag('li')
|
||||||
|
|
||||||
,new MF_EndTag('ol')
|
,new MF_EndTag('ol')
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -181,4 +187,122 @@ class Test_PureHTMLDefinition extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class Test_HTMLDTD_ChildDef extends UnitTestCase
|
||||||
|
{
|
||||||
|
|
||||||
|
function test_simple() {
|
||||||
|
|
||||||
|
$def = new HTMLDTD_ChildDef_Simple('foobar | bang |gizmo');
|
||||||
|
$this->assertEqual($def->elements,
|
||||||
|
array(
|
||||||
|
'foobar' => true
|
||||||
|
,'bang' => true
|
||||||
|
,'gizmo' => true
|
||||||
|
));
|
||||||
|
|
||||||
|
$def = new HTMLDTD_ChildDef_Simple(array('href', 'src'));
|
||||||
|
$this->assertEqual($def->elements,
|
||||||
|
array(
|
||||||
|
'href' => true
|
||||||
|
,'src' => true
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_required_pcdata_forbidden() {
|
||||||
|
|
||||||
|
$def = new HTMLDTD_ChildDef_Required('dt | dd');
|
||||||
|
|
||||||
|
$inputs[0] = array();
|
||||||
|
$expect[0] = false;
|
||||||
|
|
||||||
|
$inputs[1] = array(
|
||||||
|
new MF_StartTag('dt')
|
||||||
|
,new MF_Text('Term')
|
||||||
|
,new MF_EndTag('dt')
|
||||||
|
|
||||||
|
,new MF_Text('Text in an illegal location')
|
||||||
|
|
||||||
|
,new MF_StartTag('dd')
|
||||||
|
,new MF_Text('Definition')
|
||||||
|
,new MF_EndTag('dd')
|
||||||
|
|
||||||
|
,new MF_StartTag('b') // test tag removal too
|
||||||
|
,new MF_EndTag('b')
|
||||||
|
);
|
||||||
|
$expect[1] = array(
|
||||||
|
new MF_StartTag('dt')
|
||||||
|
,new MF_Text('Term')
|
||||||
|
,new MF_EndTag('dt')
|
||||||
|
|
||||||
|
,new MF_StartTag('dd')
|
||||||
|
,new MF_Text('Definition')
|
||||||
|
,new MF_EndTag('dd')
|
||||||
|
);
|
||||||
|
|
||||||
|
$inputs[2] = array(new MF_Text('How do you do!'));
|
||||||
|
$expect[2] = false;
|
||||||
|
|
||||||
|
// whitespace shouldn't trigger it
|
||||||
|
$inputs[3] = array(
|
||||||
|
new MF_Text("\n")
|
||||||
|
,new MF_StartTag('dd')
|
||||||
|
,new MF_Text('Definition')
|
||||||
|
,new MF_EndTag('dd')
|
||||||
|
,new MF_Text(' ')
|
||||||
|
);
|
||||||
|
$expect[3] = true;
|
||||||
|
|
||||||
|
$inputs[4] = array(
|
||||||
|
new MF_StartTag('dd')
|
||||||
|
,new MF_Text('Definition')
|
||||||
|
,new MF_EndTag('dd')
|
||||||
|
,new MF_Text(' ')
|
||||||
|
,new MF_StartTag('b')
|
||||||
|
,new MF_EndTag('b')
|
||||||
|
,new MF_Text(' ')
|
||||||
|
);
|
||||||
|
$expect[4] = array(
|
||||||
|
new MF_StartTag('dd')
|
||||||
|
,new MF_Text('Definition')
|
||||||
|
,new MF_EndTag('dd')
|
||||||
|
,new MF_Text(' ')
|
||||||
|
,new MF_Text(' ')
|
||||||
|
);
|
||||||
|
$inputs[5] = array(
|
||||||
|
new MF_Text(' ')
|
||||||
|
,new MF_Text("\t")
|
||||||
|
);
|
||||||
|
$expect[5] = false;
|
||||||
|
|
||||||
|
foreach ($inputs as $i => $input) {
|
||||||
|
$result = $def->validateChildren($input);
|
||||||
|
if (is_bool($expect[$i])) {
|
||||||
|
$this->assertIdentical($expect[$i], $result);
|
||||||
|
} else {
|
||||||
|
$this->assertEqual($expect[$i], $result);
|
||||||
|
paintIf($result, $result != $expect[$i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_required_pcdata_allowed() {
|
||||||
|
$def = new HTMLDTD_ChildDef_Required('#PCDATA | b');
|
||||||
|
$input = array(
|
||||||
|
new MF_StartTag('b')
|
||||||
|
,new MF_Text('Bold text')
|
||||||
|
,new MF_EndTag('b')
|
||||||
|
,new MF_EmptyTag('img') // illegal tag
|
||||||
|
);
|
||||||
|
$expect = array(
|
||||||
|
new MF_StartTag('b')
|
||||||
|
,new MF_Text('Bold text')
|
||||||
|
,new MF_EndTag('b')
|
||||||
|
,new MF_Text('<img />')
|
||||||
|
);
|
||||||
|
$this->assertEqual($expect, $def->validateChildren($input));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
?>
|
?>
|
Loading…
Reference in New Issue
Block a user