0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-05 06:01:52 +00:00

Finish HTMLDTD_ChildDef_Required.

Fix bug in HTML_Generator that caused attribute-less empty elements to be generated with an extra space in them.

Add whitespace designation to MF_Text.

git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@44 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-04-17 00:49:15 +00:00
parent 20c53d6017
commit 83f735ea7e
5 changed files with 199 additions and 4 deletions

View File

@ -21,7 +21,7 @@ class HTML_Generator
} elseif (is_a($token, 'MF_EmptyTag')) { } elseif (is_a($token, 'MF_EmptyTag')) {
$attr = $this->generateAttributes($token->attributes); $attr = $this->generateAttributes($token->attributes);
return '<' . $token->name . ' ' . $attr . ' />'; return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
} elseif (is_a($token, 'MF_Text')) { } elseif (is_a($token, 'MF_Text')) {
return htmlentities($token->data, ENT_COMPAT, 'UTF-8'); return htmlentities($token->data, ENT_COMPAT, 'UTF-8');

View File

@ -31,9 +31,10 @@ class MF_Text extends MF
{ {
var $name = '#PCDATA'; var $name = '#PCDATA';
var $data; var $data;
var $is_whitespace = false;
function MF_Text($data) { function MF_Text($data) {
$this->data = trim($data); // fairly certain trimming it's okay $this->data = $data;
// but it's not default SAX behavior if (trim($data, " \n\r\t") === '') $this->is_whitespace = true;
} }
function append($mf_text) { function append($mf_text) {
return new MF_Text($this->data . $mf_text->data); return new MF_Text($this->data . $mf_text->data);

View File

@ -342,6 +342,10 @@ class HTMLDTD_Element
} }
// HTMLDTD_ChildDef and inheritance have three types of output:
// true = leave nodes as is
// false = delete parent node and all children
// array(...) = replace children nodes with these
class HTMLDTD_ChildDef class HTMLDTD_ChildDef
{ {
var $dtd_regex; var $dtd_regex;
@ -354,13 +358,76 @@ class HTMLDTD_ChildDef_Simple extends HTMLDTD_ChildDef
{ {
var $elements = array(); var $elements = array();
/**
 * Builds the lookup table of allowed child element names.
 *
 * @param string|array $elements Either a '|'-separated list of names
 *        (e.g. 'dt | dd') or an array of names. Whitespace inside the
 *        string form is ignored.
 */
function HTMLDTD_ChildDef_Simple($elements) {
    if (is_string($elements)) {
        // Strip every kind of whitespace, not only literal spaces, so that
        // a specification containing tabs or line breaks does not produce
        // names with embedded whitespace that can never match a token.
        $elements = preg_replace('/\s+/', '', $elements);
        $elements = explode('|', $elements);
    }
    // Convert the list of names into a name => true map so membership can
    // be tested with isset().
    $elements = array_flip($elements);
    foreach ($elements as $i => $x) $elements[$i] = true;
    $this->elements = $elements;
    // Generator used by subclasses to turn rejected tokens back into markup.
    $this->gen = new HTML_Generator();
}
} }
/**
 * Child definition for a parent that needs at least one allowed child.
 */
class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple
{
    /**
     * Filters a flat token list of children against the allowed elements.
     *
     * Direct children (tokens seen at nesting depth zero) whose name is not
     * in $this->elements are removed together with everything nested inside
     * them. When '#PCDATA' is an allowed element, the removed tokens are
     * replaced by MF_Text tokens holding their generated markup; otherwise
     * they are dropped silently. Whitespace-only tokens are always kept.
     *
     * @param array $tokens_of_children Tokens found between the parent's tags.
     * @return bool|array true to leave the children untouched, false to
     *         delete the parent node and all children, or an array holding
     *         the replacement children.
     */
    function validateChildren($tokens_of_children) {
        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // the new set of children
        $result = array();

        // current depth into the nest
        $nesting = 0;

        // whether or not we're deleting a node
        $is_deleting = false;

        // whether or not parsed character data is allowed; this controls
        // whether we silently drop a tag or generate escaped HTML from it
        $pcdata_allowed = isset($this->elements['#PCDATA']);

        // Whether anything other than whitespace made it into $result.
        // This is tracked on the output rather than the input: children
        // that are all dropped must not leave a whitespace-only result.
        $has_content = false;

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }

            // Sample the depth before adjusting it, so that a start tag at
            // depth zero counts as a direct child while its matching end tag
            // (seen at depth one) inherits the decision made for the start.
            $is_child = ($nesting == 0);
            if (is_a($token, 'MF_StartTag')) {
                $nesting++;
            } elseif (is_a($token, 'MF_EndTag')) {
                $nesting--;
            }

            if ($is_child) {
                $is_deleting = !isset($this->elements[$token->name]);
            }

            if (!$is_deleting) {
                $result[] = $token;
                $has_content = true;
            } elseif ($pcdata_allowed) {
                $text = new MF_Text($this->gen->generateFromToken($token));
                $result[] = $text;
                if (empty($text->is_whitespace)) $has_content = true;
            }
            // otherwise: drop silently
        }

        // nothing but whitespace (or nothing at all) survived
        if (!$has_content) return false;
        if ($tokens_of_children == $result) return true;
        return $result;
    }
}
class HTMLDTD_ChildDef_Optional extends HTMLDTD_ChildDef_Simple class HTMLDTD_ChildDef_Optional extends HTMLDTD_ChildDef_Simple

View File

@ -30,6 +30,9 @@ class Test_HTML_Generator extends UnitTestCase
$inputs[4] = new MF_StartTag('asdf'); $inputs[4] = new MF_StartTag('asdf');
$expect[4] = '<asdf>'; $expect[4] = '<asdf>';
$inputs[5] = new MF_EmptyTag('br');
$expect[5] = '<br />';
foreach ($inputs as $i => $input) { foreach ($inputs as $i => $input) {
$result = $this->gen->generateFromToken($input); $result = $this->gen->generateFromToken($input);
$this->assertEqual($result, $expect[$i]); $this->assertEqual($result, $expect[$i]);

View File

@ -154,20 +154,26 @@ class Test_PureHTMLDefinition extends UnitTestCase
$inputs[9] = array( $inputs[9] = array(
new MF_StartTag('ol') new MF_StartTag('ol')
,new MF_StartTag('li') ,new MF_StartTag('li')
,new MF_Text('Item 1') ,new MF_Text('Item 1')
,new MF_StartTag('li') ,new MF_StartTag('li')
,new MF_Text('Item 2') ,new MF_Text('Item 2')
,new MF_EndTag('ol') ,new MF_EndTag('ol')
); );
$expect[9] = array( $expect[9] = array(
new MF_StartTag('ol') new MF_StartTag('ol')
,new MF_StartTag('li') ,new MF_StartTag('li')
,new MF_Text('Item 1') ,new MF_Text('Item 1')
,new MF_EndTag('li') ,new MF_EndTag('li')
,new MF_StartTag('li') ,new MF_StartTag('li')
,new MF_Text('Item 2') ,new MF_Text('Item 2')
,new MF_EndTag('li') ,new MF_EndTag('li')
,new MF_EndTag('ol') ,new MF_EndTag('ol')
); );
@ -181,4 +187,122 @@ class Test_PureHTMLDefinition extends UnitTestCase
} }
/**
 * Tests for HTMLDTD_ChildDef_Simple and HTMLDTD_ChildDef_Required.
 */
class Test_HTMLDTD_ChildDef extends UnitTestCase
{

    /**
     * The constructor accepts a '|'-separated string or an array of names
     * and exposes them as a name => true map.
     */
    function test_simple() {

        $from_string = new HTMLDTD_ChildDef_Simple('foobar | bang |gizmo');
        $this->assertEqual(
            $from_string->elements,
            array('foobar' => true, 'bang' => true, 'gizmo' => true)
        );

        $from_array = new HTMLDTD_ChildDef_Simple(array('href', 'src'));
        $this->assertEqual(
            $from_array->elements,
            array('href' => true, 'src' => true)
        );

    }

    /**
     * Required children where '#PCDATA' is not among the allowed elements.
     */
    function test_required_pcdata_forbidden() {

        $child_def = new HTMLDTD_ChildDef_Required('dt | dd');
        $cases = array();

        // no children at all
        $cases[] = array(
            'input'  => array(),
            'expect' => false,
        );

        // stray text and a disallowed tag are both removed
        $cases[] = array(
            'input' => array(
                new MF_StartTag('dt'),
                new MF_Text('Term'),
                new MF_EndTag('dt'),
                new MF_Text('Text in an illegal location'),
                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),
                new MF_StartTag('b'), // test tag removal too
                new MF_EndTag('b'),
            ),
            'expect' => array(
                new MF_StartTag('dt'),
                new MF_Text('Term'),
                new MF_EndTag('dt'),
                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),
            ),
        );

        // a lone text child leaves nothing behind
        $cases[] = array(
            'input'  => array(new MF_Text('How do you do!')),
            'expect' => false,
        );

        // whitespace shouldn't trigger it
        $cases[] = array(
            'input' => array(
                new MF_Text("\n"),
                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),
                new MF_Text(' '),
            ),
            'expect' => true,
        );

        // whitespace around a removed tag is kept
        $cases[] = array(
            'input' => array(
                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),
                new MF_Text(' '),
                new MF_StartTag('b'),
                new MF_EndTag('b'),
                new MF_Text(' '),
            ),
            'expect' => array(
                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),
                new MF_Text(' '),
                new MF_Text(' '),
            ),
        );

        // nothing but whitespace
        $cases[] = array(
            'input' => array(
                new MF_Text(' '),
                new MF_Text("\t"),
            ),
            'expect' => false,
        );

        foreach ($cases as $case) {
            $expected = $case['expect'];
            $actual = $child_def->validateChildren($case['input']);
            if (is_bool($expected)) {
                // booleans must match exactly, not merely compare equal
                $this->assertIdentical($expected, $actual);
                continue;
            }
            $this->assertEqual($expected, $actual);
            paintIf($actual, $actual != $expected);
        }

    }

    /**
     * Required children where '#PCDATA' is allowed: an illegal tag is
     * replaced by a text token containing its markup.
     */
    function test_required_pcdata_allowed() {

        $child_def = new HTMLDTD_ChildDef_Required('#PCDATA | b');

        $children = array(
            new MF_StartTag('b'),
            new MF_Text('Bold text'),
            new MF_EndTag('b'),
            new MF_EmptyTag('img'), // illegal tag
        );

        $expected = array(
            new MF_StartTag('b'),
            new MF_Text('Bold text'),
            new MF_EndTag('b'),
            new MF_Text('<img />'),
        );

        $this->assertEqual($expected, $child_def->validateChildren($children));

    }

}
?> ?>