mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-03 05:11:52 +00:00
Finish HTMLDTD_ChildDef_Required.
Fix bug in HTML_Generator that caused attribute-less empty elements to have extra spaces in them. Add whitespace designation to MF_Text. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@44 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
20c53d6017
commit
83f735ea7e
@ -21,7 +21,7 @@ class HTML_Generator
|
||||
|
||||
} elseif (is_a($token, 'MF_EmptyTag')) {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
return '<' . $token->name . ' ' . $attr . ' />';
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
||||
|
||||
} elseif (is_a($token, 'MF_Text')) {
|
||||
return htmlentities($token->data, ENT_COMPAT, 'UTF-8');
|
||||
|
@ -31,9 +31,10 @@ class MF_Text extends MF
|
||||
{
|
||||
var $name = '#PCDATA';
|
||||
var $data;
|
||||
var $is_whitespace = false;
|
||||
/**
 * Builds a text token from raw character data.
 *
 * The data is stored exactly as given (it is not trimmed). A token whose
 * data consists solely of spaces, newlines, carriage returns and tabs
 * (including the empty string) is additionally flagged via $is_whitespace.
 *
 * @param string $data Raw character data for this token.
 */
function MF_Text($data) {
    // A trim($data) assignment used to precede this line; its value was
    // overwritten immediately, so only the verbatim assignment is kept.
    $this->data = $data;
    if (trim($data, " \n\r\t") === '') $this->is_whitespace = true;
}
|
||||
function append($mf_text) {
|
||||
return new MF_Text($this->data . $mf_text->data);
|
||||
|
@ -342,6 +342,10 @@ class HTMLDTD_Element
|
||||
|
||||
}
|
||||
|
||||
// HTMLDTD_ChildDef and the classes inheriting from it have three types of output:
|
||||
// true = leave nodes as is
|
||||
// false = delete parent node and all children
|
||||
// array(...) = replace children nodes with these
|
||||
class HTMLDTD_ChildDef
|
||||
{
|
||||
var $dtd_regex;
|
||||
@ -354,13 +358,76 @@ class HTMLDTD_ChildDef_Simple extends HTMLDTD_ChildDef
|
||||
{
|
||||
var $elements = array();
|
||||
/**
 * Builds the lookup table of permitted child element names.
 *
 * @param mixed $elements Either a list of element names, or a single
 *                        string of names separated by '|' (space
 *                        characters in the string are removed).
 */
function HTMLDTD_ChildDef_Simple($elements) {
    $names = $elements;
    if (is_string($names)) {
        // "a | b |c" => array('a', 'b', 'c')
        $names = explode('|', str_replace(' ', '', $names));
    }

    // turn the list into a name => true map so membership is an isset()
    $lookup = array_flip($names);
    foreach (array_keys($lookup) as $name) {
        $lookup[$name] = true;
    }

    $this->elements = $lookup;
    $this->gen = new HTML_Generator();
}
|
||||
}
|
||||
class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple
{
    /**
     * Validates the children of a node that must contain at least one
     * permitted, non-whitespace child.
     *
     * Tokens are walked in order while tracking tag nesting depth. A
     * token at depth zero is a direct child: if its name is not a key of
     * $this->elements, it and everything nested inside it is removed.
     * When '#PCDATA' is a permitted child, removed tokens are replaced by
     * text tokens holding the markup generated for them; otherwise they
     * are dropped. Whitespace text tokens are always passed through.
     *
     * @param array $tokens_of_children Flat list of the child tokens.
     * @return mixed true  - the children are fine as they are
     *               false - delete the parent node and all its children
     *               array - replace the children with these tokens
     */
    function validateChildren($tokens_of_children) {
        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // the new set of children
        $result = array();

        // current depth into the nest
        $nesting = 0;

        // whether or not we're deleting a node
        $is_deleting = false;

        // whether or not parsed character data is allowed
        // this controls whether or not we silently drop a tag
        // or generate escaped HTML from it
        $pcdata_allowed = isset($this->elements['#PCDATA']);

        // stays true until a non-whitespace token is actually KEPT in
        // $result; merely seeing one in the input is not enough, because
        // it may be deleted and leave only whitespace behind
        $all_whitespace = true;

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }

            // depth is sampled before it is adjusted, so a start tag at
            // depth zero counts as a direct child but its end tag does not
            $is_child = ($nesting == 0);

            if (is_a($token, 'MF_StartTag')) {
                $nesting++;
            } elseif (is_a($token, 'MF_EndTag')) {
                $nesting--;
            }

            // every direct child decides afresh whether we are deleting;
            // nested tokens inherit the decision of their direct-child ancestor
            if ($is_child) {
                $is_deleting = !isset($this->elements[$token->name]);
            }

            if (!$is_deleting) {
                $result[] = $token;
                $all_whitespace = false;
            } elseif ($pcdata_allowed) {
                $escaped = new MF_Text($this->gen->generateFromToken($token));
                $result[] = $escaped;
                if (empty($escaped->is_whitespace)) $all_whitespace = false;
            } else {
                // drop silently
            }
        }

        // nothing but whitespace survived (this also covers an empty
        // result): the required content is missing, delete the parent
        if ($all_whitespace) return false;
        if ($tokens_of_children == $result) return true;
        return $result;
    }
}
|
||||
class HTMLDTD_ChildDef_Optional extends HTMLDTD_ChildDef_Simple
|
||||
|
@ -30,6 +30,9 @@ class Test_HTML_Generator extends UnitTestCase
|
||||
$inputs[4] = new MF_StartTag('asdf');
|
||||
$expect[4] = '<asdf>';
|
||||
|
||||
$inputs[5] = new MF_EmptyTag('br');
|
||||
$expect[5] = '<br />';
|
||||
|
||||
foreach ($inputs as $i => $input) {
|
||||
$result = $this->gen->generateFromToken($input);
|
||||
$this->assertEqual($result, $expect[$i]);
|
||||
|
@ -154,20 +154,26 @@ class Test_PureHTMLDefinition extends UnitTestCase
|
||||
|
||||
$inputs[9] = array(
|
||||
new MF_StartTag('ol')
|
||||
|
||||
,new MF_StartTag('li')
|
||||
,new MF_Text('Item 1')
|
||||
|
||||
,new MF_StartTag('li')
|
||||
,new MF_Text('Item 2')
|
||||
|
||||
,new MF_EndTag('ol')
|
||||
);
|
||||
$expect[9] = array(
|
||||
new MF_StartTag('ol')
|
||||
|
||||
,new MF_StartTag('li')
|
||||
,new MF_Text('Item 1')
|
||||
,new MF_EndTag('li')
|
||||
|
||||
,new MF_StartTag('li')
|
||||
,new MF_Text('Item 2')
|
||||
,new MF_EndTag('li')
|
||||
|
||||
,new MF_EndTag('ol')
|
||||
);
|
||||
|
||||
@ -181,4 +187,122 @@ class Test_PureHTMLDefinition extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
class Test_HTMLDTD_ChildDef extends UnitTestCase
{

    /**
     * The constructor accepts either a pipe-separated string or a list of
     * names and exposes them as a name => true map in $elements.
     */
    function test_simple() {

        $from_string = new HTMLDTD_ChildDef_Simple('foobar | bang |gizmo');
        $this->assertEqual(
            $from_string->elements,
            array('foobar' => true, 'bang' => true, 'gizmo' => true)
        );

        $from_list = new HTMLDTD_ChildDef_Simple(array('href', 'src'));
        $this->assertEqual(
            $from_list->elements,
            array('href' => true, 'src' => true)
        );

    }

    /**
     * Required children without #PCDATA: illegal tags and stray text are
     * removed, whitespace is tolerated, and an empty or whitespace-only
     * child list deletes the parent.
     */
    function test_required_pcdata_forbidden() {

        $def = new HTMLDTD_ChildDef_Required('dt | dd');

        // each case is array(input tokens, expected return value)
        $cases = array();

        // 0: no children at all
        $cases[] = array(array(), false);

        // 1: stray text and an illegal tag are stripped
        $cases[] = array(
            array(
                new MF_StartTag('dt'),
                new MF_Text('Term'),
                new MF_EndTag('dt'),

                new MF_Text('Text in an illegal location'),

                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),

                new MF_StartTag('b'), // test tag removal too
                new MF_EndTag('b')
            ),
            array(
                new MF_StartTag('dt'),
                new MF_Text('Term'),
                new MF_EndTag('dt'),

                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd')
            )
        );

        // 2: nothing but illegal text
        $cases[] = array(
            array(new MF_Text('How do you do!')),
            false
        );

        // 3: whitespace shouldn't trigger it
        $cases[] = array(
            array(
                new MF_Text("\n"),
                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),
                new MF_Text(' ')
            ),
            true
        );

        // 4: whitespace around a removed tag is kept
        $cases[] = array(
            array(
                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),
                new MF_Text(' '),
                new MF_StartTag('b'),
                new MF_EndTag('b'),
                new MF_Text(' ')
            ),
            array(
                new MF_StartTag('dd'),
                new MF_Text('Definition'),
                new MF_EndTag('dd'),
                new MF_Text(' '),
                new MF_Text(' ')
            )
        );

        // 5: whitespace only
        $cases[] = array(
            array(
                new MF_Text(' '),
                new MF_Text("\t")
            ),
            false
        );

        foreach ($cases as $case) {
            $input    = $case[0];
            $expected = $case[1];
            $result   = $def->validateChildren($input);
            if (!is_bool($expected)) {
                $this->assertEqual($expected, $result);
                paintIf($result, $result != $expected);
            } else {
                // booleans must match in type as well as value
                $this->assertIdentical($expected, $result);
            }
        }

    }

    /**
     * Required children with #PCDATA allowed: an illegal tag is replaced
     * by a text token holding its generated markup.
     */
    function test_required_pcdata_allowed() {
        $def = new HTMLDTD_ChildDef_Required('#PCDATA | b');

        $children = array(
            new MF_StartTag('b'),
            new MF_Text('Bold text'),
            new MF_EndTag('b'),
            new MF_EmptyTag('img') // illegal tag
        );
        $expected = array(
            new MF_StartTag('b'),
            new MF_Text('Bold text'),
            new MF_EndTag('b'),
            new MF_Text('<img />')
        );

        $this->assertEqual($expected, $def->validateChildren($children));
    }

}
|
||||
|
||||
?>
|
Loading…
Reference in New Issue
Block a user