0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-05 06:01:52 +00:00

Implement automatic closing of <p> and <li> elements. The list of tags that close an open <p>, as documented in the spec, has been trimmed down to a saner set.

git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@41 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-04-16 19:34:28 +00:00
parent ffbeaf66d1
commit 8c80b70c37
3 changed files with 105 additions and 6 deletions

View File

@ -5,6 +5,27 @@ class PureHTMLDefinition
var $generator; var $generator;
var $info = array(); var $info = array();
// Lookup table of element names whose start tag closes a currently open <p>.
// Keys are tag names; every value is true so that membership can be tested
// with isset($this->info_closes_p[$name]).
var $info_closes_p = array(
'address' => true,
'blockquote' => true,
'dd' => true,
'dir' => true,
'div' => true,
'dl' => true,
'dt' => true,
'h1' => true,
'h2' => true,
'h3' => true,
'h4' => true,
'h5' => true,
'h6' => true,
'hr' => true,
'ol' => true,
'p' => true, // a new paragraph closes the previous one
'pre' => true,
'table' => true,
'ul' => true
);
function PureHTMLDefinition() { function PureHTMLDefinition() {
$this->generator = new HTML_Generator(); $this->generator = new HTML_Generator();
@ -58,6 +79,7 @@ class PureHTMLDefinition
$this->info['del'] = $this->info['del'] =
$this->info['blockquote'] = $this->info['blockquote'] =
$this->info['dd'] = $this->info['dd'] =
$this->info['li'] =
$this->info['div'] = new HTMLDTD_Element($e_Flow); $this->info['div'] = new HTMLDTD_Element($e_Flow);
$this->info['em'] = $this->info['em'] =
@ -187,8 +209,37 @@ class PureHTMLDefinition
continue; continue;
} }
// we give start tags precedence, so automatically accept // we give start tags precedence, so automatically accept unless...
// it's one of those special cases
if (is_a($token, 'MF_StartTag')) { if (is_a($token, 'MF_StartTag')) {
// if there's a parent, check for special case
if (!empty($current_nesting)) {
$current_parent = array_pop($current_nesting);
// check if we're closing a P tag
if ($current_parent->name == 'p' &&
isset($this->info_closes_p[$token->name])
) {
$result[] = new MF_EndTag('p');
$result[] = $token;
$current_nesting[] = $token;
continue;
}
// check if we're closing a LI tag
if ($current_parent->name == 'li' &&
$token->name == 'li'
) {
$result[] = new MF_EndTag('li');
$result[] = $token;
$current_nesting[] = $token;
continue;
}
$current_nesting[] = $current_parent; // undo the pop
}
$result[] = $token; $result[] = $token;
$current_nesting[] = $token; $current_nesting[] = $token;
continue; continue;

View File

@ -49,11 +49,11 @@ time, it's making sure the tags match up, but there's some trickery going on
for HTML's quirks. They are: for HTML's quirks. They are:
* Set of tags that close P * Set of tags that close P
'address', 'blockquote', 'center', 'dd', 'dir', 'div', 'address', 'blockquote', 'dd', 'dir', 'div',
'dl', 'dt', 'h1', 'h2', 'h3', 'h4', 'dl', 'dt', 'h1', 'h2', 'h3', 'h4',
'h5', 'h6', 'hr', 'isindex', 'listing', 'marquee', 'h5', 'h6', 'hr',
'menu', 'multicol', 'ol', 'p', 'plaintext', 'pre', 'ol', 'p', 'pre',
'table', 'ul', 'xmp', 'table', 'ul'
* Li closes li * Li closes li
* more? * more?

View File

@ -119,7 +119,55 @@ class Test_PureHTMLDefinition extends UnitTestCase
,new MF_EndTag('div') ,new MF_EndTag('div')
); );
// demonstrates start tag precedence // test automatic paragraph closing
$inputs[7] = array(
new MF_StartTag('p')
,new MF_Text('Paragraph 1')
,new MF_StartTag('p')
,new MF_Text('Paragraph 2')
);
$expect[7] = array(
new MF_StartTag('p')
,new MF_Text('Paragraph 1')
,new MF_EndTag('p')
,new MF_StartTag('p')
,new MF_Text('Paragraph 2')
,new MF_EndTag('p')
);
$inputs[8] = array(
new MF_StartTag('div')
,new MF_StartTag('p')
,new MF_Text('Paragraph 1 in a div')
,new MF_EndTag('div')
);
$expect[8] = array(
new MF_StartTag('div')
,new MF_StartTag('p')
,new MF_Text('Paragraph 1 in a div')
,new MF_EndTag('p')
,new MF_EndTag('div')
);
$inputs[9] = array(
new MF_StartTag('ol')
,new MF_StartTag('li')
,new MF_Text('Item 1')
,new MF_StartTag('li')
,new MF_Text('Item 2')
,new MF_EndTag('ol')
);
$expect[9] = array(
new MF_StartTag('ol')
,new MF_StartTag('li')
,new MF_Text('Item 1')
,new MF_EndTag('li')
,new MF_StartTag('li')
,new MF_Text('Item 2')
,new MF_EndTag('li')
,new MF_EndTag('ol')
);
foreach ($inputs as $i => $input) { foreach ($inputs as $i => $input) {
$result = $this->def->makeWellFormed($input); $result = $this->def->makeWellFormed($input);