0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-05 06:01:52 +00:00

Properly handle nested sublists by folding into previous list item.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang 2011-12-26 14:00:34 +08:00
parent 8d572993b4
commit 3570c9985a
9 changed files with 201 additions and 9 deletions

2
NEWS
View File

@ -16,6 +16,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
entities, even if target encoding is UTF-8. entities, even if target encoding is UTF-8.
! Added support for 'scope' attribute on tables. ! Added support for 'scope' attribute on tables.
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links. ! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
! Properly handle sub-lists directly nested inside of lists in
a standards-compliant way, by moving them into the preceding <li>.
- Color keywords are now case insensitive. Thanks Yzmir Ramirez - Color keywords are now case insensitive. Thanks Yzmir Ramirez
<yramirez-htmlpurifier@adicio.com> for reporting. <yramirez-htmlpurifier@adicio.com> for reporting.
- Explicitly initialize anonModule variable to null. - Explicitly initialize anonModule variable to null.

View File

@ -135,6 +135,7 @@ require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php'; require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php'; require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php'; require 'HTMLPurifier/ChildDef/Empty.php';
require 'HTMLPurifier/ChildDef/List.php';
require 'HTMLPurifier/ChildDef/Required.php'; require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php'; require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php'; require 'HTMLPurifier/ChildDef/StrictBlockquote.php';

View File

@ -129,6 +129,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php'; require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';

View File

@ -0,0 +1,120 @@
<?php
/**
 * Definition for list containers ul and ol.
 *
 * Besides accepting <li> children, this definition repairs two kinds of
 * direct children instead of rejecting them:
 *  - a <ul>/<ol> placed directly inside the list is tucked into the
 *    preceding <li> (or into a freshly created <li> if none was seen yet);
 *  - any other non-whitespace child is wrapped in a new <li>.
 */
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    public $type = 'list';

    // lying a little bit, so that we can handle ul and ol ourselves
    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);

    /**
     * Flag for subclasses; reset to false at the start of every
     * validateChildren() call. Declared explicitly so that assignment
     * does not create a dynamic property on the instance.
     */
    public $whitespace = false;

    /**
     * Validates and, where needed, repairs the direct children of a list.
     *
     * @param array $tokens_of_children Tokens found between the list's
     *                                  start and end tags.
     * @param mixed $config  Not used by this implementation.
     * @param mixed $context Not used by this implementation.
     * @return bool|array False when the parent node should be deleted (no
     *                    children, or whitespace only) or when an internal
     *                    invariant was violated; true when the children are
     *                    acceptable as-is; otherwise the repaired token list.
     */
    public function validateChildren($tokens_of_children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) {
            return false;
        }

        // the new set of children
        $result = array();

        // current depth into the nest (0 means "direct child of the list")
        $nesting = 0;

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // whether a list item has already been emitted at depth 0
        $seen_li = false;

        // whether we opened (or re-opened) an <li> that we still have to close
        $need_close_li = false;

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                // whitespace passes straight through and never closes a
                // pending <li>, so it ends up inside the item being built
                $result[] = $token;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            // we are back at the depth of the <li> we opened ourselves and a
            // new sibling is starting: close that <li> first
            if ($nesting == 1 && $need_close_li) {
                $result[] = new HTMLPurifier_Token_End('li');
                $nesting--;
                $need_close_li = false;
            }

            // must be computed before the depth bookkeeping below
            $is_child = ($nesting == 0);

            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            if ($is_child) {
                if ($token->name === 'li') {
                    // good
                    $seen_li = true;
                } elseif ($token->name === 'ul' || $token->name === 'ol') {
                    // we want to tuck this into the previous li; the extra
                    // nesting level accounts for the <li> that is (re)opened
                    $need_close_li = true;
                    $nesting++;
                    if (!$seen_li) {
                        // create a new li element ($seen_li stays false, so a
                        // second leading sublist gets its own <li> as well)
                        $result[] = new HTMLPurifier_Token_Start('li');
                    } elseif (!$this->reopenPreviousLi($result)) {
                        return false;
                    }
                } else {
                    // start wrapping (this doesn't precisely mimic
                    // browser behavior, but what browsers do is kind of
                    // hard to mimic in a standards compliant way
                    // XXX Actually, this has no impact in practice,
                    // because this gets handled earlier. Arguably,
                    // we should rip out all of that processing
                    $result[] = new HTMLPurifier_Token_Start('li');
                    $nesting++;
                    $seen_li = true;
                    $need_close_li = true;
                }
            }
            $result[] = $token;
        }

        if ($need_close_li) {
            $result[] = new HTMLPurifier_Token_End('li');
        }
        if (empty($result)) {
            return false;
        }
        if ($all_whitespace) {
            return false;
        }
        // loose comparison on purpose: compares the token lists by value
        if ($tokens_of_children == $result) {
            return true;
        }
        return $result;
    }

    /**
     * Backtracks through $result until the most recent list item is found
     * and leaves that item open so that a sublist can be appended to it.
     *
     * A closing </li> is simply removed; a self-closing <li /> is replaced
     * by an equivalent start tag. Whitespace tokens popped on the way are
     * discarded.
     *
     * @param array $result Tokens emitted so far; modified in place.
     * @return bool True on success, false if something other than an li
     *              boundary or whitespace was found (an error is triggered).
     */
    protected function reopenPreviousLi(&$result)
    {
        while (true) {
            $t = array_pop($result);
            if ($t instanceof HTMLPurifier_Token_End) {
                // XXX actually, these invariants could very plausibly be violated
                // if we are doing silly things with modifying the set of allowed elements.
                // FORTUNATELY, it doesn't make a difference, since the allowed
                // elements are hard-coded here!
                if ($t->name !== 'li') {
                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                    return false;
                }
                return true;
            }
            if ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
                if ($t->name !== 'li') {
                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                    return false;
                }
                // XXX this should have a helper for it...
                $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
                return true;
            }
            // empty() mirrors the whitespace test in validateChildren() and
            // avoids reading a missing property; null means $result ran out
            if ($t === null || empty($t->is_whitespace)) {
                trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
                return false;
            }
        }
    }
}
// vim: et sw=4 sts=4

View File

@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List'); public $content_sets = array('Flow' => 'List');
public function setup($config) { public function setup($config) {
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common'); $ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
$ol->wrap = "li"; $ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common'); // XXX The wrap attribute is handled by MakeWellFormed. This is all
$ul->wrap = "li"; // quite unsatisfactory, because we generated this
// *specifically* for lists, and now a big chunk of the handling
// is done properly by the List ChildDef. So actually, we just
// want enough information to make autoclosing work properly,
// and then hand off the tricky stuff to the ChildDef.
$ol->wrap = 'li';
$ul->wrap = 'li';
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common'); $this->addElement('li', false, 'Flow', 'Common');

View File

@ -0,0 +1,50 @@
<?php
/**
 * Test cases for HTMLPurifier_ChildDef_List, run through the ChildDef harness.
 *
 * Each case feeds an HTML fragment to assertResult(); where a second argument
 * is given it is the expected outcome (a repaired fragment, or false).
 */
class HTMLPurifier_ChildDef_ListTest extends HTMLPurifier_ChildDefHarness
{
    /** Installs a fresh List child definition as the object under test. */
    public function setUp()
    {
        parent::setUp();
        $this->obj = new HTMLPurifier_ChildDef_List();
    }

    /** No children at all. */
    public function testEmptyInput()
    {
        $input = '';
        $this->assertResult($input, false);
    }

    /** A lone self-closing list item. */
    public function testSingleLi()
    {
        $input = '<li />';
        $this->assertResult($input);
    }

    /** Several list items in a row. */
    public function testSomeLi()
    {
        $input = '<li>asdf</li><li />';
        $this->assertResult($input);
    }

    /** A non-list element directly inside the list gets its own li. */
    public function testIllegal()
    {
        // XXX actually this never gets triggered in practice
        $input    = '<li /><b />';
        $expected = '<li /><li><b /></li>';
        $this->assertResult($input, $expected);
    }

    /** A leading ol with no preceding li. */
    public function testOlAtBeginning()
    {
        $input    = '<ol />';
        $expected = '<li><ol /></li>';
        $this->assertResult($input, $expected);
    }

    /** A leading ol followed by a regular list item. */
    public function testOlAtBeginningWithOtherJunk()
    {
        $input    = '<ol /><li />';
        $expected = '<li><ol /></li><li />';
        $this->assertResult($input, $expected);
    }

    /** An ol following a list item is folded into that item. */
    public function testOlInMiddle()
    {
        $input    = '<li>Foo</li><ol><li>Bar</li></ol>';
        $expected = '<li>Foo<ol><li>Bar</li></ol></li>';
        $this->assertResult($input, $expected);
    }

    /** Consecutive ol elements after a list item share that item. */
    public function testMultipleOl()
    {
        $input    = '<li /><ol /><ol />';
        $expected = '<li><ol /><ol /></li>';
        $this->assertResult($input, $expected);
    }

    /** Same as the leading-ol case, but with ul. */
    public function testUlAtBeginning()
    {
        $input    = '<ul />';
        $expected = '<li><ul /></li>';
        $this->assertResult($input, $expected);
    }
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,5 @@
--HTML--
<ul><li>Sublist 1</li><ul><li>Bullet</li></ul></ul>
--EXPECT--
<ul><li>Sublist 1<ul><li>Bullet</li></ul></li></ul>
--# vim: et sw=4 sts=4

View File

@ -35,10 +35,17 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
$this->assertResult('<ul></ul>', ''); $this->assertResult('<ul></ul>', '');
} }
function testRemoveIllegalPCDATA() { function testListHandleIllegalPCDATA() {
$this->assertResult( $this->assertResult(
'<ul>Illegal text<li>Legal item</li></ul>', '<ul>Illegal text<li>Legal item</li></ul>',
'<ul><li>Legal item</li></ul>' '<ul><li>Illegal text</li><li>Legal item</li></ul>'
);
}
function testRemoveIllegalPCDATA() {
$this->assertResult(
'<table><tr>Illegal text<td></td></tr></table>',
'<table><tr><td></td></tr></table>'
); );
} }

View File

@ -119,21 +119,21 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
function testNestedOl() { function testNestedOl() {
$this->assertResult( $this->assertResult(
'<ol><ol><li>foo</li></ol></ol>', '<ol><ol><li>foo</li></ol></ol>',
'<ol><li><ol><li>foo</li></ol></li></ol>' '<ol><ol><li>foo</li></ol></ol>'
); );
} }
function testNestedUl() { function testNestedUl() {
$this->assertResult( $this->assertResult(
'<ul><ul><li>foo</li></ul></ul>', '<ul><ul><li>foo</li></ul></ul>',
'<ul><li><ul><li>foo</li></ul></li></ul>' '<ul><ul><li>foo</li></ul></ul>'
); );
} }
function testNestedOlWithStrangeEnding() { function testNestedOlWithStrangeEnding() {
$this->assertResult( $this->assertResult(
'<ol><li><ol><ol><li>foo</li></ol></li><li>foo</li></ol>', '<ol><li><ol><ol><li>foo</li></ol></li><li>foo</li></ol>',
'<ol><li><ol><li><ol><li>foo</li></ol></li><li>foo</li></ol></li></ol>' '<ol><li><ol><ol><li>foo</li></ol></ol></li><li>foo</li></ol>'
); );
} }