mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-03 05:11:52 +00:00
Properly handle nested sublists by folding into previous list item.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
parent
8d572993b4
commit
3570c9985a
2
NEWS
2
NEWS
@ -16,6 +16,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
entities, even if target encoding is UTF-8.
|
||||
! Added support for 'scope' attribute on tables.
|
||||
! Added %HTML.TargetBlank, which adds target="_blank" to all outgoing links.
|
||||
! Properly handle sub-lists directly nested inside of lists in
|
||||
a standards-compliant way, by moving them into the preceding <li>.
|
||||
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
||||
<yramirez-htmlpurifier@adicio.com> for reporting.
|
||||
- Explicitly initialize anonModule variable to null.
|
||||
|
@ -135,6 +135,7 @@ require 'HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require 'HTMLPurifier/ChildDef/Custom.php';
|
||||
require 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require 'HTMLPurifier/ChildDef/List.php';
|
||||
require 'HTMLPurifier/ChildDef/Required.php';
|
||||
require 'HTMLPurifier/ChildDef/Optional.php';
|
||||
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
|
@ -129,6 +129,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
|
120
library/HTMLPurifier/ChildDef/List.php
Normal file
120
library/HTMLPurifier/ChildDef/List.php
Normal file
@ -0,0 +1,120 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition for list containers ul and ol.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    public $type = 'list';
    // lying a little bit, so that we can handle ul and ol ourselves
    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);

    /**
     * Reset to false at the start of every validateChildren() call.
     * Declared explicitly so that the assignment there does not create an
     * undeclared (dynamic) property on the instance.
     */
    public $whitespace = false;

    /**
     * Validates the children of a list container, repairing illegal
     * direct children instead of dropping them.
     *
     * Walks the flat token stream while tracking nesting depth. Tokens at
     * depth zero are the direct children of the list:
     *  - an li is kept as-is;
     *  - a ul/ol is moved inside the preceding li (or inside a freshly
     *    created li when no li has been seen yet);
     *  - anything else is wrapped in a freshly created li.
     * Whitespace tokens are passed through untouched.
     *
     * @param array $tokens_of_children Tokens found between the list's
     *        start and end tags.
     * @param mixed $config  Not used by this implementation.
     * @param mixed $context Not used by this implementation.
     * @return bool|array false if the parent node should be deleted (no
     *         children, only whitespace, or an internal invariant was
     *         violated), true if the children are fine as given, or the
     *         replacement array of tokens.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // the new set of children
        $result = array();

        // current depth into the nest
        $nesting = 0;

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // whether an li has been seen among the direct children so far
        $seen_li = false;
        // whether an li that we opened ourselves is still waiting for its end tag
        $need_close_li = false;

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            // Depth 1 with a pending close means the only thing still open
            // is the li we created, and its content has ended: close it
            // before looking at the next direct child.
            if ($nesting == 1 && $need_close_li) {
                $result[] = new HTMLPurifier_Token_End('li');
                $nesting--;
                $need_close_li = false;
            }

            // must be computed before this token's own effect on the depth
            $is_child = ($nesting == 0);

            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            if ($is_child) {
                if ($token->name === 'li') {
                    // good
                    $seen_li = true;
                } elseif ($token->name === 'ul' || $token->name === 'ol') {
                    // we want to tuck this into the previous li
                    $need_close_li = true;
                    $nesting++; // accounts for the li that is now open around the sub-list
                    if (!$seen_li) {
                        // create a new li element
                        $result[] = new HTMLPurifier_Token_Start('li');
                    } elseif (!$this->reopenPreviousLi($result)) {
                        return false;
                    }
                } else {
                    // start wrapping (this doesn't precisely mimic
                    // browser behavior, but what browsers do is kind of
                    // hard to mimic in a standards compliant way
                    // XXX Actually, this has no impact in practice,
                    // because this gets handled earlier. Arguably,
                    // we should rip out all of that processing
                    $result[] = new HTMLPurifier_Token_Start('li');
                    $nesting++;
                    $seen_li = true;
                    $need_close_li = true;
                }
            }
            $result[] = $token;
        }

        if ($need_close_li) {
            $result[] = new HTMLPurifier_Token_End('li');
        }
        if (empty($result)) return false;
        if ($all_whitespace) {
            return false;
        }
        if ($tokens_of_children == $result) return true;
        return $result;
    }

    /**
     * Re-opens the list item at the tail of $result so that a sub-list can
     * be appended inside it.
     *
     * Backtracks over trailing whitespace until the closing token of the
     * previous li is found. An end tag is simply removed; a self-closing
     * li is replaced by an equivalent start tag. Whitespace skipped on the
     * way is put back afterwards (so it ends up inside the re-opened li)
     * rather than being thrown away.
     *
     * @param array $result Output tokens built so far; modified in place.
     * @return bool true on success; false if something other than
     *         whitespace followed by an li closing token was found (an
     *         E_USER_ERROR is triggered first).
     */
    private function reopenPreviousLi(array &$result) {
        // whitespace tokens popped while backtracking, most recent first
        $skipped = array();

        while (true) {
            // yields null once $result is exhausted; handled by the empty() check below
            $t = array_pop($result);

            $is_end   = $t instanceof HTMLPurifier_Token_End;
            $is_empty = $t instanceof HTMLPurifier_Token_Empty; // bleagh

            if ($is_end || $is_empty) {
                // XXX actually, these invariants could very plausibly be violated
                // if we are doing silly things with modifying the set of allowed elements.
                // FORTUNATELY, it doesn't make a difference, since the allowed
                // elements are hard-coded here!
                if ($t->name !== 'li') {
                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                    return false;
                }
                if ($is_empty) {
                    // XXX this should have a helper for it...
                    $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
                }
                break;
            }

            // empty() rather than a bare property read: tag tokens may not
            // define is_whitespace, and $t is null when nothing is left.
            if (empty($t->is_whitespace)) {
                trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
                return false;
            }
            $skipped[] = $t;
        }

        // restore the skipped whitespace in its original order
        foreach (array_reverse($skipped) as $ws) {
            $result[] = $ws;
        }
        return true;
    }
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
|
||||
public $content_sets = array('Flow' => 'List');
|
||||
|
||||
public function setup($config) {
|
||||
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
|
||||
$ol->wrap = "li";
|
||||
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
|
||||
$ul->wrap = "li";
|
||||
$ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
|
||||
$ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
|
||||
// XXX The wrap attribute is handled by MakeWellFormed. This is all
|
||||
// quite unsatisfactory, because we generated this
|
||||
// *specifically* for lists, and now a big chunk of the handling
|
||||
// is done properly by the List ChildDef. So actually, we just
|
||||
// want enough information to make autoclosing work properly,
|
||||
// and then hand off the tricky stuff to the ChildDef.
|
||||
$ol->wrap = 'li';
|
||||
$ul->wrap = 'li';
|
||||
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
|
||||
|
||||
$this->addElement('li', false, 'Flow', 'Common');
|
||||
|
50
tests/HTMLPurifier/ChildDef/ListTest.php
Normal file
50
tests/HTMLPurifier/ChildDef/ListTest.php
Normal file
@ -0,0 +1,50 @@
|
||||
<?php
|
||||
|
||||
/**
 * Exercises HTMLPurifier_ChildDef_List through the ChildDef test harness.
 *
 * Each case hands assertResult() the children of a list as an HTML
 * fragment, optionally followed by the expected outcome.
 */
class HTMLPurifier_ChildDef_ListTest extends HTMLPurifier_ChildDefHarness
{

    /**
     * Installs a fresh List ChildDef as the object under test.
     */
    public function setUp() {
        parent::setUp();
        $this->obj = new HTMLPurifier_ChildDef_List();
    }

    /**
     * No children at all: expected outcome is false.
     */
    public function testEmptyInput() {
        $this->assertResult('', false);
    }

    /**
     * A lone self-closing list item.
     */
    public function testSingleLi() {
        $this->assertResult('<li />');
    }

    /**
     * Several list items, one of them with text content.
     */
    public function testSomeLi() {
        $this->assertResult('<li>asdf</li><li />');
    }

    /**
     * A non-list element as a direct child gets its own wrapping li.
     */
    public function testIllegal() {
        // XXX actually this never gets triggered in practice
        $this->assertResult('<li /><b />', '<li /><li><b /></li>');
    }

    /**
     * An ol with no preceding li is wrapped in a new li.
     */
    public function testOlAtBeginning() {
        $this->assertResult('<ol />', '<li><ol /></li>');
    }

    /**
     * An ol with no preceding li, followed by a regular li.
     */
    public function testOlAtBeginningWithOtherJunk() {
        $this->assertResult('<ol /><li />', '<li><ol /></li><li />');
    }

    /**
     * An ol after a li is folded into that li.
     */
    public function testOlInMiddle() {
        $this->assertResult('<li>Foo</li><ol><li>Bar</li></ol>', '<li>Foo<ol><li>Bar</li></ol></li>');
    }

    /**
     * Consecutive ol elements after a li all end up in that same li.
     */
    public function testMultipleOl() {
        $this->assertResult('<li /><ol /><ol />', '<li><ol /><ol /></li>');
    }

    /**
     * A ul with no preceding li is wrapped in a new li.
     */
    public function testUlAtBeginning() {
        $this->assertResult('<ul />', '<li><ul /></li>');
    }

}
|
||||
|
||||
// vim: et sw=4 sts=4
|
5
tests/HTMLPurifier/HTMLT/list-nesting.htmlt
Normal file
5
tests/HTMLPurifier/HTMLT/list-nesting.htmlt
Normal file
@ -0,0 +1,5 @@
|
||||
--HTML--
|
||||
<ul><li>Sublist 1</li><ul><li>Bullet</li></ul></ul>
|
||||
--EXPECT--
|
||||
<ul><li>Sublist 1<ul><li>Bullet</li></ul></li></ul>
|
||||
--# vim: et sw=4 sts=4
|
@ -35,10 +35,17 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
|
||||
$this->assertResult('<ul></ul>', '');
|
||||
}
|
||||
|
||||
function testRemoveIllegalPCDATA() {
|
||||
function testListHandleIllegalPCDATA() {
|
||||
$this->assertResult(
|
||||
'<ul>Illegal text<li>Legal item</li></ul>',
|
||||
'<ul><li>Legal item</li></ul>'
|
||||
'<ul><li>Illegal text</li><li>Legal item</li></ul>'
|
||||
);
|
||||
}
|
||||
|
||||
function testRemoveIllegalPCDATA() {
|
||||
$this->assertResult(
|
||||
'<table><tr>Illegal text<td></td></tr></table>',
|
||||
'<table><tr><td></td></tr></table>'
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -119,21 +119,21 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
|
||||
function testNestedOl() {
|
||||
$this->assertResult(
|
||||
'<ol><ol><li>foo</li></ol></ol>',
|
||||
'<ol><li><ol><li>foo</li></ol></li></ol>'
|
||||
'<ol><ol><li>foo</li></ol></ol>'
|
||||
);
|
||||
}
|
||||
|
||||
function testNestedUl() {
|
||||
$this->assertResult(
|
||||
'<ul><ul><li>foo</li></ul></ul>',
|
||||
'<ul><li><ul><li>foo</li></ul></li></ul>'
|
||||
'<ul><ul><li>foo</li></ul></ul>'
|
||||
);
|
||||
}
|
||||
|
||||
function testNestedOlWithStrangeEnding() {
|
||||
$this->assertResult(
|
||||
'<ol><li><ol><ol><li>foo</li></ol></li><li>foo</li></ol>',
|
||||
'<ol><li><ol><li><ol><li>foo</li></ol></li><li>foo</li></ol></li></ol>'
|
||||
'<ol><li><ol><ol><li>foo</li></ol></ol></li><li>foo</li></ol>'
|
||||
);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user