mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-20 12:31:53 +00:00
Properly handle nested sublists by folding into previous list item.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
parent
8d572993b4
commit
3570c9985a
2
NEWS
2
NEWS
@ -16,6 +16,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
entities, even if target encoding is UTF-8.
|
entities, even if target encoding is UTF-8.
|
||||||
! Added support for 'scope' attribute on tables.
|
! Added support for 'scope' attribute on tables.
|
||||||
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
|
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
|
||||||
|
! Properly handle sub-lists directly nested inside of lists in
|
||||||
|
a standards-compliant way, by moving them into the preceding <li>
|
||||||
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
||||||
<yramirez-htmlpurifier@adicio.com> for reporting.
|
<yramirez-htmlpurifier@adicio.com> for reporting.
|
||||||
- Explicitly initialize anonModule variable to null.
|
- Explicitly initialize anonModule variable to null.
|
||||||
|
@ -135,6 +135,7 @@ require 'HTMLPurifier/AttrTransform/Textarea.php';
|
|||||||
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||||
require 'HTMLPurifier/ChildDef/Custom.php';
|
require 'HTMLPurifier/ChildDef/Custom.php';
|
||||||
require 'HTMLPurifier/ChildDef/Empty.php';
|
require 'HTMLPurifier/ChildDef/Empty.php';
|
||||||
|
require 'HTMLPurifier/ChildDef/List.php';
|
||||||
require 'HTMLPurifier/ChildDef/Required.php';
|
require 'HTMLPurifier/ChildDef/Required.php';
|
||||||
require 'HTMLPurifier/ChildDef/Optional.php';
|
require 'HTMLPurifier/ChildDef/Optional.php';
|
||||||
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||||
|
@ -129,6 +129,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
|||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||||
|
120
library/HTMLPurifier/ChildDef/List.php
Normal file
120
library/HTMLPurifier/ChildDef/List.php
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Definition for list containers ul and ol.
 *
 * Besides li children, this definition tolerates ul/ol elements that appear
 * directly inside the list: they are folded into the preceding li, or into a
 * newly created li when no li has been seen yet. Any other non-whitespace
 * child is wrapped in a newly created li.
 */
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    public $type = 'list';

    // lying a little bit, so that we can handle ul and ol ourselves
    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);

    /**
     * Flag for subclasses; reset to false at the start of every
     * validateChildren() call. Declared explicitly so that the assignment
     * does not create a dynamic property (deprecated as of PHP 8.2).
     */
    public $whitespace = false;

    /**
     * Rebuilds the children of a list so that only li elements sit at the
     * top level.
     *
     * @param array $tokens_of_children Tokens found between the list's start
     *                                  and end tags.
     * @param mixed $config  Not used by this implementation.
     * @param mixed $context Not used by this implementation.
     * @return bool|array false when there are no children, only whitespace,
     *                    or an internal invariant is violated; true when the
     *                    rebuilt sequence equals the input; otherwise the
     *                    rebuilt token array.
     */
    public function validateChildren($tokens_of_children, $config, $context) {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // the new set of children
        $result = array();

        // current depth into the nest (0 means a direct child of the list)
        $nesting = 0;

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // whether an li (real or synthesized) has been emitted so far
        $seen_li = false;
        // whether an li we opened or re-opened ourselves still needs its
        // closing tag
        $need_close_li = false;

        foreach ($tokens_of_children as $token) {
            // whitespace is passed through untouched, whatever the depth
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            // Depth 1 with a pending close means the content we tucked into
            // our own li has ended, so terminate that li before continuing.
            if ($nesting == 1 && $need_close_li) {
                $result[] = new HTMLPurifier_Token_End('li');
                $nesting--;
                $need_close_li = false;
            }

            // must be computed before this token adjusts the depth
            $is_child = ($nesting == 0);

            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            if ($is_child) {
                if ($token->name === 'li') {
                    // good
                    $seen_li = true;
                } elseif ($token->name === 'ul' || $token->name === 'ol') {
                    // we want to tuck this into the previous li
                    $need_close_li = true;
                    $nesting++; // accounts for the li that now encloses the sub-list
                    if (!$seen_li) {
                        // create a new li element
                        $result[] = new HTMLPurifier_Token_Start('li');
                    } else {
                        // backtrack until </li> found; whitespace popped on
                        // the way is discarded
                        while (true) {
                            $t = array_pop($result);
                            if ($t instanceof HTMLPurifier_Token_End) {
                                // XXX actually, these invariants could very plausibly be violated
                                // if we are doing silly things with modifying the set of allowed elements.
                                // FORTUNATELY, it doesn't make a difference, since the allowed
                                // elements are hard-coded here!
                                if ($t->name !== 'li') {
                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                                // dropping the </li> re-opens the previous item
                                break;
                            } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
                                if ($t->name !== 'li') {
                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                                // turn <li /> into an open <li> carrying the same data
                                // XXX this should have a helper for it...
                                $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
                                break;
                            } else {
                                // empty() mirrors the whitespace test at the top of
                                // the loop and stays quiet when $t is null or is a
                                // token without an is_whitespace property
                                if (empty($t->is_whitespace)) {
                                    trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                            }
                        }
                    }
                } else {
                    // start wrapping (this doesn't precisely mimic
                    // browser behavior, but what browsers do is kind of
                    // hard to mimic in a standards compliant way
                    // XXX Actually, this has no impact in practice,
                    // because this gets handled earlier. Arguably,
                    // we should rip out all of that processing
                    $result[] = new HTMLPurifier_Token_Start('li');
                    $nesting++;
                    $seen_li = true;
                    $need_close_li = true;
                }
            }
            $result[] = $token;
        }

        // close an li of ours that is still open at the end of the list
        if ($need_close_li) {
            $result[] = new HTMLPurifier_Token_End('li');
        }
        if (empty($result)) return false;
        if ($all_whitespace) {
            return false;
        }
        // loose comparison on purpose: equal-valued token objects count as unchanged
        if ($tokens_of_children == $result) return true;
        return $result;
    }
}
|
||||||
|
|
||||||
|
// vim: et sw=4 sts=4
|
@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
|
|||||||
public $content_sets = array('Flow' => 'List');
|
public $content_sets = array('Flow' => 'List');
|
||||||
|
|
||||||
public function setup($config) {
|
public function setup($config) {
|
||||||
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
|
$ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
|
||||||
$ol->wrap = "li";
|
$ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
|
||||||
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
|
// XXX The wrap attribute is handled by MakeWellFormed. This is all
|
||||||
$ul->wrap = "li";
|
// quite unsatisfactory, because we generated this
|
||||||
|
// *specifically* for lists, and now a big chunk of the handling
|
||||||
|
// is done properly by the List ChildDef. So actually, we just
|
||||||
|
// want enough information to make autoclosing work properly,
|
||||||
|
// and then hand off the tricky stuff to the ChildDef.
|
||||||
|
$ol->wrap = 'li';
|
||||||
|
$ul->wrap = 'li';
|
||||||
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
|
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
|
||||||
|
|
||||||
$this->addElement('li', false, 'Flow', 'Common');
|
$this->addElement('li', false, 'Flow', 'Common');
|
||||||
|
50
tests/HTMLPurifier/ChildDef/ListTest.php
Normal file
50
tests/HTMLPurifier/ChildDef/ListTest.php
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
class HTMLPurifier_ChildDef_ListTest extends HTMLPurifier_ChildDefHarness
{

    /**
     * Installs a fresh HTMLPurifier_ChildDef_List as the object under test.
     */
    function setUp() {
        parent::setUp();
        $definition = new HTMLPurifier_ChildDef_List();
        $this->obj = $definition;
    }

    /**
     * No children at all: expected result is false.
     */
    function testEmptyInput() {
        $input = '';
        $this->assertResult($input, false);
    }

    /**
     * A lone li; no expected value is passed to assertResult().
     */
    function testSingleLi() {
        $input = '<li />';
        $this->assertResult($input);
    }

    /**
     * Several li children; no expected value is passed to assertResult().
     */
    function testSomeLi() {
        $input = '<li>asdf</li><li />';
        $this->assertResult($input);
    }

    /**
     * A non-list element next to an li ends up wrapped in its own li.
     */
    function testIllegal() {
        // XXX actually this never gets triggered in practice
        $input    = '<li /><b />';
        $expected = '<li /><li><b /></li>';
        $this->assertResult($input, $expected);
    }

    /**
     * An ol with no li before it gets a new li around it.
     */
    function testOlAtBeginning() {
        $input    = '<ol />';
        $expected = '<li><ol /></li>';
        $this->assertResult($input, $expected);
    }

    /**
     * Same as above, with a following li that stays where it is.
     */
    function testOlAtBeginningWithOtherJunk() {
        $input    = '<ol /><li />';
        $expected = '<li><ol /></li><li />';
        $this->assertResult($input, $expected);
    }

    /**
     * An ol following an li is moved inside that li.
     */
    function testOlInMiddle() {
        $input    = '<li>Foo</li><ol><li>Bar</li></ol>';
        $expected = '<li>Foo<ol><li>Bar</li></ol></li>';
        $this->assertResult($input, $expected);
    }

    /**
     * Consecutive ol elements all land in the same preceding li.
     */
    function testMultipleOl() {
        $input    = '<li /><ol /><ol />';
        $expected = '<li><ol /><ol /></li>';
        $this->assertResult($input, $expected);
    }

    /**
     * A ul with no li before it gets a new li around it.
     */
    function testUlAtBeginning() {
        $input    = '<ul />';
        $expected = '<li><ul /></li>';
        $this->assertResult($input, $expected);
    }

}
|
||||||
|
|
||||||
|
// vim: et sw=4 sts=4
|
5
tests/HTMLPurifier/HTMLT/list-nesting.htmlt
Normal file
5
tests/HTMLPurifier/HTMLT/list-nesting.htmlt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
--HTML--
|
||||||
|
<ul><li>Sublist 1</li><ul><li>Bullet</li></ul></ul>
|
||||||
|
--EXPECT--
|
||||||
|
<ul><li>Sublist 1<ul><li>Bullet</li></ul></li></ul>
|
||||||
|
--# vim: et sw=4 sts=4
|
@ -35,10 +35,17 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
|
|||||||
$this->assertResult('<ul></ul>', '');
|
$this->assertResult('<ul></ul>', '');
|
||||||
}
|
}
|
||||||
|
|
||||||
function testRemoveIllegalPCDATA() {
|
function testListHandleIllegalPCDATA() {
|
||||||
$this->assertResult(
|
$this->assertResult(
|
||||||
'<ul>Illegal text<li>Legal item</li></ul>',
|
'<ul>Illegal text<li>Legal item</li></ul>',
|
||||||
'<ul><li>Legal item</li></ul>'
|
'<ul><li>Illegal text</li><li>Legal item</li></ul>'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testRemoveIllegalPCDATA() {
|
||||||
|
$this->assertResult(
|
||||||
|
'<table><tr>Illegal text<td></td></tr></table>',
|
||||||
|
'<table><tr><td></td></tr></table>'
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,21 +119,21 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
|
|||||||
function testNestedOl() {
|
function testNestedOl() {
|
||||||
$this->assertResult(
|
$this->assertResult(
|
||||||
'<ol><ol><li>foo</li></ol></ol>',
|
'<ol><ol><li>foo</li></ol></ol>',
|
||||||
'<ol><li><ol><li>foo</li></ol></li></ol>'
|
'<ol><ol><li>foo</li></ol></ol>'
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testNestedUl() {
|
function testNestedUl() {
|
||||||
$this->assertResult(
|
$this->assertResult(
|
||||||
'<ul><ul><li>foo</li></ul></ul>',
|
'<ul><ul><li>foo</li></ul></ul>',
|
||||||
'<ul><li><ul><li>foo</li></ul></li></ul>'
|
'<ul><ul><li>foo</li></ul></ul>'
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testNestedOlWithStrangeEnding() {
|
function testNestedOlWithStrangeEnding() {
|
||||||
$this->assertResult(
|
$this->assertResult(
|
||||||
'<ol><li><ol><ol><li>foo</li></ol></li><li>foo</li></ol>',
|
'<ol><li><ol><ol><li>foo</li></ol></li><li>foo</li></ol>',
|
||||||
'<ol><li><ol><li><ol><li>foo</li></ol></li><li>foo</li></ol></li></ol>'
|
'<ol><li><ol><ol><li>foo</li></ol></ol></li><li>foo</li></ol>'
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user