0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-03-11 17:18:44 +00:00

Implement "carryover" functionality, requested by Kinderlehrer <bitweaver@7doves.com>

This commit is a limited implementation of the "active formatting
elements" algorithm implemented in HTML5, which preserves certain
formatting elements such as <a> and <b> when exiting or entering nodes.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2008-12-20 13:06:00 -05:00
parent 119ebcda71
commit bfe474042f
17 changed files with 121 additions and 33 deletions

@ -28,7 +28,7 @@ abstract class HTMLPurifier_ChildDef
* Get lookup of tag names that should not close this element automatically.
* All other elements will do so.
*/
public function getNonAutoCloseElements($config) {
public function getAllowedElements($config) {
return $this->elements;
}

@ -15,7 +15,7 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi
* @note We don't want MakeWellFormed to auto-close inline elements since
* they might be allowed.
*/
public function getNonAutoCloseElements($config) {
public function getAllowedElements($config) {
$this->init($config);
return $this->fake_elements;
}

@ -5,6 +5,8 @@
* HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
* @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
* Please update that class too.
* @warning If you add new properties to this class, you MUST update
* the mergeIn() method.
*/
class HTMLPurifier_ElementDef
{
@ -90,6 +92,17 @@ class HTMLPurifier_ElementDef
*/
public $excludes = array();
/**
* This tag is explicitly auto-closed by the following tags.
*/
public $autoclose = array();
/**
* Whether or not this is a formatting element affected by the
* "Active Formatting Elements" algorithm.
*/
public $formatting;
/**
* Low-level factory constructor for creating new standalone element defs
*/
@ -137,6 +150,7 @@ class HTMLPurifier_ElementDef
$this->child = false;
}
if(!is_null($def->child)) $this->child = $def->child;
if(!is_null($def->formatting)) $this->formatting = $def->formatting;
if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline;
}

@ -22,6 +22,7 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
// 'type' => 'ContentType',
)
);
$a->formatting = true;
$a->excludes = array('a' => true);
}

@ -41,9 +41,15 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$this->addElement('menu', 'Block', 'Required: li', 'Common', array(
'compact' => 'Bool#compact'
));
$this->addElement('s', 'Inline', 'Inline', 'Common');
$this->addElement('strike', 'Inline', 'Inline', 'Common');
$this->addElement('u', 'Inline', 'Inline', 'Common');
$s = $this->addElement('s', 'Inline', 'Inline', 'Common');
$s->formatting = true;
$strike = $this->addElement('strike', 'Inline', 'Inline', 'Common');
$strike->formatting = true;
$u = $this->addElement('u', 'Inline', 'Inline', 'Common');
$u->formatting = true;
// setup modifications to old elements

@ -16,14 +16,19 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
public $name = 'Presentation';
public function setup($config) {
$this->addElement('b', 'Inline', 'Inline', 'Common');
$this->addElement('big', 'Inline', 'Inline', 'Common');
$this->addElement('hr', 'Block', 'Empty', 'Common');
$this->addElement('i', 'Inline', 'Inline', 'Common');
$this->addElement('small', 'Inline', 'Inline', 'Common');
$this->addElement('sub', 'Inline', 'Inline', 'Common');
$this->addElement('sup', 'Inline', 'Inline', 'Common');
$this->addElement('tt', 'Inline', 'Inline', 'Common');
$b = $this->addElement('b', 'Inline', 'Inline', 'Common');
$b->formatting = true;
$big = $this->addElement('big', 'Inline', 'Inline', 'Common');
$big->formatting = true;
$i = $this->addElement('i', 'Inline', 'Inline', 'Common');
$i->formatting = true;
$small = $this->addElement('small', 'Inline', 'Inline', 'Common');
$small->formatting = true;
$tt = $this->addElement('tt', 'Inline', 'Inline', 'Common');
$tt->formatting = true;
}
}

@ -26,15 +26,21 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
$this->addElement('abbr', 'Inline', 'Inline', 'Common');
$this->addElement('acronym', 'Inline', 'Inline', 'Common');
$this->addElement('cite', 'Inline', 'Inline', 'Common');
$this->addElement('code', 'Inline', 'Inline', 'Common');
$this->addElement('dfn', 'Inline', 'Inline', 'Common');
$this->addElement('em', 'Inline', 'Inline', 'Common');
$this->addElement('kbd', 'Inline', 'Inline', 'Common');
$this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
$this->addElement('samp', 'Inline', 'Inline', 'Common');
$this->addElement('strong', 'Inline', 'Inline', 'Common');
$this->addElement('var', 'Inline', 'Inline', 'Common');
$em = $this->addElement('em', 'Inline', 'Inline', 'Common');
$em->formatting = true;
$strong = $this->addElement('strong', 'Inline', 'Inline', 'Common');
$strong->formatting = true;
$code = $this->addElement('code', 'Inline', 'Inline', 'Common');
$code->formatting = true;
// Inline Structural ----------------------------------------------
$this->addElement('span', 'Inline', 'Inline', 'Common');
$this->addElement('br', 'Inline', 'Empty', 'Core');
@ -53,7 +59,9 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
$this->addElement('h6', 'Heading', 'Inline', 'Common');
// Block Structural -----------------------------------------------
$this->addElement('p', 'Block', 'Inline', 'Common');
$p = $this->addElement('p', 'Block', 'Inline', 'Common');
$p->autoclose = array_flip(array("address", "blockquote", "center", "dir", "div", "dl", "fieldset", "ol", "p", "ul"));
$this->addElement('div', 'Block', 'Flow', 'Common');
}

@ -37,6 +37,7 @@ $messages = array(
'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed',
'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text',
'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact',
'Strategy_MakeWellFormed: Tag carryover' => '$1.Compact started on line $1.Line auto-continued into $CurrentToken.Compact',
'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed',
'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text',
'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized',

@ -159,12 +159,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
continue;
}
// if all goes well, this token will be passed through unharmed
$token = $tokens[$t];
//echo '<hr>';
//printTokens($tokens, $t);
//var_dump($this->stack);
//echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
// quick-check: if it's not a tag, no need to process
if (empty($token->is_tag)) {
@ -214,18 +211,36 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->stack[] = $parent;
if (isset($definition->info[$parent->name])) {
$elements = $definition->info[$parent->name]->child->getNonAutoCloseElements($config);
$elements = $definition->info[$parent->name]->child->getAllowedElements($config);
$autoclose = !isset($elements[$token->name]);
} else {
$autoclose = false;
}
$carryover = false;
if ($autoclose && $definition->info[$parent->name]->formatting) {
$carryover = true;
}
if ($autoclose) {
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
// insert parent end tag before this tag
// errors need to be updated
$new_token = new HTMLPurifier_Token_End($parent->name);
$new_token->start = $parent;
$this->insertBefore($new_token);
if ($carryover) {
$element = clone $parent;
$element->armor['MakeWellFormed_TagClosedError'] = true;
$element->carryover = true;
$this->processToken(array($new_token, $token, $element));
} else {
$this->insertBefore($new_token);
}
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
if (!$carryover) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
} else {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
}
}
$reprocess = true;
continue;
}
@ -339,12 +354,20 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
}
// insert tags, in FORWARD $j order: c,b,a with </a></b></c>
$replace = array($token);
for ($j = 1; $j < $c; $j++) {
// ...as well as from the insertions
$new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
$new_token->start = $skipped_tags[$j];
$this->insertBefore($new_token);
array_unshift($replace, $new_token);
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
$element = clone $skipped_tags[$j];
$element->carryover = true;
$element->armor['MakeWellFormed_TagClosedError'] = true;
$replace[] = $element;
}
}
$this->processToken($replace);
$reprocess = true;
continue;
}

@ -11,6 +11,9 @@
* Thanks to
* http://style.cleverchimp.com/font_size_intervals/altintervals.html
* for reasonable mappings.
* @warning This doesn't work completely correctly; specifically, this
* TagTransform operates before well-formedness is enforced, so
* the "active formatting elements" algorithm doesn't get applied.
*/
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
{

@ -19,6 +19,7 @@ class HTMLPurifier_Token {
*/
public $skip;
public $rewind;
public $carryover;
public function __get($n) {
if ($n === 'type') {

@ -0,0 +1,5 @@
--HTML--
<a href="foo.html"><h1>Foobar</h1></a>
--EXPECT--
<a href="foo.html"></a><h1><a href="foo.html">Foobar</a></h1><a href="foo.html"></a>
--# vim: et sw=4 sts=4

@ -6,8 +6,8 @@
class HTMLPurifier_Harness extends UnitTestCase
{
public function __construct() {
parent::__construct();
public function __construct($name = null) {
parent::__construct($name);
}
protected $config, $context, $purifier;

@ -22,7 +22,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
function testFixNesting() {
$this->assertResult(
'<b><div>Fix nesting.</div></b>',
'<b></b><div>Fix nesting.</div>'
'<b></b><div><b>Fix nesting.</b></div><b></b>'
);
}
@ -36,7 +36,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
function testFirstThree() {
$this->assertResult(
'<foo><b><div>All three.</div></b>',
'<b></b><div>All three.</div>'
'<b></b><div><b>All three.</b></div><b></b>'
);
}

@ -22,7 +22,7 @@ class HTMLPurifier_Strategy_MakeWellFormed_EndInsertInjectorTest extends HTMLPur
$this->assertResult('<i><i>Foo', '<i><i>Foo<b>Comment</b></i><b>Comment</b></i>');
}
function testEndOfNodeProcessing() {
$this->assertResult('<div><i>Foo</div>', '<div><i>Foo<b>Comment</b></i></div>');
$this->assertResult('<div><i>Foo</div>asdf', '<div><i>Foo<b>Comment</b></i></div><i>asdf<b>Comment</b></i>');
}
function testEmptyToStartEndProcessing() {
$this->assertResult('<i />', '<i><b>Comment</b></i>');

@ -26,7 +26,7 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
function testUnclosedTagTerminatedByParentNodeEnd() {
$this->assertResult(
'<b><i>Bold and italic?</b>',
'<b><i>Bold and italic?</i></b>'
'<b><i>Bold and italic?</i></b><i></i>'
);
}
@ -81,8 +81,8 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
function testAutoCloseMultiple() {
$this->assertResult(
'<span><span><div></div>',
'<span><span></span></span><div></div>'
'<b><span><div></div>asdf',
'<b><span></span></b><div><b></b></div><b>asdf</b>'
);
}
@ -102,6 +102,20 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
);
}
function testLongCarryOver() {
$this->assertResult(
'<b>asdf<div>asdf<i>df</i></div>asdf</b>',
'<b>asdf</b><div><b>asdf<i>df</i></b></div><b>asdf</b>'
);
}
function testInterleaved() {
$this->assertResult(
'<u>foo<i>bar</u>baz</i>',
'<u>foo<i>bar</i></u><i>baz</i>'
);
}
}
// vim: et sw=4 sts=4

@ -20,8 +20,15 @@ class HTMLPurifier_Strategy_MakeWellFormed_ErrorsTest extends HTMLPurifier_Strat
$this->invoke('</b>');
}
function testTagAutoClosed() {
$this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', new HTMLPurifier_Token_Start('b', array(), 1, 0));
function testTagAutoclose() {
$this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', new HTMLPurifier_Token_Start('p', array(), 1, 0));
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6));
$this->invoke('<p>Foo<div>Bar</div>');
}
function testTagCarryOver() {
$b = new HTMLPurifier_Token_Start('b', array(), 1, 0);
$this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $b);
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6));
$this->invoke('<b>Foo<div>Bar</div>');
}