0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-03-23 14:27:02 +00:00

Implement "carryover" functionality, requested by Kinderlehrer <bitweaver@7doves.com>

This commit is a limited implementation of the "active formatting
elements" algorithm specified in HTML5, which preserves certain
formatting elements such as <a> and <b> when exiting or entering nodes.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2008-12-20 13:06:00 -05:00
parent 119ebcda71
commit bfe474042f
17 changed files with 121 additions and 33 deletions

View File

@ -28,7 +28,7 @@ abstract class HTMLPurifier_ChildDef
* Get lookup of tag names that should not close this element automatically. * Get lookup of tag names that should not close this element automatically.
* All other elements will do so. * All other elements will do so.
*/ */
public function getNonAutoCloseElements($config) { public function getAllowedElements($config) {
return $this->elements; return $this->elements;
} }

View File

@ -15,7 +15,7 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi
* @note We don't want MakeWellFormed to auto-close inline elements since * @note We don't want MakeWellFormed to auto-close inline elements since
* they might be allowed. * they might be allowed.
*/ */
public function getNonAutoCloseElements($config) { public function getAllowedElements($config) {
$this->init($config); $this->init($config);
return $this->fake_elements; return $this->fake_elements;
} }

View File

@ -5,6 +5,8 @@
* HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule. * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
* @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition. * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
* Please update that class too. * Please update that class too.
* @warning If you add new properties to this class, you MUST update
* the mergeIn() method.
*/ */
class HTMLPurifier_ElementDef class HTMLPurifier_ElementDef
{ {
@ -90,6 +92,17 @@ class HTMLPurifier_ElementDef
*/ */
public $excludes = array(); public $excludes = array();
/**
* This tag is explicitly auto-closed by the following tags.
*/
public $autoclose = array();
/**
* Whether or not this is a formatting element affected by the
* "Active Formatting Elements" algorithm.
*/
public $formatting;
/** /**
* Low-level factory constructor for creating new standalone element defs * Low-level factory constructor for creating new standalone element defs
*/ */
@ -137,6 +150,7 @@ class HTMLPurifier_ElementDef
$this->child = false; $this->child = false;
} }
if(!is_null($def->child)) $this->child = $def->child; if(!is_null($def->child)) $this->child = $def->child;
if(!is_null($def->formatting)) $this->formatting = $def->formatting;
if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline;
} }

View File

@ -22,6 +22,7 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
// 'type' => 'ContentType', // 'type' => 'ContentType',
) )
); );
$a->formatting = true;
$a->excludes = array('a' => true); $a->excludes = array('a' => true);
} }

View File

@ -41,9 +41,15 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$this->addElement('menu', 'Block', 'Required: li', 'Common', array( $this->addElement('menu', 'Block', 'Required: li', 'Common', array(
'compact' => 'Bool#compact' 'compact' => 'Bool#compact'
)); ));
$this->addElement('s', 'Inline', 'Inline', 'Common');
$this->addElement('strike', 'Inline', 'Inline', 'Common'); $s = $this->addElement('s', 'Inline', 'Inline', 'Common');
$this->addElement('u', 'Inline', 'Inline', 'Common'); $s->formatting = true;
$strike = $this->addElement('strike', 'Inline', 'Inline', 'Common');
$strike->formatting = true;
$u = $this->addElement('u', 'Inline', 'Inline', 'Common');
$u->formatting = true;
// setup modifications to old elements // setup modifications to old elements

View File

@ -16,14 +16,19 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
public $name = 'Presentation'; public $name = 'Presentation';
public function setup($config) { public function setup($config) {
$this->addElement('b', 'Inline', 'Inline', 'Common');
$this->addElement('big', 'Inline', 'Inline', 'Common');
$this->addElement('hr', 'Block', 'Empty', 'Common'); $this->addElement('hr', 'Block', 'Empty', 'Common');
$this->addElement('i', 'Inline', 'Inline', 'Common');
$this->addElement('small', 'Inline', 'Inline', 'Common');
$this->addElement('sub', 'Inline', 'Inline', 'Common'); $this->addElement('sub', 'Inline', 'Inline', 'Common');
$this->addElement('sup', 'Inline', 'Inline', 'Common'); $this->addElement('sup', 'Inline', 'Inline', 'Common');
$this->addElement('tt', 'Inline', 'Inline', 'Common'); $b = $this->addElement('b', 'Inline', 'Inline', 'Common');
$b->formatting = true;
$big = $this->addElement('big', 'Inline', 'Inline', 'Common');
$big->formatting = true;
$i = $this->addElement('i', 'Inline', 'Inline', 'Common');
$i->formatting = true;
$small = $this->addElement('small', 'Inline', 'Inline', 'Common');
$small->formatting = true;
$tt = $this->addElement('tt', 'Inline', 'Inline', 'Common');
$tt->formatting = true;
} }
} }

View File

@ -26,15 +26,21 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
$this->addElement('abbr', 'Inline', 'Inline', 'Common'); $this->addElement('abbr', 'Inline', 'Inline', 'Common');
$this->addElement('acronym', 'Inline', 'Inline', 'Common'); $this->addElement('acronym', 'Inline', 'Inline', 'Common');
$this->addElement('cite', 'Inline', 'Inline', 'Common'); $this->addElement('cite', 'Inline', 'Inline', 'Common');
$this->addElement('code', 'Inline', 'Inline', 'Common');
$this->addElement('dfn', 'Inline', 'Inline', 'Common'); $this->addElement('dfn', 'Inline', 'Inline', 'Common');
$this->addElement('em', 'Inline', 'Inline', 'Common');
$this->addElement('kbd', 'Inline', 'Inline', 'Common'); $this->addElement('kbd', 'Inline', 'Inline', 'Common');
$this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI')); $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
$this->addElement('samp', 'Inline', 'Inline', 'Common'); $this->addElement('samp', 'Inline', 'Inline', 'Common');
$this->addElement('strong', 'Inline', 'Inline', 'Common');
$this->addElement('var', 'Inline', 'Inline', 'Common'); $this->addElement('var', 'Inline', 'Inline', 'Common');
$em = $this->addElement('em', 'Inline', 'Inline', 'Common');
$em->formatting = true;
$strong = $this->addElement('strong', 'Inline', 'Inline', 'Common');
$strong->formatting = true;
$code = $this->addElement('code', 'Inline', 'Inline', 'Common');
$code->formatting = true;
// Inline Structural ---------------------------------------------- // Inline Structural ----------------------------------------------
$this->addElement('span', 'Inline', 'Inline', 'Common'); $this->addElement('span', 'Inline', 'Inline', 'Common');
$this->addElement('br', 'Inline', 'Empty', 'Core'); $this->addElement('br', 'Inline', 'Empty', 'Core');
@ -53,7 +59,9 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
$this->addElement('h6', 'Heading', 'Inline', 'Common'); $this->addElement('h6', 'Heading', 'Inline', 'Common');
// Block Structural ----------------------------------------------- // Block Structural -----------------------------------------------
$this->addElement('p', 'Block', 'Inline', 'Common'); $p = $this->addElement('p', 'Block', 'Inline', 'Common');
$p->autoclose = array_flip(array("address", "blockquote", "center", "dir", "div", "dl", "fieldset", "ol", "p", "ul"));
$this->addElement('div', 'Block', 'Flow', 'Common'); $this->addElement('div', 'Block', 'Flow', 'Common');
} }

View File

@ -37,6 +37,7 @@ $messages = array(
'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', 'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed',
'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text', 'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text',
'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', 'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact',
'Strategy_MakeWellFormed: Tag carryover' => '$1.Compact started on line $1.Line auto-continued into $CurrentToken.Compact',
'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', 'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed',
'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text', 'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text',
'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', 'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized',

View File

@ -159,12 +159,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
continue; continue;
} }
// if all goes well, this token will be passed through unharmed
$token = $tokens[$t]; $token = $tokens[$t];
//echo '<hr>'; //echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
//printTokens($tokens, $t);
//var_dump($this->stack);
// quick-check: if it's not a tag, no need to process // quick-check: if it's not a tag, no need to process
if (empty($token->is_tag)) { if (empty($token->is_tag)) {
@ -214,18 +211,36 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->stack[] = $parent; $this->stack[] = $parent;
if (isset($definition->info[$parent->name])) { if (isset($definition->info[$parent->name])) {
$elements = $definition->info[$parent->name]->child->getNonAutoCloseElements($config); $elements = $definition->info[$parent->name]->child->getAllowedElements($config);
$autoclose = !isset($elements[$token->name]); $autoclose = !isset($elements[$token->name]);
} else { } else {
$autoclose = false; $autoclose = false;
} }
$carryover = false;
if ($autoclose && $definition->info[$parent->name]->formatting) {
$carryover = true;
}
if ($autoclose) { if ($autoclose) {
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); // errors need to be updated
// insert parent end tag before this tag
$new_token = new HTMLPurifier_Token_End($parent->name); $new_token = new HTMLPurifier_Token_End($parent->name);
$new_token->start = $parent; $new_token->start = $parent;
$this->insertBefore($new_token); if ($carryover) {
$element = clone $parent;
$element->armor['MakeWellFormed_TagClosedError'] = true;
$element->carryover = true;
$this->processToken(array($new_token, $token, $element));
} else {
$this->insertBefore($new_token);
}
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
if (!$carryover) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
} else {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
}
}
$reprocess = true; $reprocess = true;
continue; continue;
} }
@ -339,12 +354,20 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
} }
// insert tags, in FORWARD $j order: c,b,a with </a></b></c> // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
$replace = array($token);
for ($j = 1; $j < $c; $j++) { for ($j = 1; $j < $c; $j++) {
// ...as well as from the insertions // ...as well as from the insertions
$new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name); $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
$new_token->start = $skipped_tags[$j]; $new_token->start = $skipped_tags[$j];
$this->insertBefore($new_token); array_unshift($replace, $new_token);
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
$element = clone $skipped_tags[$j];
$element->carryover = true;
$element->armor['MakeWellFormed_TagClosedError'] = true;
$replace[] = $element;
}
} }
$this->processToken($replace);
$reprocess = true; $reprocess = true;
continue; continue;
} }

View File

@ -11,6 +11,9 @@
* Thanks to * Thanks to
* http://style.cleverchimp.com/font_size_intervals/altintervals.html * http://style.cleverchimp.com/font_size_intervals/altintervals.html
* for reasonable mappings. * for reasonable mappings.
* @warning This doesn't work completely correctly; specifically, this
* TagTransform operates before well-formedness is enforced, so
* the "active formatting elements" algorithm doesn't get applied.
*/ */
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
{ {

View File

@ -19,6 +19,7 @@ class HTMLPurifier_Token {
*/ */
public $skip; public $skip;
public $rewind; public $rewind;
public $carryover;
public function __get($n) { public function __get($n) {
if ($n === 'type') { if ($n === 'type') {

View File

@ -0,0 +1,5 @@
--HTML--
<a href="foo.html"><h1>Foobar</h1></a>
--EXPECT--
<a href="foo.html"></a><h1><a href="foo.html">Foobar</a></h1><a href="foo.html"></a>
--# vim: et sw=4 sts=4

View File

@ -6,8 +6,8 @@
class HTMLPurifier_Harness extends UnitTestCase class HTMLPurifier_Harness extends UnitTestCase
{ {
public function __construct() { public function __construct($name = null) {
parent::__construct(); parent::__construct($name);
} }
protected $config, $context, $purifier; protected $config, $context, $purifier;

View File

@ -22,7 +22,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
function testFixNesting() { function testFixNesting() {
$this->assertResult( $this->assertResult(
'<b><div>Fix nesting.</div></b>', '<b><div>Fix nesting.</div></b>',
'<b></b><div>Fix nesting.</div>' '<b></b><div><b>Fix nesting.</b></div><b></b>'
); );
} }
@ -36,7 +36,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
function testFirstThree() { function testFirstThree() {
$this->assertResult( $this->assertResult(
'<foo><b><div>All three.</div></b>', '<foo><b><div>All three.</div></b>',
'<b></b><div>All three.</div>' '<b></b><div><b>All three.</b></div><b></b>'
); );
} }

View File

@ -22,7 +22,7 @@ class HTMLPurifier_Strategy_MakeWellFormed_EndInsertInjectorTest extends HTMLPur
$this->assertResult('<i><i>Foo', '<i><i>Foo<b>Comment</b></i><b>Comment</b></i>'); $this->assertResult('<i><i>Foo', '<i><i>Foo<b>Comment</b></i><b>Comment</b></i>');
} }
function testEndOfNodeProcessing() { function testEndOfNodeProcessing() {
$this->assertResult('<div><i>Foo</div>', '<div><i>Foo<b>Comment</b></i></div>'); $this->assertResult('<div><i>Foo</div>asdf', '<div><i>Foo<b>Comment</b></i></div><i>asdf<b>Comment</b></i>');
} }
function testEmptyToStartEndProcessing() { function testEmptyToStartEndProcessing() {
$this->assertResult('<i />', '<i><b>Comment</b></i>'); $this->assertResult('<i />', '<i><b>Comment</b></i>');

View File

@ -26,7 +26,7 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
function testUnclosedTagTerminatedByParentNodeEnd() { function testUnclosedTagTerminatedByParentNodeEnd() {
$this->assertResult( $this->assertResult(
'<b><i>Bold and italic?</b>', '<b><i>Bold and italic?</b>',
'<b><i>Bold and italic?</i></b>' '<b><i>Bold and italic?</i></b><i></i>'
); );
} }
@ -81,8 +81,8 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
function testAutoCloseMultiple() { function testAutoCloseMultiple() {
$this->assertResult( $this->assertResult(
'<span><span><div></div>', '<b><span><div></div>asdf',
'<span><span></span></span><div></div>' '<b><span></span></b><div><b></b></div><b>asdf</b>'
); );
} }
@ -102,6 +102,20 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
); );
} }
function testLongCarryOver() {
$this->assertResult(
'<b>asdf<div>asdf<i>df</i></div>asdf</b>',
'<b>asdf</b><div><b>asdf<i>df</i></b></div><b>asdf</b>'
);
}
function testInterleaved() {
$this->assertResult(
'<u>foo<i>bar</u>baz</i>',
'<u>foo<i>bar</i></u><i>baz</i>'
);
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@ -20,8 +20,15 @@ class HTMLPurifier_Strategy_MakeWellFormed_ErrorsTest extends HTMLPurifier_Strat
$this->invoke('</b>'); $this->invoke('</b>');
} }
function testTagAutoClosed() { function testTagAutoclose() {
$this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', new HTMLPurifier_Token_Start('b', array(), 1, 0)); $this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', new HTMLPurifier_Token_Start('p', array(), 1, 0));
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6));
$this->invoke('<p>Foo<div>Bar</div>');
}
function testTagCarryOver() {
$b = new HTMLPurifier_Token_Start('b', array(), 1, 0);
$this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $b);
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6)); $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6));
$this->invoke('<b>Foo<div>Bar</div>'); $this->invoke('<b>Foo<div>Bar</div>');
} }