mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-03-11 17:18:44 +00:00
Implement "carryover" functionality, requested by Kinderlehrer <bitweaver@7doves.com>
This commit is a limited implementation of the "active formatting elements" algorithm specified in HTML5, which preserves certain formatting elements such as <a> and <b> when exiting or entering nodes. Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
parent
119ebcda71
commit
bfe474042f
library/HTMLPurifier
tests/HTMLPurifier
@ -28,7 +28,7 @@ abstract class HTMLPurifier_ChildDef
|
||||
* Get lookup of tag names that should not close this element automatically.
|
||||
* All other elements will do so.
|
||||
*/
|
||||
public function getNonAutoCloseElements($config) {
|
||||
public function getAllowedElements($config) {
|
||||
return $this->elements;
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi
|
||||
* @note We don't want MakeWellFormed to auto-close inline elements since
|
||||
* they might be allowed.
|
||||
*/
|
||||
public function getNonAutoCloseElements($config) {
|
||||
public function getAllowedElements($config) {
|
||||
$this->init($config);
|
||||
return $this->fake_elements;
|
||||
}
|
||||
|
@ -5,6 +5,8 @@
|
||||
* HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
|
||||
* @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
|
||||
* Please update that class too.
|
||||
* @warning If you add new properties to this class, you MUST update
|
||||
* the mergeIn() method.
|
||||
*/
|
||||
class HTMLPurifier_ElementDef
|
||||
{
|
||||
@ -90,6 +92,17 @@ class HTMLPurifier_ElementDef
|
||||
*/
|
||||
public $excludes = array();
|
||||
|
||||
/**
|
||||
* This tag is explicitly auto-closed by the following tags.
|
||||
*/
|
||||
public $autoclose = array();
|
||||
|
||||
/**
|
||||
* Whether or not this is a formatting element affected by the
|
||||
* "Active Formatting Elements" algorithm.
|
||||
*/
|
||||
public $formatting;
|
||||
|
||||
/**
|
||||
* Low-level factory constructor for creating new standalone element defs
|
||||
*/
|
||||
@ -137,6 +150,7 @@ class HTMLPurifier_ElementDef
|
||||
$this->child = false;
|
||||
}
|
||||
if(!is_null($def->child)) $this->child = $def->child;
|
||||
if(!is_null($def->formatting)) $this->formatting = $def->formatting;
|
||||
if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline;
|
||||
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
|
||||
// 'type' => 'ContentType',
|
||||
)
|
||||
);
|
||||
$a->formatting = true;
|
||||
$a->excludes = array('a' => true);
|
||||
}
|
||||
|
||||
|
@ -41,9 +41,15 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
|
||||
$this->addElement('menu', 'Block', 'Required: li', 'Common', array(
|
||||
'compact' => 'Bool#compact'
|
||||
));
|
||||
$this->addElement('s', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('strike', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('u', 'Inline', 'Inline', 'Common');
|
||||
|
||||
$s = $this->addElement('s', 'Inline', 'Inline', 'Common');
|
||||
$s->formatting = true;
|
||||
|
||||
$strike = $this->addElement('strike', 'Inline', 'Inline', 'Common');
|
||||
$strike->formatting = true;
|
||||
|
||||
$u = $this->addElement('u', 'Inline', 'Inline', 'Common');
|
||||
$u->formatting = true;
|
||||
|
||||
// setup modifications to old elements
|
||||
|
||||
|
@ -16,14 +16,19 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
|
||||
public $name = 'Presentation';
|
||||
|
||||
public function setup($config) {
|
||||
$this->addElement('b', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('big', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('hr', 'Block', 'Empty', 'Common');
|
||||
$this->addElement('i', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('small', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('sub', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('sup', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('tt', 'Inline', 'Inline', 'Common');
|
||||
$b = $this->addElement('b', 'Inline', 'Inline', 'Common');
|
||||
$b->formatting = true;
|
||||
$big = $this->addElement('big', 'Inline', 'Inline', 'Common');
|
||||
$big->formatting = true;
|
||||
$i = $this->addElement('i', 'Inline', 'Inline', 'Common');
|
||||
$i->formatting = true;
|
||||
$small = $this->addElement('small', 'Inline', 'Inline', 'Common');
|
||||
$small->formatting = true;
|
||||
$tt = $this->addElement('tt', 'Inline', 'Inline', 'Common');
|
||||
$tt->formatting = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -26,15 +26,21 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
|
||||
$this->addElement('abbr', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('acronym', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('cite', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('code', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('dfn', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('em', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('kbd', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
|
||||
$this->addElement('samp', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('strong', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('var', 'Inline', 'Inline', 'Common');
|
||||
|
||||
$em = $this->addElement('em', 'Inline', 'Inline', 'Common');
|
||||
$em->formatting = true;
|
||||
|
||||
$strong = $this->addElement('strong', 'Inline', 'Inline', 'Common');
|
||||
$strong->formatting = true;
|
||||
|
||||
$code = $this->addElement('code', 'Inline', 'Inline', 'Common');
|
||||
$code->formatting = true;
|
||||
|
||||
// Inline Structural ----------------------------------------------
|
||||
$this->addElement('span', 'Inline', 'Inline', 'Common');
|
||||
$this->addElement('br', 'Inline', 'Empty', 'Core');
|
||||
@ -53,7 +59,9 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
|
||||
$this->addElement('h6', 'Heading', 'Inline', 'Common');
|
||||
|
||||
// Block Structural -----------------------------------------------
|
||||
$this->addElement('p', 'Block', 'Inline', 'Common');
|
||||
$p = $this->addElement('p', 'Block', 'Inline', 'Common');
|
||||
$p->autoclose = array_flip(array("address", "blockquote", "center", "dir", "div", "dl", "fieldset", "ol", "p", "ul"));
|
||||
|
||||
$this->addElement('div', 'Block', 'Flow', 'Common');
|
||||
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ $messages = array(
|
||||
'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed',
|
||||
'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text',
|
||||
'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact',
|
||||
'Strategy_MakeWellFormed: Tag carryover' => '$1.Compact started on line $1.Line auto-continued into $CurrentToken.Compact',
|
||||
'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed',
|
||||
'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text',
|
||||
'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized',
|
||||
|
@ -159,12 +159,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
continue;
|
||||
}
|
||||
|
||||
// if all goes well, this token will be passed through unharmed
|
||||
$token = $tokens[$t];
|
||||
|
||||
//echo '<hr>';
|
||||
//printTokens($tokens, $t);
|
||||
//var_dump($this->stack);
|
||||
//echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
|
||||
|
||||
// quick-check: if it's not a tag, no need to process
|
||||
if (empty($token->is_tag)) {
|
||||
@ -214,18 +211,36 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$this->stack[] = $parent;
|
||||
|
||||
if (isset($definition->info[$parent->name])) {
|
||||
$elements = $definition->info[$parent->name]->child->getNonAutoCloseElements($config);
|
||||
$elements = $definition->info[$parent->name]->child->getAllowedElements($config);
|
||||
$autoclose = !isset($elements[$token->name]);
|
||||
} else {
|
||||
$autoclose = false;
|
||||
}
|
||||
|
||||
$carryover = false;
|
||||
if ($autoclose && $definition->info[$parent->name]->formatting) {
|
||||
$carryover = true;
|
||||
}
|
||||
|
||||
if ($autoclose) {
|
||||
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||
// insert parent end tag before this tag
|
||||
// errors need to be updated
|
||||
$new_token = new HTMLPurifier_Token_End($parent->name);
|
||||
$new_token->start = $parent;
|
||||
$this->insertBefore($new_token);
|
||||
if ($carryover) {
|
||||
$element = clone $parent;
|
||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||
$element->carryover = true;
|
||||
$this->processToken(array($new_token, $token, $element));
|
||||
} else {
|
||||
$this->insertBefore($new_token);
|
||||
}
|
||||
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
|
||||
if (!$carryover) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||
} else {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
|
||||
}
|
||||
}
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
@ -339,12 +354,20 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
}
|
||||
|
||||
// insert tags, in FORWARD $j order: c,b,a with </a></b></c>
|
||||
$replace = array($token);
|
||||
for ($j = 1; $j < $c; $j++) {
|
||||
// ...as well as from the insertions
|
||||
$new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
|
||||
$new_token->start = $skipped_tags[$j];
|
||||
$this->insertBefore($new_token);
|
||||
array_unshift($replace, $new_token);
|
||||
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
|
||||
$element = clone $skipped_tags[$j];
|
||||
$element->carryover = true;
|
||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||
$replace[] = $element;
|
||||
}
|
||||
}
|
||||
$this->processToken($replace);
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
|
@ -11,6 +11,9 @@
|
||||
* Thanks to
|
||||
* http://style.cleverchimp.com/font_size_intervals/altintervals.html
|
||||
* for reasonable mappings.
|
||||
* @warning This doesn't work completely correctly; specifically, this
|
||||
* TagTransform operates before well-formedness is enforced, so
|
||||
* the "active formatting elements" algorithm doesn't get applied.
|
||||
*/
|
||||
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
|
||||
{
|
||||
|
@ -19,6 +19,7 @@ class HTMLPurifier_Token {
|
||||
*/
|
||||
public $skip;
|
||||
public $rewind;
|
||||
public $carryover;
|
||||
|
||||
public function __get($n) {
|
||||
if ($n === 'type') {
|
||||
|
5
tests/HTMLPurifier/HTMLT/inline-wraps-block.htmlt
Normal file
5
tests/HTMLPurifier/HTMLT/inline-wraps-block.htmlt
Normal file
@ -0,0 +1,5 @@
|
||||
--HTML--
|
||||
<a href="foo.html"><h1>Foobar</h1></a>
|
||||
--EXPECT--
|
||||
<a href="foo.html"></a><h1><a href="foo.html">Foobar</a></h1><a href="foo.html"></a>
|
||||
--# vim: et sw=4 sts=4
|
@ -6,8 +6,8 @@
|
||||
class HTMLPurifier_Harness extends UnitTestCase
|
||||
{
|
||||
|
||||
public function __construct() {
|
||||
parent::__construct();
|
||||
public function __construct($name = null) {
|
||||
parent::__construct($name);
|
||||
}
|
||||
|
||||
protected $config, $context, $purifier;
|
||||
|
@ -22,7 +22,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
|
||||
function testFixNesting() {
|
||||
$this->assertResult(
|
||||
'<b><div>Fix nesting.</div></b>',
|
||||
'<b></b><div>Fix nesting.</div>'
|
||||
'<b></b><div><b>Fix nesting.</b></div><b></b>'
|
||||
);
|
||||
}
|
||||
|
||||
@ -36,7 +36,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
|
||||
function testFirstThree() {
|
||||
$this->assertResult(
|
||||
'<foo><b><div>All three.</div></b>',
|
||||
'<b></b><div>All three.</div>'
|
||||
'<b></b><div><b>All three.</b></div><b></b>'
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,7 @@ class HTMLPurifier_Strategy_MakeWellFormed_EndInsertInjectorTest extends HTMLPur
|
||||
$this->assertResult('<i><i>Foo', '<i><i>Foo<b>Comment</b></i><b>Comment</b></i>');
|
||||
}
|
||||
function testEndOfNodeProcessing() {
|
||||
$this->assertResult('<div><i>Foo</div>', '<div><i>Foo<b>Comment</b></i></div>');
|
||||
$this->assertResult('<div><i>Foo</div>asdf', '<div><i>Foo<b>Comment</b></i></div><i>asdf<b>Comment</b></i>');
|
||||
}
|
||||
function testEmptyToStartEndProcessing() {
|
||||
$this->assertResult('<i />', '<i><b>Comment</b></i>');
|
||||
|
@ -26,7 +26,7 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
|
||||
function testUnclosedTagTerminatedByParentNodeEnd() {
|
||||
$this->assertResult(
|
||||
'<b><i>Bold and italic?</b>',
|
||||
'<b><i>Bold and italic?</i></b>'
|
||||
'<b><i>Bold and italic?</i></b><i></i>'
|
||||
);
|
||||
}
|
||||
|
||||
@ -81,8 +81,8 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
|
||||
|
||||
function testAutoCloseMultiple() {
|
||||
$this->assertResult(
|
||||
'<span><span><div></div>',
|
||||
'<span><span></span></span><div></div>'
|
||||
'<b><span><div></div>asdf',
|
||||
'<b><span></span></b><div><b></b></div><b>asdf</b>'
|
||||
);
|
||||
}
|
||||
|
||||
@ -102,6 +102,20 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
|
||||
);
|
||||
}
|
||||
|
||||
function testLongCarryOver() {
|
||||
$this->assertResult(
|
||||
'<b>asdf<div>asdf<i>df</i></div>asdf</b>',
|
||||
'<b>asdf</b><div><b>asdf<i>df</i></b></div><b>asdf</b>'
|
||||
);
|
||||
}
|
||||
|
||||
function testInterleaved() {
|
||||
$this->assertResult(
|
||||
'<u>foo<i>bar</u>baz</i>',
|
||||
'<u>foo<i>bar</i></u><i>baz</i>'
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@ -20,8 +20,15 @@ class HTMLPurifier_Strategy_MakeWellFormed_ErrorsTest extends HTMLPurifier_Strat
|
||||
$this->invoke('</b>');
|
||||
}
|
||||
|
||||
function testTagAutoClosed() {
|
||||
$this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', new HTMLPurifier_Token_Start('b', array(), 1, 0));
|
||||
function testTagAutoclose() {
|
||||
$this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', new HTMLPurifier_Token_Start('p', array(), 1, 0));
|
||||
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6));
|
||||
$this->invoke('<p>Foo<div>Bar</div>');
|
||||
}
|
||||
|
||||
function testTagCarryOver() {
|
||||
$b = new HTMLPurifier_Token_Start('b', array(), 1, 0);
|
||||
$this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $b);
|
||||
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6));
|
||||
$this->invoke('<b>Foo<div>Bar</div>');
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user