From bfe474042f191abc87c49a8a373c39fc3b449833 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sat, 20 Dec 2008 13:06:00 -0500 Subject: [PATCH] Implement "carryover" functionality, requested by Kinderlehrer This commit is a limited implementation of the "active formatting elements" algorithm implemented in HTML5, which preserves certain formatting elements such as and when exiting or entering nodes. Signed-off-by: Edward Z. Yang --- library/HTMLPurifier/ChildDef.php | 2 +- .../ChildDef/StrictBlockquote.php | 2 +- library/HTMLPurifier/ElementDef.php | 14 +++++++ library/HTMLPurifier/HTMLModule/Hypertext.php | 1 + library/HTMLPurifier/HTMLModule/Legacy.php | 12 ++++-- .../HTMLPurifier/HTMLModule/Presentation.php | 15 ++++--- library/HTMLPurifier/HTMLModule/Text.php | 16 ++++++-- library/HTMLPurifier/Language/messages/en.php | 1 + .../HTMLPurifier/Strategy/MakeWellFormed.php | 41 +++++++++++++++---- library/HTMLPurifier/TagTransform/Font.php | 3 ++ library/HTMLPurifier/Token.php | 1 + .../HTMLT/inline-wraps-block.htmlt | 5 +++ tests/HTMLPurifier/Harness.php | 4 +- tests/HTMLPurifier/Strategy/CoreTest.php | 4 +- .../MakeWellFormed/EndInsertInjectorTest.php | 2 +- .../Strategy/MakeWellFormedTest.php | 20 +++++++-- .../Strategy/MakeWellFormed_ErrorsTest.php | 11 ++++- 17 files changed, 121 insertions(+), 33 deletions(-) create mode 100644 tests/HTMLPurifier/HTMLT/inline-wraps-block.htmlt diff --git a/library/HTMLPurifier/ChildDef.php b/library/HTMLPurifier/ChildDef.php index ccb6a21d..c5d5216d 100644 --- a/library/HTMLPurifier/ChildDef.php +++ b/library/HTMLPurifier/ChildDef.php @@ -28,7 +28,7 @@ abstract class HTMLPurifier_ChildDef * Get lookup of tag names that should not close this element automatically. * All other elements will do so. */ - public function getNonAutoCloseElements($config) { + public function getAllowedElements($config) { return $this->elements; } diff --git a/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/library/HTMLPurifier/ChildDef/StrictBlockquote.php index 913e32c1..dfae8a6e 100644 --- a/library/HTMLPurifier/ChildDef/StrictBlockquote.php +++ b/library/HTMLPurifier/ChildDef/StrictBlockquote.php @@ -15,7 +15,7 @@ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Requi * @note We don't want MakeWellFormed to auto-close inline elements since * they might be allowed. */ - public function getNonAutoCloseElements($config) { + public function getAllowedElements($config) { $this->init($config); return $this->fake_elements; } diff --git a/library/HTMLPurifier/ElementDef.php b/library/HTMLPurifier/ElementDef.php index 68b50b95..b55c7bd7 100644 --- a/library/HTMLPurifier/ElementDef.php +++ b/library/HTMLPurifier/ElementDef.php @@ -5,6 +5,8 @@ * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule. * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition. * Please update that class too. + * @warning If you add new properties to this class, you MUST update + * the mergeIn() method. */ class HTMLPurifier_ElementDef { @@ -90,6 +92,17 @@ class HTMLPurifier_ElementDef */ public $excludes = array(); + /** + * This tag is explicitly auto-closed by the following tags. + */ + public $autoclose = array(); + + /** + * Whether or not this is a formatting element affected by the + * "Active Formatting Elements" algorithm. + */ + public $formatting; + /** * Low-level factory constructor for creating new standalone element defs */ @@ -137,6 +150,7 @@ class HTMLPurifier_ElementDef $this->child = false; } if(!is_null($def->child)) $this->child = $def->child; + if(!is_null($def->formatting)) $this->formatting = $def->formatting; if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; } diff --git a/library/HTMLPurifier/HTMLModule/Hypertext.php b/library/HTMLPurifier/HTMLModule/Hypertext.php index 2e1088a1..d7e9bdd2 100644 --- a/library/HTMLPurifier/HTMLModule/Hypertext.php +++ b/library/HTMLPurifier/HTMLModule/Hypertext.php @@ -22,6 +22,7 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule // 'type' => 'ContentType', ) ); + $a->formatting = true; $a->excludes = array('a' => true); } diff --git a/library/HTMLPurifier/HTMLModule/Legacy.php b/library/HTMLPurifier/HTMLModule/Legacy.php index e888b765..df33927b 100644 --- a/library/HTMLPurifier/HTMLModule/Legacy.php +++ b/library/HTMLPurifier/HTMLModule/Legacy.php @@ -41,9 +41,15 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule $this->addElement('menu', 'Block', 'Required: li', 'Common', array( 'compact' => 'Bool#compact' )); - $this->addElement('s', 'Inline', 'Inline', 'Common'); - $this->addElement('strike', 'Inline', 'Inline', 'Common'); - $this->addElement('u', 'Inline', 'Inline', 'Common'); + + $s = $this->addElement('s', 'Inline', 'Inline', 'Common'); + $s->formatting = true; + + $strike = $this->addElement('strike', 'Inline', 'Inline', 'Common'); + $strike->formatting = true; + + $u = $this->addElement('u', 'Inline', 'Inline', 'Common'); + $u->formatting = true; // setup modifications to old elements diff --git a/library/HTMLPurifier/HTMLModule/Presentation.php b/library/HTMLPurifier/HTMLModule/Presentation.php index 89b9ad0c..8ff0b5ed 100644 --- a/library/HTMLPurifier/HTMLModule/Presentation.php +++ b/library/HTMLPurifier/HTMLModule/Presentation.php @@ -16,14 +16,19 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule public $name = 'Presentation'; public function setup($config) { - $this->addElement('b', 'Inline', 'Inline', 'Common'); - $this->addElement('big', 'Inline', 'Inline', 'Common'); $this->addElement('hr', 'Block', 'Empty', 'Common'); - $this->addElement('i', 'Inline', 'Inline', 'Common'); - $this->addElement('small', 'Inline', 'Inline', 'Common'); $this->addElement('sub', 'Inline', 'Inline', 'Common'); $this->addElement('sup', 'Inline', 'Inline', 'Common'); - $this->addElement('tt', 'Inline', 'Inline', 'Common'); + $b = $this->addElement('b', 'Inline', 'Inline', 'Common'); + $b->formatting = true; + $big = $this->addElement('big', 'Inline', 'Inline', 'Common'); + $big->formatting = true; + $i = $this->addElement('i', 'Inline', 'Inline', 'Common'); + $i->formatting = true; + $small = $this->addElement('small', 'Inline', 'Inline', 'Common'); + $small->formatting = true; + $tt = $this->addElement('tt', 'Inline', 'Inline', 'Common'); + $tt->formatting = true; } } diff --git a/library/HTMLPurifier/HTMLModule/Text.php b/library/HTMLPurifier/HTMLModule/Text.php index 884c9e14..ae77c718 100644 --- a/library/HTMLPurifier/HTMLModule/Text.php +++ b/library/HTMLPurifier/HTMLModule/Text.php @@ -26,15 +26,21 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule $this->addElement('abbr', 'Inline', 'Inline', 'Common'); $this->addElement('acronym', 'Inline', 'Inline', 'Common'); $this->addElement('cite', 'Inline', 'Inline', 'Common'); - $this->addElement('code', 'Inline', 'Inline', 'Common'); $this->addElement('dfn', 'Inline', 'Inline', 'Common'); - $this->addElement('em', 'Inline', 'Inline', 'Common'); $this->addElement('kbd', 'Inline', 'Inline', 'Common'); $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI')); $this->addElement('samp', 'Inline', 'Inline', 'Common'); - $this->addElement('strong', 'Inline', 'Inline', 'Common'); $this->addElement('var', 'Inline', 'Inline', 'Common'); + $em = $this->addElement('em', 'Inline', 'Inline', 'Common'); + $em->formatting = true; + + $strong = $this->addElement('strong', 'Inline', 'Inline', 'Common'); + $strong->formatting = true; + + $code = $this->addElement('code', 'Inline', 'Inline', 'Common'); + $code->formatting = true; + // Inline Structural ---------------------------------------------- $this->addElement('span', 'Inline', 'Inline', 'Common'); $this->addElement('br', 'Inline', 'Empty', 'Core'); @@ -53,7 +59,9 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule $this->addElement('h6', 'Heading', 'Inline', 'Common'); // Block Structural ----------------------------------------------- - $this->addElement('p', 'Block', 'Inline', 'Common'); + $p = $this->addElement('p', 'Block', 'Inline', 'Common'); + $p->autoclose = array_flip(array("address", "blockquote", "center", "dir", "div", "dl", "fieldset", "ol", "p", "ul")); + $this->addElement('div', 'Block', 'Flow', 'Common'); } diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php index 04496af9..aab2e52e 100644 --- a/library/HTMLPurifier/Language/messages/en.php +++ b/library/HTMLPurifier/Language/messages/en.php @@ -37,6 +37,7 @@ $messages = array( 'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', 'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text', 'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', +'Strategy_MakeWellFormed: Tag carryover' => '$1.Compact started on line $1.Line auto-continued into $CurrentToken.Compact', 'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', 'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text', 'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index e04b8265..0ec811f8 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -159,12 +159,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy continue; } - // if all goes well, this token will be passed through unharmed $token = $tokens[$t]; - //echo '
'; - //printTokens($tokens, $t); - //var_dump($this->stack); + //echo '
'; printTokens($tokens, $t); printTokens($this->stack); // quick-check: if it's not a tag, no need to process if (empty($token->is_tag)) { @@ -214,18 +211,36 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $this->stack[] = $parent; if (isset($definition->info[$parent->name])) { - $elements = $definition->info[$parent->name]->child->getNonAutoCloseElements($config); + $elements = $definition->info[$parent->name]->child->getAllowedElements($config); $autoclose = !isset($elements[$token->name]); } else { $autoclose = false; } + $carryover = false; + if ($autoclose && $definition->info[$parent->name]->formatting) { + $carryover = true; + } + if ($autoclose) { - if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); - // insert parent end tag before this tag + // errors need to be updated $new_token = new HTMLPurifier_Token_End($parent->name); $new_token->start = $parent; - $this->insertBefore($new_token); + if ($carryover) { + $element = clone $parent; + $element->armor['MakeWellFormed_TagClosedError'] = true; + $element->carryover = true; + $this->processToken(array($new_token, $token, $element)); + } else { + $this->insertBefore($new_token); + } + if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { + if (!$carryover) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); + } else { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); + } + } $reprocess = true; continue; } @@ -339,12 +354,20 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } // insert tags, in FORWARD $j order: c,b,a with
+ $replace = array($token); for ($j = 1; $j < $c; $j++) { // ...as well as from the insertions $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name); $new_token->start = $skipped_tags[$j]; - $this->insertBefore($new_token); + array_unshift($replace, $new_token); + if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) { + $element = clone $skipped_tags[$j]; + $element->carryover = true; + $element->armor['MakeWellFormed_TagClosedError'] = true; + $replace[] = $element; + } } + $this->processToken($replace); $reprocess = true; continue; } diff --git a/library/HTMLPurifier/TagTransform/Font.php b/library/HTMLPurifier/TagTransform/Font.php index 0822a9f5..ed246378 100644 --- a/library/HTMLPurifier/TagTransform/Font.php +++ b/library/HTMLPurifier/TagTransform/Font.php @@ -11,6 +11,9 @@ * Thanks to * http://style.cleverchimp.com/font_size_intervals/altintervals.html * for reasonable mappings. + * @warning This doesn't work completely correctly; specifically, this + * TagTransform operates before well-formedness is enforced, so + * the "active formatting elements" algorithm doesn't get applied. */ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform { diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php index a96a1eb1..7900e6cb 100644 --- a/library/HTMLPurifier/Token.php +++ b/library/HTMLPurifier/Token.php @@ -19,6 +19,7 @@ class HTMLPurifier_Token { */ public $skip; public $rewind; + public $carryover; public function __get($n) { if ($n === 'type') { diff --git a/tests/HTMLPurifier/HTMLT/inline-wraps-block.htmlt b/tests/HTMLPurifier/HTMLT/inline-wraps-block.htmlt new file mode 100644 index 00000000..da6bae68 --- /dev/null +++ b/tests/HTMLPurifier/HTMLT/inline-wraps-block.htmlt @@ -0,0 +1,5 @@ +--HTML-- +

Foobar

+--EXPECT-- +

Foobar

+--# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/Harness.php b/tests/HTMLPurifier/Harness.php index 56088c15..fc484bfd 100644 --- a/tests/HTMLPurifier/Harness.php +++ b/tests/HTMLPurifier/Harness.php @@ -6,8 +6,8 @@ class HTMLPurifier_Harness extends UnitTestCase { - public function __construct() { - parent::__construct(); + public function __construct($name = null) { + parent::__construct($name); } protected $config, $context, $purifier; diff --git a/tests/HTMLPurifier/Strategy/CoreTest.php b/tests/HTMLPurifier/Strategy/CoreTest.php index 96b69930..9bca5f60 100644 --- a/tests/HTMLPurifier/Strategy/CoreTest.php +++ b/tests/HTMLPurifier/Strategy/CoreTest.php @@ -22,7 +22,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness function testFixNesting() { $this->assertResult( '
Fix nesting.
', - '
Fix nesting.
' + '
Fix nesting.
' ); } @@ -36,7 +36,7 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness function testFirstThree() { $this->assertResult( '
All three.
', - '
All three.
' + '
All three.
' ); } diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed/EndInsertInjectorTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed/EndInsertInjectorTest.php index 7e62b721..5b77420e 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormed/EndInsertInjectorTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormed/EndInsertInjectorTest.php @@ -22,7 +22,7 @@ class HTMLPurifier_Strategy_MakeWellFormed_EndInsertInjectorTest extends HTMLPur $this->assertResult('Foo', 'FooCommentComment'); } function testEndOfNodeProcessing() { - $this->assertResult('
Foo
', '
FooComment
'); + $this->assertResult('
Foo
asdf', '
FooComment
asdfComment'); } function testEmptyToStartEndProcessing() { $this->assertResult('', 'Comment'); diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php index 3d6892d2..c3326c9f 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php @@ -26,7 +26,7 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn function testUnclosedTagTerminatedByParentNodeEnd() { $this->assertResult( 'Bold and italic?', - 'Bold and italic?' + 'Bold and italic?' ); } @@ -81,8 +81,8 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn function testAutoCloseMultiple() { $this->assertResult( - '
', - '
' + '
asdf', + '
asdf' ); } @@ -102,6 +102,20 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn ); } + function testLongCarryOver() { + $this->assertResult( + 'asdf
asdfdf
asdf
', + 'asdf
asdfdf
asdf' + ); + } + + function testInterleaved() { + $this->assertResult( + 'foobarbaz', + 'foobarbaz' + ); + } + } // vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php index 5a63e3ea..bf917da2 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php @@ -20,8 +20,15 @@ class HTMLPurifier_Strategy_MakeWellFormed_ErrorsTest extends HTMLPurifier_Strat $this->invoke('
'); } - function testTagAutoClosed() { - $this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', new HTMLPurifier_Token_Start('b', array(), 1, 0)); + function testTagAutoclose() { + $this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', new HTMLPurifier_Token_Start('p', array(), 1, 0)); + $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6)); + $this->invoke('

Foo

Bar
'); + } + + function testTagCarryOver() { + $b = new HTMLPurifier_Token_Start('b', array(), 1, 0); + $this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $b); $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('div', array(), 1, 6)); $this->invoke('Foo
Bar
'); }