From c8b020879d458a09aaadf32eb741d1f81a0604ca Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Thu, 27 Sep 2007 00:39:05 +0000 Subject: [PATCH] [2.1.3] Refine injector algorithm regarding behavior inside nodes that allow paragraphs inside them git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1423 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 3 ++ .../HTMLPurifier/Injector/AutoParagraph.php | 36 +++++++++++++------ .../HTMLPurifier/Strategy/MakeWellFormed.php | 2 ++ .../Injector/AutoParagraphTest.php | 33 +++++++++++++---- 4 files changed, 57 insertions(+), 17 deletions(-) diff --git a/NEWS b/NEWS index 86520f65..2dce9d37 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier has 'id' attribute fixed, thanks NykO18 for reporting - Fix warning emitted when a non-supported URI scheme is passed to the MakeAbsolute URIFilter, thanks NykO18 (again) +- Further refine AutoParagraph injector. Behavior inside of elements + allowing paragraph tags clarified: only inline content delimited by + double newlines (not block elements) is paragraphed. . %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment to better communicate its purpose diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php index 6e0a6a3e..56a6a268 100644 --- a/library/HTMLPurifier/Injector/AutoParagraph.php +++ b/library/HTMLPurifier/Injector/AutoParagraph.php @@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define( 'AutoFormat', 'AutoParagraph', false, 'bool', '

This directive turns on auto-paragraphing, where double newlines are - converted in to paragraphs whenever possible. Auto-paragraphing - applies when: + converted into paragraphs whenever possible. Auto-paragraphing:

p tags must be allowed for this directive to take effect. We do not use br tags for paragraphing, as that is semantically incorrect.

+

+ To prevent auto-paragraphing as a content-producer, refrain from using + double-newlines except to specify a new paragraph or in contexts where + they have special meaning (whitespace usually has no meaning except in + tags like pre, so this should not be difficult). To prevent + the paragraphing of inline text adjacent to block elements, wrap them + in div tags (the behavior is slightly different outside of + the root node). +

This directive has been available since 2.0.1.

@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector $ok = false; // test if up-coming tokens are either block or have // a double newline in them + $nesting = 0; for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { if ($this->inputTokens[$i]->type == 'start'){ if (!$this->_isInline($this->inputTokens[$i])) { - $ok = true; + // we haven't found a double-newline, and + // we've hit a block element, so don't paragraph + $ok = false; + break; } - break; + $nesting++; + } + if ($this->inputTokens[$i]->type == 'end') { + if ($nesting <= 0) break; + $nesting--; } - if ($this->inputTokens[$i]->type == 'end') break; if ($this->inputTokens[$i]->type == 'text') { + // found it! if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { $ok = true; + break; } - if (!$this->inputTokens[$i]->is_whitespace) break; } } if ($ok) { diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index b8173f6d..930bfca1 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -105,6 +105,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // if all goes well, this token will be passed through unharmed $token = $tokens[$this->inputIndex]; + //printTokens($tokens, $this->inputIndex); + foreach ($this->injectors as $i => $x) { if ($x->skip > 0) $this->injectors[$i]->skip--; } diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php index 23743dff..5c726a11 100644 --- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php +++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php @@ -194,10 +194,7 @@ Bar

', } function testNoParagraphSingleInlineNodeInBlockNode() { - $this->assertResult( -'
Foo
', - '
Foo
' - ); + $this->assertResult( '
Foo
' ); } function testParagraphInBlockquote() { @@ -277,9 +274,7 @@ Par1 function testBlockNodeTextDelimeterWithoutDoublespaceInBlockNode() { $this->assertResult( '
Par1 -
Par2
', -'

Par1 -

Par2
' +
Par2
' ); } @@ -351,6 +346,30 @@ Par2' ); } + function testInlineAndBlockTagInDivNoParagraph() { + $this->assertResult( + '
bar mmm
asdf
' + ); + } + + function testInlineAndBlockTagInDivNeedingParagraph() { + $this->assertResult( +'
bar mmm + +
asdf
', +'

bar mmm

asdf
' + ); + } + + function testTextInlineNodeTextThenDoubleNewlineNeedsParagraph() { + $this->assertResult( +'
asdf bar mmm + +
asdf
', +'

asdf bar mmm

asdf
' + ); + } + function testErrorNeeded() { $this->config->set('HTML', 'Allowed', 'b'); $this->expectError('Cannot enable AutoParagraph injector because p is not allowed');