From c8b020879d458a09aaadf32eb741d1f81a0604ca Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Thu, 27 Sep 2007 00:39:05 +0000
Subject: [PATCH] [2.1.3] Refine injector algorithm regarding behavior inside
nodes that allow paragraphs inside them
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1423 48356398-32a2-884e-a903-53898d9a118a
---
NEWS | 3 ++
.../HTMLPurifier/Injector/AutoParagraph.php | 36 +++++++++++++------
.../HTMLPurifier/Strategy/MakeWellFormed.php | 2 ++
.../Injector/AutoParagraphTest.php | 33 +++++++++++++----
4 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/NEWS b/NEWS
index 86520f65..2dce9d37 100644
--- a/NEWS
+++ b/NEWS
@@ -19,6 +19,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
has 'id' attribute fixed, thanks NykO18 for reporting
- Fix warning emitted when a non-supported URI scheme is passed to the
MakeAbsolute URIFilter, thanks NykO18 (again)
+- Further refine AutoParagraph injector. Behavior inside of elements
+ allowing paragraph tags clarified: only inline content delimited by
+ double newlines (not block elements) is paragraphed.
. %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment
to better communicate its purpose
diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php
index 6e0a6a3e..56a6a268 100644
--- a/library/HTMLPurifier/Injector/AutoParagraph.php
+++ b/library/HTMLPurifier/Injector/AutoParagraph.php
@@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'AutoParagraph', false, 'bool', '
This directive turns on auto-paragraphing, where double newlines are
- converted in to paragraphs whenever possible. Auto-paragraphing
- applies when:
+ converted into paragraphs whenever possible. Auto-paragraphing:
- - There are inline elements or text in the root node
- - There are inline elements or text with double newlines or
- block elements in nodes that allow paragraph tags
- - There are double newlines in paragraph tags
+ - Always applies to inline elements or text in the root node,
+ - Applies to inline elements or text with double newlines in nodes
+ that allow paragraph tags,
+ - Applies to double newlines in paragraph tags
p
tags must be allowed for this directive to take effect.
We do not use br
tags for paragraphing, as that is
semantically incorrect.
+
+ To prevent auto-paragraphing as a content-producer, refrain from using
+ double-newlines except to specify a new paragraph or in contexts where
+ it has special meaning (whitespace usually has no meaning except in
+ tags like pre
, so this should not be difficult.) To prevent
+ the paragraphing of inline text adjacent to block elements, wrap it
+ in div
tags (the behavior is slightly different outside of
+ the root node.)
+
This directive has been available since 2.0.1.
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
$ok = false;
// test if up-coming tokens are either block or have
// a double newline in them
+ $nesting = 0;
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
if ($this->inputTokens[$i]->type == 'start'){
if (!$this->_isInline($this->inputTokens[$i])) {
- $ok = true;
+ // we haven't found a double-newline, and
+ // we've hit a block element, so don't paragraph
+ $ok = false;
+ break;
}
- break;
+ $nesting++;
+ }
+ if ($this->inputTokens[$i]->type == 'end') {
+ if ($nesting <= 0) break;
+ $nesting--;
}
- if ($this->inputTokens[$i]->type == 'end') break;
if ($this->inputTokens[$i]->type == 'text') {
+ // a double newline in upcoming text means we should paragraph
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
+ break;
}
- if (!$this->inputTokens[$i]->is_whitespace) break;
}
}
if ($ok) {
diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php
index b8173f6d..930bfca1 100644
--- a/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -105,6 +105,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// if all goes well, this token will be passed through unharmed
$token = $tokens[$this->inputIndex];
+ //printTokens($tokens, $this->inputIndex);
+
foreach ($this->injectors as $i => $x) {
if ($x->skip > 0) $this->injectors[$i]->skip--;
}
diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
index 23743dff..5c726a11 100644
--- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php
+++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
@@ -194,10 +194,7 @@ Bar
',
}
function testNoParagraphSingleInlineNodeInBlockNode() {
- $this->assertResult(
-'Foo
',
- 'Foo
'
- );
+ $this->assertResult( 'Foo
' );
}
function testParagraphInBlockquote() {
@@ -277,9 +274,7 @@ Par1
function testBlockNodeTextDelimeterWithoutDoublespaceInBlockNode() {
$this->assertResult(
'',
-''
+Par2
'
);
}
@@ -351,6 +346,30 @@ Par2'
);
}
+ function testInlineAndBlockTagInDivNoParagraph() {
+ $this->assertResult(
+ ''
+ );
+ }
+
+ function testInlineAndBlockTagInDivNeedingParagraph() {
+ $this->assertResult(
+'',
+''
+ );
+ }
+
+ function testTextInlineNodeTextThenDoubleNewlineNeedsParagraph() {
+ $this->assertResult(
+'',
+''
+ );
+ }
+
function testErrorNeeded() {
$this->config->set('HTML', 'Allowed', 'b');
$this->expectError('Cannot enable AutoParagraph injector because p is not allowed');