From 3c734b4c72980a4341912bb4429b69773f5017b1 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Tue, 26 Jun 2007 15:07:07 +0000
Subject: [PATCH] [2.0.1] Implement error messages for MakeWellFormed. Armor
AutoParagraph generated p start tags from these tag closing errors. Fix
another auto-paragraphing edge-case. Create common Strategy error harness.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1242 48356398-32a2-884e-a903-53898d9a118a
---
.../HTMLPurifier/Injector/AutoParagraph.php | 64 ++++++++++++-------
library/HTMLPurifier/Language/messages/en.php | 14 ++--
.../HTMLPurifier/Strategy/MakeWellFormed.php | 22 ++++++-
.../Strategy/RemoveForeignElements.php | 4 +-
library/HTMLPurifier/Token.php | 3 +-
.../Injector/AutoParagraphTest.php | 7 ++
tests/HTMLPurifier/Strategy/ErrorsHarness.php | 20 ++++++
.../Strategy/MakeWellFormed_ErrorsTest.php | 52 +++++++++++++++
.../RemoveForeignElements_ErrorsTest.php | 13 ++--
tests/test_files.php | 1 +
10 files changed, 162 insertions(+), 38 deletions(-)
create mode 100644 tests/HTMLPurifier/Strategy/ErrorsHarness.php
create mode 100644 tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php
diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php
index e2b17ba2..919785ae 100644
--- a/library/HTMLPurifier/Injector/AutoParagraph.php
+++ b/library/HTMLPurifier/Injector/AutoParagraph.php
@@ -27,6 +27,12 @@ HTMLPurifier_ConfigSchema::define(
class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
{
+ function _pStart() {
+ $par = new HTMLPurifier_Token_Start('p');
+ $par->armor['MakeWellFormed_TagClosedError'] = true;
+ return $par;
+ }
+
function handleText(&$token) {
$text = $token->data;
if (empty($this->currentNesting)) {
@@ -42,7 +48,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// case 3: we're in an element that allows paragraphs
if (strpos($text, "\n\n") !== false) {
// case 3.1: this text node has a double-newline
- $token = array(new HTMLPurifier_Token_Start('p'));
+ $token = array($this->_pStart());
$this->_splitText($text, $token);
} else {
$ok = false;
@@ -66,7 +72,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
if ($ok) {
// case 3.2: this text node is next to another node
// that will start a paragraph
- $token = array(new HTMLPurifier_Token_Start('p'), $token);
+ $token = array($this->_pStart(), $token);
}
}
}
@@ -87,7 +93,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// not adjacent, we can abort early
// add lead paragraph tag if our token is inline
if ($this->_isInline($token)) {
- $token = array(new HTMLPurifier_Token_Start('p'), $token);
+ $token = array($this->_pStart(), $token);
}
return;
}
@@ -112,7 +118,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
if ($j <= 0) break;
}
if ($ok) {
- $token = array(new HTMLPurifier_Token_Start('p'), $token);
+ $token = array($this->_pStart(), $token);
}
}
return;
@@ -122,7 +128,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
if (!$this->_isInline($token)) return;
// append a paragraph tag before the token
- $token = array(new HTMLPurifier_Token_Start('p'), $token);
+ $token = array($this->_pStart(), $token);
}
/**
@@ -142,11 +148,15 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// remove empty paragraphs
$paragraphs = array();
$needs_start = false;
- $first = true;
- foreach ($raw_paragraphs as $par) {
+ $needs_end = false;
+
+ for ($i = 0, $c = count($raw_paragraphs); $i < $c; $i++) {
+ $par = $raw_paragraphs[$i];
if (trim($par) !== '') {
- $paragraphs[] = $par;
- } elseif (empty($result) && $first) {
+ $paragraphs[] = $par;
+ continue;
+ }
+ if ($i == 0 && empty($result)) {
// The empty result indicates that the AutoParagraph
// injector did not add any start paragraph tokens.
// The fact that the first paragraph is empty indicates
@@ -161,8 +171,13 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// next start paragraph tag will be handled by the
// next run-around the injector
$needs_start = true;
+ } elseif ($i + 1 == $c) {
+ // a double-paragraph at the end indicates that
+ // there is an overriding need to start a new paragraph
+ // for the next section. This has no effect until
+ // we've processed all of the other paragraphs though
+ $needs_end = true;
}
- $first = false;
}
// check if there are no "real" paragraphs to be processed
@@ -173,13 +188,13 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// add a start tag if an end tag was added while processing
// the raw paragraphs (that happens if there's a leading double
// newline)
- if ($needs_start) $result[] = new HTMLPurifier_Token_Start('p');
+ if ($needs_start) $result[] = $this->_pStart();
// append the paragraphs onto the result
foreach ($paragraphs as $par) {
$result[] = new HTMLPurifier_Token_Text($par);
$result[] = new HTMLPurifier_Token_End('p');
- $result[] = new HTMLPurifier_Token_Start('p');
+ $result[] = $this->_pStart();
}
// remove trailing start token, if one is needed, it will
@@ -190,19 +205,22 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// end paragraph tag should be removed. It should be removed
// unless the next non-whitespace token is a paragraph
// or a block element.
-
$remove_paragraph_end = true;
- // Start of the checks one after the current token's index
- for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
- if ($this->inputTokens[$i]->type == 'start' || $this->inputTokens[$i]->type == 'empty') {
- $remove_paragraph_end = $this->_isInline($this->inputTokens[$i]);
- break;
+
+ if (!$needs_end) {
+ // Start of the checks one after the current token's index
+ for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
+ if ($this->inputTokens[$i]->type == 'start' || $this->inputTokens[$i]->type == 'empty') {
+ $remove_paragraph_end = $this->_isInline($this->inputTokens[$i]);
+ }
+ // check if we can abort early (whitespace means we carry-on!)
+ if ($this->inputTokens[$i]->type == 'text' && !$this->inputTokens[$i]->is_whitespace) break;
+ // end tags will automatically be handled by MakeWellFormed,
+ // so we don't have to worry about them
+ if ($this->inputTokens[$i]->type == 'end') break;
}
- // check if we can abort early (whitespace means we carry-on!)
- if ($this->inputTokens[$i]->type == 'text' && !$this->inputTokens[$i]->is_whitespace) break;
- // end tags will automatically be handled by MakeWellFormed,
- // so we don't have to worry about them
- if ($this->inputTokens[$i]->type == 'end') break;
+ } else {
+ $remove_paragraph_end = false;
}
// check the outside to determine whether or not the
diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php
index 6634556a..fa4736c6 100644
--- a/library/HTMLPurifier/Language/messages/en.php
+++ b/library/HTMLPurifier/Language/messages/en.php
@@ -16,14 +16,20 @@ $messages = array(
'Lexer: Missing attribute key' => 'Attribute declaration has no key',
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
-'Strategy_RemoveForeignElements: Tag transform' => '$1 element transformed into $CurrentToken.Serialized',
-'Strategy_RemoveForeignElements: Missing required attribute' => '$1 element missing required attribute $2',
-'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $1 element converted to text',
-'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $1 element removed',
+'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized',
+'Strategy_RemoveForeignElements: Missing required attribute' => '<$1> element missing required attribute $2',
+'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text',
+'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed',
'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$1" removed',
'Strategy_RemoveForeignElements: Script removed' => 'Inline scripting removed',
'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end',
+'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $1> tag removed',
+'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $1> tag converted to text',
+'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $1> tag removed',
+'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $1> tag converted to text',
+'Strategy_MakeWellFormed: Tag closed by element end' => '<$1> tag closed by end of $CurrentToken.Serialized',
+'Strategy_MakeWellFormed: Tag closed by document end' => '<$1> tag closed by end of document',
);
diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php
index 1783e722..834d6b10 100644
--- a/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -56,6 +56,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$generator = new HTMLPurifier_Generator();
+ $e =& $context->get('ErrorCollector', true);
+
// -- begin INJECTOR --
$this->injectors = array();
@@ -90,6 +92,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// -- end INJECTOR --
+ $token = false;
+ $context->register('CurrentToken', $token);
+
for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
// if all goes well, this token will be passed through unharmed
@@ -177,9 +182,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// make sure that we have something open
if (empty($this->currentNesting)) {
if ($escape_invalid_tags) {
+ if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text', $token->name);
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
+ } elseif ($e) {
+ $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed', $token->name);
}
continue;
}
@@ -215,6 +223,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
+ if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text', $token->name);
+ } elseif ($e) {
+ $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed', $token->name);
}
continue;
}
@@ -222,10 +233,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// okay, we found it, close all the skipped tags
// note that skipped tags contains the element we need closed
$size = count($skipped_tags);
- for ($i = $size - 1; $i >= 0; $i--) {
+ for ($i = $size - 1; $i > 0; $i--) {
+ if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]->name);
+ }
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
}
+ $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
+
}
// we're at the end now, fix all still unclosed tags
@@ -234,6 +250,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if (!empty($this->currentNesting)) {
$size = count($this->currentNesting);
for ($i = $size - 1; $i >= 0; $i--) {
+ if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]->name);
+ }
$result[] =
new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
}
@@ -242,6 +261,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$context->destroy('CurrentNesting');
$context->destroy('InputTokens');
$context->destroy('InputIndex');
+ $context->destroy('CurrentToken');
unset($this->outputTokens, $this->injectors, $this->currentInjector,
$this->currentNesting, $this->inputTokens, $this->inputIndex);
diff --git a/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
index f8a03aec..16ffbbba 100644
--- a/library/HTMLPurifier/Strategy/RemoveForeignElements.php
+++ b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -115,7 +115,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} elseif ($escape_invalid_tags) {
// invalid tag, generate HTML representation and insert in
- if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text', $token->name);
+ if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
$token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
@@ -132,7 +132,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Script removed');
} else {
- if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed', $token->name);
+ if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
}
continue;
}
diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php
index 10aff570..42dd16f1 100644
--- a/library/HTMLPurifier/Token.php
+++ b/library/HTMLPurifier/Token.php
@@ -15,7 +15,8 @@ class HTMLPurifier_Token {
/**
* Lookup array of processing that this token is exempt from.
- * Currently, the only valid value is "ValidateAttributes".
+ * Currently, valid values are "ValidateAttributes" and
+ * "MakeWellFormed_TagClosedError"
*/
var $armor = array();
diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
index 0c64d54f..04963789 100644
--- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php
+++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
@@ -230,6 +230,13 @@ Par1
Par2
'
);
+ $this->assertResult(
+'Par1
+
+Par2',
+'Par1
Par2
'
+ );
+
}
function testInlineRootNode() {
diff --git a/tests/HTMLPurifier/Strategy/ErrorsHarness.php b/tests/HTMLPurifier/Strategy/ErrorsHarness.php
new file mode 100644
index 00000000..34a32b99
--- /dev/null
+++ b/tests/HTMLPurifier/Strategy/ErrorsHarness.php
@@ -0,0 +1,20 @@
+getStrategy();
+ $lexer = new HTMLPurifier_Lexer_DirectLex();
+ $tokens = $lexer->tokenizeHTML($input, $this->config, $this->context);
+ $strategy->execute($tokens, $this->config, $this->context);
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php
new file mode 100644
index 00000000..baeff527
--- /dev/null
+++ b/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php
@@ -0,0 +1,52 @@
+ '',
+'Strategy_MakeWellFormed: Tag closed by document end' => '',
+*/
+
+class HTMLPurifier_Strategy_MakeWellFormed_ErrorsTest extends HTMLPurifier_Strategy_ErrorsHarness
+{
+
+ function getStrategy() {
+ return new HTMLPurifier_Strategy_MakeWellFormed();
+ }
+
+ function testUnnecessaryEndTagRemoved() {
+ $this->expectErrorCollection(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed', 'b');
+ $this->invoke('');
+ }
+
+ function testUnnecessaryEndTagToText() {
+ $this->config->set('Core', 'EscapeInvalidTags', true);
+ $this->expectErrorCollection(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text', 'b');
+ $this->invoke('');
+ }
+
+ function testStrayEndTagRemoved() {
+ $this->expectErrorCollection(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed', 'b');
+ $this->invoke('');
+ }
+
+ function testStrayEndTagToText() {
+ $this->config->set('Core', 'EscapeInvalidTags', true);
+ $this->expectErrorCollection(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text', 'b');
+ $this->invoke('');
+ }
+
+ function testTagClosedByElementEnd() {
+ $this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', 'b');
+ $this->invoke('Foobar');
+ }
+
+ function testTagClosedByDocumentEnd() {
+ $this->expectErrorCollection(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', 'b');
+ $this->invoke('Foobar');
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php
index fa7dd91b..78988cb3 100644
--- a/tests/HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php
+++ b/tests/HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php
@@ -1,9 +1,9 @@
config->set('HTML', 'TidyLevel', 'heavy');
}
- function invoke($input) {
- $strategy = new HTMLPurifier_Strategy_RemoveForeignElements();
- $lexer = new HTMLPurifier_Lexer_DirectLex();
- $tokens = $lexer->tokenizeHTML($input, $this->config, $this->context);
- $strategy->execute($tokens, $this->config, $this->context);
+ function getStrategy() {
+ return new HTMLPurifier_Strategy_RemoveForeignElements();
}
function testTagTransform() {
@@ -31,12 +28,14 @@ class HTMLPurifier_Strategy_RemoveForeignElements_ErrorsTest extends HTMLPurifie
}
function testForeignElementToText() {
+ // uses $CurrentToken.Serialized
$this->config->set('Core', 'EscapeInvalidTags', true);
$this->expectErrorCollection(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text', 'cannot-possibly-exist-element');
$this->invoke('');
}
function testForeignElementRemoved() {
+ // uses $CurrentToken.Serialized
$this->expectErrorCollection(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed', 'cannot-possibly-exist-element');
$this->invoke('');
}
diff --git a/tests/test_files.php b/tests/test_files.php
index 4cc365ed..865e53c7 100644
--- a/tests/test_files.php
+++ b/tests/test_files.php
@@ -94,6 +94,7 @@ $test_files[] = 'HTMLPurifier/Strategy/CompositeTest.php';
$test_files[] = 'HTMLPurifier/Strategy/CoreTest.php';
$test_files[] = 'HTMLPurifier/Strategy/FixNestingTest.php';
$test_files[] = 'HTMLPurifier/Strategy/MakeWellFormedTest.php';
+$test_files[] = 'HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php';
$test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElementsTest.php';
$test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php';
$test_files[] = 'HTMLPurifier/Strategy/ValidateAttributesTest.php';