diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php index bd214d29..00e6b5dc 100644 --- a/library/HTMLPurifier/Language/messages/en.php +++ b/library/HTMLPurifier/Language/messages/en.php @@ -10,27 +10,32 @@ $messages = array( 'ErrorCollector: No errors' => 'No errors detected. However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.', 'ErrorCollector: At line' => ' at line $line', -'Lexer: Unclosed comment' => 'Unclosed comment', -'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <', -'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', +'Lexer: Unclosed comment' => 'Unclosed comment', +'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <', +'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', 'Lexer: Missing attribute key' => 'Attribute declaration has no key', -'Lexer: Missing end quote' => 'Attribute declaration has no end quote', +'Lexer: Missing end quote' => 'Attribute declaration has no end quote', -'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', +'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', 'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', -'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text', -'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed', -'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed', -'Strategy_RemoveForeignElements: Script removed' => 'Script removed', -'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end', +'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text', +'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed', +'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed', +'Strategy_RemoveForeignElements: Script removed' => 'Script removed', +'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end', 'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', 'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text', -'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', -'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', -'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text', -'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', -'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document', +'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', +'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', +'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text', +'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', +'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document', + +'Strategy_FixNesting: Node removed' => '$CurrentToken.Compact node removed', +'Strategy_FixNesting: Node excluded' => '$CurrentToken.Compact node removed due to descendant exclusion by ancestor element', +'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model', +'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed', ); diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php index e6a779e5..4057a6ce 100644 --- a/library/HTMLPurifier/Strategy/FixNesting.php +++ b/library/HTMLPurifier/Strategy/FixNesting.php @@ -54,6 +54,9 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy $is_inline = $definition->info_parent_def->descendants_are_inline; $context->register('IsInline', $is_inline); + // setup error collector + $e =& $context->get('ErrorCollector', true); + //####################################################################// // Loop initialization @@ -67,6 +70,11 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // processed, i.e. there won't be empty exclusions. $exclude_stack = array(); + // variable that contains the start token while we are processing + // nodes. This enables error reporting to do its job + $start_token = false; + $context->register('CurrentToken', $start_token); + //####################################################################// // Loop @@ -100,6 +108,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // $i is index of start token // $j is index of end token + $start_token = $tokens[$i]; // to make token available via CurrentToken + //################################################################// // Gather information on parent @@ -200,6 +210,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy } elseif($result === false) { // remove entire node + if ($e) { + if ($excluded) { + $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); + } else { + $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); + } + } + // calculate length of inner tokens and current tokens $length = $j - $i + 1; @@ -216,7 +234,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // this is a rought heuristic that covers 100% of HTML's // cases and 99% of all other cases. A child definition // that would be tricked by this would be something like: - // ( | a b c) where it's all or nothing. Fortunantely, + // ( | a b c) where it's all or nothing. Fortunately, // our current implementation claims that that case would // not allow empty, even if it did if (!$parent_def->child->allow_empty) { @@ -234,6 +252,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // calculate length of inner tokens $length = $j - $i - 1; + if ($e) { + if (empty($result) && $length) { + $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); + } else { + $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); + } + } + // perform replacement array_splice($tokens, $i + 1, $length, $result); @@ -291,6 +317,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // remove context variables $context->destroy('IsInline'); + $context->destroy('CurrentToken'); //####################################################################// // Return diff --git a/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php b/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php new file mode 100644 index 00000000..371266cd --- /dev/null +++ b/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php @@ -0,0 +1,39 @@ +expectErrorCollection(E_ERROR, 'Strategy_FixNesting: Node removed'); + $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('ul', array(), 1)); + $this->invoke(''); + } + + function testNodeExcluded() { + $this->expectErrorCollection(E_ERROR, 'Strategy_FixNesting: Node excluded'); + $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('a', array(), 2)); + $this->invoke("\n"); + } + + function testNodeReorganized() { + $this->expectErrorCollection(E_WARNING, 'Strategy_FixNesting: Node reorganized'); + $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('span', array(), 1)); + $this->invoke("Valid
Invalid
"); + } + + function testNodeContentsRemoved() { + $this->expectErrorCollection(E_ERROR, 'Strategy_FixNesting: Node contents removed'); + $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('span', array(), 1)); + $this->invoke("
"); + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php index 3eb809cf..62f29db5 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php @@ -3,11 +3,6 @@ require_once 'HTMLPurifier/Strategy/ErrorsHarness.php'; require_once 'HTMLPurifier/Strategy/MakeWellFormed.php'; -/* -'Strategy_MakeWellFormed: Tag closed by element end' => '', -'Strategy_MakeWellFormed: Tag closed by document end' => '', -*/ - class HTMLPurifier_Strategy_MakeWellFormed_ErrorsTest extends HTMLPurifier_Strategy_ErrorsHarness { diff --git a/tests/test_files.php b/tests/test_files.php index 865e53c7..b0ee2389 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -93,6 +93,7 @@ $test_files[] = 'HTMLPurifier/PercentEncoderTest.php'; $test_files[] = 'HTMLPurifier/Strategy/CompositeTest.php'; $test_files[] = 'HTMLPurifier/Strategy/CoreTest.php'; $test_files[] = 'HTMLPurifier/Strategy/FixNestingTest.php'; +$test_files[] = 'HTMLPurifier/Strategy/FixNesting_ErrorsTest.php'; $test_files[] = 'HTMLPurifier/Strategy/MakeWellFormedTest.php'; $test_files[] = 'HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php'; $test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElementsTest.php';