0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 08:21:52 +00:00

[3.1.0] Fixed fatal error in PH5P lexer with invalid tag names

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1650 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2008-04-05 04:28:37 +00:00
parent c216968087
commit 9f1e678b48
6 changed files with 93 additions and 24 deletions

1
NEWS
View File

@ -55,6 +55,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
- Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax
- HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times
on the same element without emitting errors.
- Fixed fatal error in PH5P lexer with invalid tag names
. Plugins now get their own changelogs according to project conventions.
. Convert tokens to use instanceof, reducing memory footprint and
improving comparison speed.

View File

@ -63,16 +63,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$e =& $context->get('ErrorCollector');
}
// infinite loop protection
// has to be pretty big, since html docs can be big
// we're allow two hundred thousand tags... more than enough?
// NOTE: this is also used for synchronization, so watch out
// for testing synchronization
$loops = 0;
while(true) {
// infinite loop protection
if (++$loops > 200000) return array();
while(++$loops) {
// recalculate lines
if (
@ -381,16 +375,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// space, so let's guarantee that there's always a terminating space.
$string .= ' ';
// infinite loop protection
$loops = 0;
while(true) {
// infinite loop protection
if (++$loops > 1000) {
trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
return array();
}
if ($cursor >= $size) {
break;
}

View File

@ -115,7 +115,7 @@ class HTML5 {
public function __construct($data) {
$data = str_replace("\r\n", "\n", $data);
$date = str_replace("\r", null, $data);
$data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;
@ -2143,7 +2143,7 @@ class HTML5TreeConstructer {
/* Reconstruct the active formatting elements, if any. */
$this->reconstructActiveFormattingElements();
$this->insertElement($token);
$this->insertElement($token, true, true);
break;
}
break;
@ -3524,7 +3524,18 @@ class HTML5TreeConstructer {
}
}
private function insertElement($token, $append = true) {
private function insertElement($token, $append = true, $check = false) {
// Proprietary workaround for libxml2's limitations with tag names
if ($check) {
// Slightly modified HTML5 tag-name modification,
// removing anything that's not an ASCII letter, digit, or hyphen
$token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
// Remove leading hyphens and numbers
$token['name'] = ltrim($token['name'], '-0..9');
// In theory, this should never be needed, but just in case
if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
}
$el = $this->dom->createElement($token['name']);
foreach($token['attr'] as $attr) {

View File

@ -1,5 +1,14 @@
--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-04 23:41:49.074543700 -0500
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2007-11-05 00:23:52.839543700 -0500
--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-05 00:01:51.643585000 -0500
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2008-04-05 00:26:39.343160000 -0400
@@ -65,7 +65,7 @@
public function __construct($data) {
$data = str_replace("\r\n", "\n", $data);
- $date = str_replace("\r", null, $data);
+ $data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;
@@ -211,7 +211,10 @@
// If nothing is returned, emit a U+0026 AMPERSAND character token.
// Otherwise, emit the character token that was returned.
@ -43,7 +52,36 @@
$entity = $id;
break;
}
@@ -3659,7 +3668,7 @@
@@ -2084,7 +2093,7 @@
/* Reconstruct the active formatting elements, if any. */
$this->reconstructActiveFormattingElements();
- $this->insertElement($token);
+ $this->insertElement($token, true, true);
break;
}
break;
@@ -3465,7 +3474,18 @@
}
}
- private function insertElement($token, $append = true) {
+ private function insertElement($token, $append = true, $check = false) {
+ // Proprietary workaround for libxml2's limitations with tag names
+ if ($check) {
+ // Slightly modified HTML5 tag-name modification,
+ // removing anything that's not an ASCII letter, digit, or hyphen
+ $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
+ // Remove leading hyphens and numbers
+ $token['name'] = ltrim($token['name'], '-0..9');
+ // In theory, this should never be needed, but just in case
+ if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
+ }
+
$el = $this->dom->createElement($token['name']);
foreach($token['attr'] as $attr) {
@@ -3659,7 +3679,7 @@
}
}
@ -52,7 +90,7 @@
/* When the steps below require the UA to generate implied end tags,
then, if the current node is a dd element, a dt element, an li element,
a p element, a td element, a th element, or a tr element, the UA must
@@ -3673,7 +3682,8 @@
@@ -3673,7 +3693,8 @@
}
}

View File

@ -509,7 +509,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
);
}
function test_tokenizeHTML_() {
function test_tokenizeHTML_style() {
$extra = array(
// PH5P doesn't seem to like style tags
'PH5P' => false,
@ -543,6 +543,26 @@ div {}
);
}
/**
 * Tokenizes a start tag whose name contains an invalid character
 * ('<a@>>') and compares the result against the expected token lists.
 */
function test_tokenizeHTML_() {
    $input = '<a@>>';

    // Token list passed as the general expectation.
    $expect = array(
        new HTMLPurifier_Token_Start('a'),
        new HTMLPurifier_Token_Text('>'),
        new HTMLPurifier_Token_End('a'),
    );

    // Alternate expectation keyed by lexer name (presumably an override
    // for that lexer only; confirm against assertTokenization).
    $extra = array(
        'DirectLex' => array(
            // Technically this is invalid, but it won't be a
            // problem with invalid element removal; also, this
            // mimics Mozilla's parsing of the tag.
            new HTMLPurifier_Token_Start('a@'),
            new HTMLPurifier_Token_Text('>'),
        ),
    );

    $this->assertTokenization($input, $expect, $extra);
}
/*
function test_tokenizeHTML_() {

View File

@ -209,3 +209,16 @@ function htmlpurifier_flush($php, $reporter) {
exit(1);
}
}
/**
 * Dumps the SimpleTest error queue, which is useful when a fatal error
 * has cut the test run short.
 *
 * Looks up 'SimpleErrorQueue' in the current SimpleTest context and
 * var_dump()s its _queue member; outputs nothing when the queue object
 * is missing or its _queue is empty.
 */
function htmlpurifier_dump_error_queue() {
    $context = &SimpleTest::getContext();
    $queue = &$context->get('SimpleErrorQueue');

    // Nothing to report: no queue registered, or no queued entries.
    if (!$queue || empty($queue->_queue)) {
        return;
    }

    // replace this with something prettier
    var_dump($queue->_queue);
}
// Run the error-queue dump when the script shuts down, so queued errors
// are still printed if execution ends early (e.g. on a fatal error).
register_shutdown_function('htmlpurifier_dump_error_queue');