mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-03-24 06:47:02 +00:00
[3.1.0] Fixed fatal error in PH5P lexer with invalid tag names
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1650 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
c216968087
commit
9f1e678b48
1
NEWS
1
NEWS
@ -55,6 +55,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
- Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax
|
- Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax
|
||||||
- HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times
|
- HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times
|
||||||
on the same element without emitting errors.
|
on the same element without emitting errors.
|
||||||
|
- Fixed fatal error in PH5P lexer with invalid tag names
|
||||||
. Plugins now get their own changelogs according to project conventions.
|
. Plugins now get their own changelogs according to project conventions.
|
||||||
. Convert tokens to use instanceof, reducing memory footprint and
|
. Convert tokens to use instanceof, reducing memory footprint and
|
||||||
improving comparison speed.
|
improving comparison speed.
|
||||||
|
@ -63,16 +63,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
$e =& $context->get('ErrorCollector');
|
$e =& $context->get('ErrorCollector');
|
||||||
}
|
}
|
||||||
|
|
||||||
// infinite loop protection
|
// for testing synchronization
|
||||||
// has to be pretty big, since html docs can be big
|
|
||||||
// we allow two hundred thousand tags... more than enough?
|
|
||||||
// NOTE: this is also used for synchronization, so watch out
|
|
||||||
$loops = 0;
|
$loops = 0;
|
||||||
|
|
||||||
while(true) {
|
while(++$loops) {
|
||||||
|
|
||||||
// infinite loop protection
|
|
||||||
if (++$loops > 200000) return array();
|
|
||||||
|
|
||||||
// recalculate lines
|
// recalculate lines
|
||||||
if (
|
if (
|
||||||
@ -381,16 +375,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
// space, so let's guarantee that there's always a terminating space.
|
// space, so let's guarantee that there's always a terminating space.
|
||||||
$string .= ' ';
|
$string .= ' ';
|
||||||
|
|
||||||
// infinite loop protection
|
|
||||||
$loops = 0;
|
|
||||||
while(true) {
|
while(true) {
|
||||||
|
|
||||||
// infinite loop protection
|
|
||||||
if (++$loops > 1000) {
|
|
||||||
trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
|
|
||||||
return array();
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($cursor >= $size) {
|
if ($cursor >= $size) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -115,7 +115,7 @@ class HTML5 {
|
|||||||
|
|
||||||
public function __construct($data) {
|
public function __construct($data) {
|
||||||
$data = str_replace("\r\n", "\n", $data);
|
$data = str_replace("\r\n", "\n", $data);
|
||||||
$date = str_replace("\r", null, $data);
|
$data = str_replace("\r", null, $data);
|
||||||
|
|
||||||
$this->data = $data;
|
$this->data = $data;
|
||||||
$this->char = -1;
|
$this->char = -1;
|
||||||
@ -2143,7 +2143,7 @@ class HTML5TreeConstructer {
|
|||||||
/* Reconstruct the active formatting elements, if any. */
|
/* Reconstruct the active formatting elements, if any. */
|
||||||
$this->reconstructActiveFormattingElements();
|
$this->reconstructActiveFormattingElements();
|
||||||
|
|
||||||
$this->insertElement($token);
|
$this->insertElement($token, true, true);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -3524,7 +3524,18 @@ class HTML5TreeConstructer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function insertElement($token, $append = true) {
|
private function insertElement($token, $append = true, $check = false) {
|
||||||
|
// Proprietary workaround for libxml2's limitations with tag names
|
||||||
|
if ($check) {
|
||||||
|
// Slightly modified HTML5 tag-name modification,
|
||||||
|
// removing anything that's not an ASCII letter, digit, or hyphen
|
||||||
|
$token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
|
||||||
|
// Remove leading hyphens and numbers
|
||||||
|
$token['name'] = ltrim($token['name'], '-0..9');
|
||||||
|
// In theory, this should never be needed, but just in case
|
||||||
|
if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
|
||||||
|
}
|
||||||
|
|
||||||
$el = $this->dom->createElement($token['name']);
|
$el = $this->dom->createElement($token['name']);
|
||||||
|
|
||||||
foreach($token['attr'] as $attr) {
|
foreach($token['attr'] as $attr) {
|
||||||
|
@ -1,5 +1,14 @@
|
|||||||
--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-04 23:41:49.074543700 -0500
|
--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-05 00:01:51.643585000 -0500
|
||||||
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2007-11-05 00:23:52.839543700 -0500
|
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2008-04-05 00:26:39.343160000 -0400
|
||||||
|
@@ -65,7 +65,7 @@
|
||||||
|
|
||||||
|
public function __construct($data) {
|
||||||
|
$data = str_replace("\r\n", "\n", $data);
|
||||||
|
- $date = str_replace("\r", null, $data);
|
||||||
|
+ $data = str_replace("\r", null, $data);
|
||||||
|
|
||||||
|
$this->data = $data;
|
||||||
|
$this->char = -1;
|
||||||
@@ -211,7 +211,10 @@
|
@@ -211,7 +211,10 @@
|
||||||
// If nothing is returned, emit a U+0026 AMPERSAND character token.
|
// If nothing is returned, emit a U+0026 AMPERSAND character token.
|
||||||
// Otherwise, emit the character token that was returned.
|
// Otherwise, emit the character token that was returned.
|
||||||
@ -43,7 +52,36 @@
|
|||||||
$entity = $id;
|
$entity = $id;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -3659,7 +3668,7 @@
|
@@ -2084,7 +2093,7 @@
|
||||||
|
/* Reconstruct the active formatting elements, if any. */
|
||||||
|
$this->reconstructActiveFormattingElements();
|
||||||
|
|
||||||
|
- $this->insertElement($token);
|
||||||
|
+ $this->insertElement($token, true, true);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
@@ -3465,7 +3474,18 @@
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- private function insertElement($token, $append = true) {
|
||||||
|
+ private function insertElement($token, $append = true, $check = false) {
|
||||||
|
+ // Proprietary workaround for libxml2's limitations with tag names
|
||||||
|
+ if ($check) {
|
||||||
|
+ // Slightly modified HTML5 tag-name modification,
|
||||||
|
+ // removing anything that's not an ASCII letter, digit, or hyphen
|
||||||
|
+ $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
|
||||||
|
+ // Remove leading hyphens and numbers
|
||||||
|
+ $token['name'] = ltrim($token['name'], '-0..9');
|
||||||
|
+ // In theory, this should never be needed, but just in case
|
||||||
|
+ if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
$el = $this->dom->createElement($token['name']);
|
||||||
|
|
||||||
|
foreach($token['attr'] as $attr) {
|
||||||
|
@@ -3659,7 +3679,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,7 +90,7 @@
|
|||||||
/* When the steps below require the UA to generate implied end tags,
|
/* When the steps below require the UA to generate implied end tags,
|
||||||
then, if the current node is a dd element, a dt element, an li element,
|
then, if the current node is a dd element, a dt element, an li element,
|
||||||
a p element, a td element, a th element, or a tr element, the UA must
|
a p element, a td element, a th element, or a tr element, the UA must
|
||||||
@@ -3673,7 +3682,8 @@
|
@@ -3673,7 +3693,8 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -509,7 +509,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_tokenizeHTML_() {
|
function test_tokenizeHTML_style() {
|
||||||
$extra = array(
|
$extra = array(
|
||||||
// PH5P doesn't seem to like style tags
|
// PH5P doesn't seem to like style tags
|
||||||
'PH5P' => false,
|
'PH5P' => false,
|
||||||
@ -543,6 +543,26 @@ div {}
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_tokenizeHTML_() {
|
||||||
|
$this->assertTokenization(
|
||||||
|
'<a@>>',
|
||||||
|
array(
|
||||||
|
new HTMLPurifier_Token_Start('a'),
|
||||||
|
new HTMLPurifier_Token_Text('>'),
|
||||||
|
new HTMLPurifier_Token_End('a'),
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'DirectLex' => array(
|
||||||
|
// Technically this is invalid, but it won't be a
|
||||||
|
// problem with invalid element removal; also, this
|
||||||
|
// mimics Mozilla's parsing of the tag.
|
||||||
|
new HTMLPurifier_Token_Start('a@'),
|
||||||
|
new HTMLPurifier_Token_Text('>'),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
function test_tokenizeHTML_() {
|
function test_tokenizeHTML_() {
|
||||||
|
@ -209,3 +209,16 @@ function htmlpurifier_flush($php, $reporter) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dumps error queue, useful if there has been a fatal error.
|
||||||
|
*/
|
||||||
|
function htmlpurifier_dump_error_queue() {
|
||||||
|
$context = &SimpleTest::getContext();
|
||||||
|
$queue = &$context->get('SimpleErrorQueue');
|
||||||
|
if ($queue && !empty($queue->_queue)) {
|
||||||
|
// replace this with something prettier
|
||||||
|
var_dump($queue->_queue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
register_shutdown_function('htmlpurifier_dump_error_queue');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user