mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 08:21:52 +00:00
[3.1.0] Fixed fatal error in PH5P lexer with invalid tag names
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1650 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
c216968087
commit
9f1e678b48
1
NEWS
1
NEWS
@ -55,6 +55,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
- Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax
|
||||
- HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times
|
||||
on the same element without emitting errors.
|
||||
- Fixed fatal error in PH5P lexer with invalid tag names
|
||||
. Plugins now get their own changelogs according to project conventions.
|
||||
. Convert tokens to use instanceof, reducing memory footprint and
|
||||
improving comparison speed.
|
||||
|
@ -63,16 +63,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$e =& $context->get('ErrorCollector');
|
||||
}
|
||||
|
||||
// infinite loop protection
|
||||
// has to be pretty big, since html docs can be big
|
||||
// we allow two hundred thousand tags... more than enough?
|
||||
// NOTE: this is also used for synchronization, so watch out
|
||||
// for testing synchronization
|
||||
$loops = 0;
|
||||
|
||||
while(true) {
|
||||
|
||||
// infinite loop protection
|
||||
if (++$loops > 200000) return array();
|
||||
while(++$loops) {
|
||||
|
||||
// recalculate lines
|
||||
if (
|
||||
@ -381,16 +375,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
// space, so let's guarantee that there's always a terminating space.
|
||||
$string .= ' ';
|
||||
|
||||
// infinite loop protection
|
||||
$loops = 0;
|
||||
while(true) {
|
||||
|
||||
// infinite loop protection
|
||||
if (++$loops > 1000) {
|
||||
trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
|
||||
return array();
|
||||
}
|
||||
|
||||
if ($cursor >= $size) {
|
||||
break;
|
||||
}
|
||||
|
@ -115,7 +115,7 @@ class HTML5 {
|
||||
|
||||
public function __construct($data) {
|
||||
$data = str_replace("\r\n", "\n", $data);
|
||||
$date = str_replace("\r", null, $data);
|
||||
$data = str_replace("\r", null, $data);
|
||||
|
||||
$this->data = $data;
|
||||
$this->char = -1;
|
||||
@ -2143,7 +2143,7 @@ class HTML5TreeConstructer {
|
||||
/* Reconstruct the active formatting elements, if any. */
|
||||
$this->reconstructActiveFormattingElements();
|
||||
|
||||
$this->insertElement($token);
|
||||
$this->insertElement($token, true, true);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@ -3524,7 +3524,18 @@ class HTML5TreeConstructer {
|
||||
}
|
||||
}
|
||||
|
||||
private function insertElement($token, $append = true) {
|
||||
private function insertElement($token, $append = true, $check = false) {
|
||||
// Proprietary workaround for libxml2's limitations with tag names
|
||||
if ($check) {
|
||||
// Slightly modified HTML5 tag-name modification,
|
||||
// removing anything that's not an ASCII letter, digit, or hyphen
|
||||
$token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
|
||||
// Remove leading hyphens and numbers
|
||||
$token['name'] = ltrim($token['name'], '-0..9');
|
||||
// In theory, this should never be needed, but just in case
|
||||
if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
|
||||
}
|
||||
|
||||
$el = $this->dom->createElement($token['name']);
|
||||
|
||||
foreach($token['attr'] as $attr) {
|
||||
|
@ -1,5 +1,14 @@
|
||||
--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-04 23:41:49.074543700 -0500
|
||||
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2007-11-05 00:23:52.839543700 -0500
|
||||
--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-05 00:01:51.643585000 -0500
|
||||
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2008-04-05 00:26:39.343160000 -0400
|
||||
@@ -65,7 +65,7 @@
|
||||
|
||||
public function __construct($data) {
|
||||
$data = str_replace("\r\n", "\n", $data);
|
||||
- $date = str_replace("\r", null, $data);
|
||||
+ $data = str_replace("\r", null, $data);
|
||||
|
||||
$this->data = $data;
|
||||
$this->char = -1;
|
||||
@@ -211,7 +211,10 @@
|
||||
// If nothing is returned, emit a U+0026 AMPERSAND character token.
|
||||
// Otherwise, emit the character token that was returned.
|
||||
@ -43,7 +52,36 @@
|
||||
$entity = $id;
|
||||
break;
|
||||
}
|
||||
@@ -3659,7 +3668,7 @@
|
||||
@@ -2084,7 +2093,7 @@
|
||||
/* Reconstruct the active formatting elements, if any. */
|
||||
$this->reconstructActiveFormattingElements();
|
||||
|
||||
- $this->insertElement($token);
|
||||
+ $this->insertElement($token, true, true);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@@ -3465,7 +3474,18 @@
|
||||
}
|
||||
}
|
||||
|
||||
- private function insertElement($token, $append = true) {
|
||||
+ private function insertElement($token, $append = true, $check = false) {
|
||||
+ // Proprietary workaround for libxml2's limitations with tag names
|
||||
+ if ($check) {
|
||||
+ // Slightly modified HTML5 tag-name modification,
|
||||
+ // removing anything that's not an ASCII letter, digit, or hyphen
|
||||
+ $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
|
||||
+ // Remove leading hyphens and numbers
|
||||
+ $token['name'] = ltrim($token['name'], '-0..9');
|
||||
+ // In theory, this should never be needed, but just in case
|
||||
+ if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
|
||||
+ }
|
||||
+
|
||||
$el = $this->dom->createElement($token['name']);
|
||||
|
||||
foreach($token['attr'] as $attr) {
|
||||
@@ -3659,7 +3679,7 @@
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,7 +90,7 @@
|
||||
/* When the steps below require the UA to generate implied end tags,
|
||||
then, if the current node is a dd element, a dt element, an li element,
|
||||
a p element, a td element, a th element, or a tr element, the UA must
|
||||
@@ -3673,7 +3682,8 @@
|
||||
@@ -3673,7 +3693,8 @@
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -509,7 +509,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_() {
|
||||
function test_tokenizeHTML_style() {
|
||||
$extra = array(
|
||||
// PH5P doesn't seem to like style tags
|
||||
'PH5P' => false,
|
||||
@ -543,6 +543,26 @@ div {}
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_() {
|
||||
$this->assertTokenization(
|
||||
'<a@>>',
|
||||
array(
|
||||
new HTMLPurifier_Token_Start('a'),
|
||||
new HTMLPurifier_Token_Text('>'),
|
||||
new HTMLPurifier_Token_End('a'),
|
||||
),
|
||||
array(
|
||||
'DirectLex' => array(
|
||||
// Technically this is invalid, but it won't be a
|
||||
// problem with invalid element removal; also, this
|
||||
// mimics Mozilla's parsing of the tag.
|
||||
new HTMLPurifier_Token_Start('a@'),
|
||||
new HTMLPurifier_Token_Text('>'),
|
||||
),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
function test_tokenizeHTML_() {
|
||||
|
@ -209,3 +209,16 @@ function htmlpurifier_flush($php, $reporter) {
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Dumps error queue, useful if there has been a fatal error.
|
||||
*/
|
||||
function htmlpurifier_dump_error_queue() {
|
||||
$context = &SimpleTest::getContext();
|
||||
$queue = &$context->get('SimpleErrorQueue');
|
||||
if ($queue && !empty($queue->_queue)) {
|
||||
// replace this with something prettier
|
||||
var_dump($queue->_queue);
|
||||
}
|
||||
}
|
||||
register_shutdown_function('htmlpurifier_dump_error_queue');
|
||||
|
Loading…
Reference in New Issue
Block a user