diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php
index d68e2739..759c8f50 100644
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -150,6 +150,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// We are in tag and it is well formed
// Grab the internals of the tag
$strlen_segment = $position_next_gt - $cursor;
+
+ if ($strlen_segment < 1) {
+ // there's nothing to process!
+ $token = new HTMLPurifier_Token_Text('<');
+ $cursor++;
+ continue;
+ }
+
$segment = substr($html, $cursor, $strlen_segment);
// Check if it's a comment
@@ -372,6 +380,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$value = $quoted_value;
}
}
+ if ($value === false) $value = '';
return array($key => $value);
}
@@ -386,7 +395,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// infinite loop protection
$loops = 0;
-
while(true) {
// infinite loop protection
@@ -400,7 +408,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
$cursor += ($value = strspn($string, $this->_whitespace, $cursor));
-
// grab the key
$key_begin = $cursor; //we're currently at the start of the key
@@ -436,6 +443,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$cursor++;
$cursor += strspn($string, $this->_whitespace, $cursor);
+ if ($cursor === false) {
+ $array[$key] = '';
+ break;
+ }
+
// we might be in front of a quote right now
$char = @$string[$cursor];
@@ -453,7 +465,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$value_end = $cursor;
}
+ // we reached a premature end
+ if ($cursor === false) {
+ $cursor = $size;
+ $value_end = $cursor;
+ }
+
$value = substr($string, $value_begin, $value_end - $value_begin);
+ if ($value === false) $value = '';
$array[$key] = $this->parseData($value);
$cursor++;
diff --git a/tests/HTMLPurifier/Lexer/DirectLexTest.php b/tests/HTMLPurifier/Lexer/DirectLexTest.php
index ba7d0fe7..a9154093 100644
--- a/tests/HTMLPurifier/Lexer/DirectLexTest.php
+++ b/tests/HTMLPurifier/Lexer/DirectLexTest.php
@@ -59,6 +59,12 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
$input[12] = '="" =""';
$expect[12] = array('"' => ''); // tough to say, just don't throw a loop
+ $input[13] = 'href="';
+ $expect[13] = array('href' => '');
+
+ $input[14] = 'href=" <';
+ $expect[14] = array('href' => ' <');
+
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
$size = count($input);
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index 9ddefb1f..2fd10268 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -335,10 +335,21 @@ class HTMLPurifier_LexerTest extends UnitTestCase
// test escaping
$input[23] = '<!-- This comment < < & -->';
$expect[23] = array(
- new HTMLPurifier_Token_Comment(' This comment < < & ')
+ new HTMLPurifier_Token_Comment(' This comment < < & ') );
+ $sax_expect[23] = false; $config[23] =
+ HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' =>
+ true));
+
+ // more DirectLex edge-cases
+ $input[24] = '<a href="><>">';
+ $expect[24] = array(
+ new HTMLPurifier_Token_Start('a', array('href' => '')),
+ new HTMLPurifier_Token_Text('<">')
+ );
+ $sax_expect[24] = false;
+ $dom_expect[24] = array(
+ new HTMLPurifier_Token_Empty('a', array('href' => '><>'))
);
- $sax_expect[23] = false;
- $config[21] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
$default_config = HTMLPurifier_Config::createDefault();
$default_context = new HTMLPurifier_Context();