diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php index d68e2739..759c8f50 100644 --- a/library/HTMLPurifier/Lexer/DirectLex.php +++ b/library/HTMLPurifier/Lexer/DirectLex.php @@ -150,6 +150,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // We are in tag and it is well formed // Grab the internals of the tag $strlen_segment = $position_next_gt - $cursor; + + if ($strlen_segment < 1) { + // there's nothing to process! + $token = new HTMLPurifier_Token_Text('<'); + $cursor++; + continue; + } + $segment = substr($html, $cursor, $strlen_segment); // Check if it's a comment @@ -372,6 +380,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $value = $quoted_value; } } + if ($value === false) $value = ''; return array($key => $value); } @@ -386,7 +395,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // infinite loop protection $loops = 0; - while(true) { // infinite loop protection @@ -400,7 +408,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); - // grab the key $key_begin = $cursor; //we're currently at the start of the key @@ -436,6 +443,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $cursor++; $cursor += strspn($string, $this->_whitespace, $cursor); + if ($cursor === false) { + $array[$key] = ''; + break; + } + // we might be in front of a quote right now $char = @$string[$cursor]; @@ -453,7 +465,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $value_end = $cursor; } + // we reached a premature end + if ($cursor === false) { + $cursor = $size; + $value_end = $cursor; + } + $value = substr($string, $value_begin, $value_end - $value_begin); + if ($value === false) $value = ''; $array[$key] = $this->parseData($value); $cursor++; diff --git a/tests/HTMLPurifier/Lexer/DirectLexTest.php b/tests/HTMLPurifier/Lexer/DirectLexTest.php index ba7d0fe7..a9154093 100644 --- a/tests/HTMLPurifier/Lexer/DirectLexTest.php +++ b/tests/HTMLPurifier/Lexer/DirectLexTest.php @@ -59,6 +59,12 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase $input[12] = '="" =""'; $expect[12] = array('"' => ''); // tough to say, just don't throw a loop + $input[13] = 'href="'; + $expect[13] = array('href' => ''); + + $input[14] = 'href=" <'; + $expect[14] = array('href' => ' <'); + $config = HTMLPurifier_Config::createDefault(); $context = new HTMLPurifier_Context(); $size = count($input); diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index 9ddefb1f..2fd10268 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -335,10 +335,21 @@ class HTMLPurifier_LexerTest extends UnitTestCase // test escaping $input[23] = ''; $expect[23] = array( - new HTMLPurifier_Token_Comment(' This comment < < & ') + new HTMLPurifier_Token_Comment(' This comment < < & ') ); + $sax_expect[23] = false; $config[23] = + HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => + true)); + + // more DirectLex edge-cases + $input[24] = ''; + $expect[24] = array( + new HTMLPurifier_Token_Start('a', array('href' => '')), + new HTMLPurifier_Token_Text('<">') + ); + $sax_expect[24] = false; + $dom_expect[24] = array( + new HTMLPurifier_Token_Empty('a', array('href' => '><>')) ); - $sax_expect[23] = false; - $config[21] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true)); $default_config = HTMLPurifier_Config::createDefault(); $default_context = new HTMLPurifier_Context();