diff --git a/HTML_Lexer.php b/HTML_Lexer.php index ba22b1d5..5ec93600 100644 --- a/HTML_Lexer.php +++ b/HTML_Lexer.php @@ -58,6 +58,7 @@ class HTML_Lexer $position_next_lt = strpos($string, '<', $cursor); $position_next_gt = strpos($string, '>', $cursor); + // triggers on "asdf" but not "asdf " if ($position_next_lt === $cursor) { $inside_tag = true; @@ -184,6 +185,12 @@ class HTML_Lexer $position_next_space = $this->nextWhiteSpace($string, $cursor); } + // if we've hit the end, assign the key an empty value and abort + if ($cursor >= $size) { + $array[$key] = ''; + break; + } + // find the next quote $position_next_quote = $this->nextQuote($string, $cursor); @@ -201,6 +208,13 @@ class HTML_Lexer // otherwise, regular attribute $quote = $string{$position_next_quote}; $position_end_quote = strpos($string, $quote, $position_next_quote + 1); + + // check if the ending quote is missing + if ($position_end_quote === false) { + // it is, assign it to the end of the string + $position_end_quote = $size; + } + $value = substr($string, $position_next_quote + 1, $position_end_quote - $position_next_quote - 1); if ($key) { diff --git a/tests/HTML_Lexer.php b/tests/HTML_Lexer.php index 0006a9ca..9263bd3e 100644 --- a/tests/HTML_Lexer.php +++ b/tests/HTML_Lexer.php @@ -112,6 +112,12 @@ class TestCase_HTML_Lexer extends UnitTestCase // however, we may want to change both styles // into parsed: ''. SAX has an option for this + // [INVALID] + $input[10] = ''; + $expect[10] = array( + new MF_StartTag('a', array('"' => '')) + ); + foreach($input as $i => $discard) { $result = $this->HTML_Lexer->tokenizeHTML($input[$i]); $this->assertEqual($expect[$i], $result); @@ -155,6 +161,9 @@ class TestCase_HTML_Lexer extends UnitTestCase $input[] = 'missile=launch'; $expect[] = array('missile' => 'launch'); + $input[] = 'href="foo'; + $expect[] = array('href' => 'foo'); + $size = count($input); for($i = 0; $i < $size; $i++) { $result = $this->HTML_Lexer->tokenizeAttributeString($input[$i]);