diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index d0c14b97..0db45a35 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -136,6 +136,7 @@ require 'HTMLPurifier/HTMLModule/List.php'; require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; require 'HTMLPurifier/HTMLModule/Object.php'; require 'HTMLPurifier/HTMLModule/Presentation.php'; +require 'HTMLPurifier/HTMLModule/Proprietary.php'; require 'HTMLPurifier/HTMLModule/Ruby.php'; require 'HTMLPurifier/HTMLModule/Scripting.php'; require 'HTMLPurifier/HTMLModule/StyleAttribute.php'; diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php index 059027b0..1d38826b 100644 --- a/library/HTMLPurifier/Lexer/DirectLex.php +++ b/library/HTMLPurifier/Lexer/DirectLex.php @@ -81,7 +81,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $cursor > 0 && // cursor is further than zero $loops % $synchronize_interval === 0 // time to synchronize! 
) { - $current_line = 1 + substr_count($html, $nl, 0, $cursor); + $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); } $position_next_lt = strpos($html, '<', $cursor); @@ -106,7 +106,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer ); if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += substr_count($html, $nl, $cursor, $position_next_lt - $cursor); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); } $array[] = $token; $cursor = $position_next_lt + 1; @@ -150,7 +150,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // Check if it's a comment if ( - strncmp('!--', $segment, 3) === 0 + substr($segment, 0, 3) === '!--' ) { // re-determine segment length, looking for --> $position_comment_end = strpos($html, '-->', $cursor); @@ -168,11 +168,13 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $segment = substr($html, $cursor, $strlen_segment); $token = new HTMLPurifier_Token_Comment( - substr($segment, 3) + substr( + $segment, 3, $strlen_segment - 3 + ) ); if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += substr_count($html, $nl, $cursor, $strlen_segment); + $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); } $array[] = $token; $cursor = $end ? 
$position_comment_end : $position_comment_end + 3; @@ -187,7 +189,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $token = new HTMLPurifier_Token_End($type); if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $inside_tag = false; @@ -211,7 +213,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer ); if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $cursor = $position_next_gt + 1; @@ -240,7 +242,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $inside_tag = false; @@ -272,7 +274,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $cursor = $position_next_gt + 1; @@ -300,6 +302,22 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer return $array; } + /** + * substr_count() with $offset/$length support; on PHP < 5.1 it counts within substr($haystack, $offset, $length) instead (version check cached in a static) + */ + protected function substrCount($haystack, $needle, $offset, $length) { + static $oldVersion; + if ($oldVersion === null) { + $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); + } + if ($oldVersion) { + $haystack = substr($haystack, $offset, $length); + 
return substr_count($haystack, $needle); + } else { + return substr_count($haystack, $needle, $offset, $length); + } + } + /** * Takes the inside of an HTML tag and makes an assoc array of attributes. * diff --git a/maintenance/generate-standalone.php b/maintenance/generate-standalone.php index bcd8340a..8c92544c 100755 --- a/maintenance/generate-standalone.php +++ b/maintenance/generate-standalone.php @@ -110,7 +110,6 @@ function replace_includes_callback($matches) { shell_exec('php generate-includes.php'); chdir(dirname(__FILE__) . '/../library/'); -create_blank('HTMLPurifier.php'); echo 'Creating full file...'; $contents = replace_includes(file_get_contents('HTMLPurifier.includes.php')); diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index 257d0584..78e5a056 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -510,6 +510,26 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness } function test_tokenizeHTML_() { + $extra = array( + // PH5P doesn't seem to like style tags + 'PH5P' => false, + // DirectLex defers to RemoveForeignElements for textification + 'DirectLex' => array( + new HTMLPurifier_Token_Start('style', array('type' => 'text/css')), + new HTMLPurifier_Token_Comment("\ndiv {}\n"), + new HTMLPurifier_Token_End('style'), + ), + ); + if (!defined('LIBXML_VERSION') || LIBXML_VERSION < 20628) { + // libxml's behavior is wrong prior to this version, so make + // appropriate accommodations + // :NOTE: LIBXML_VERSION is missing in early versions of PHP + // prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise, + // this translates to 5.0.x). In such cases, we assume that an old + // version of libxml is being used, although that *might* not + // be the case (it's very unlikely though) + $extra['DOMLex'] = $extra['DirectLex']; + } $this->assertTokenization( '