diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php
index d0c14b97..0db45a35 100644
--- a/library/HTMLPurifier.includes.php
+++ b/library/HTMLPurifier.includes.php
@@ -136,6 +136,7 @@ require 'HTMLPurifier/HTMLModule/List.php';
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Object.php';
require 'HTMLPurifier/HTMLModule/Presentation.php';
+require 'HTMLPurifier/HTMLModule/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Ruby.php';
require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php
index 059027b0..1d38826b 100644
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -81,7 +81,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$cursor > 0 && // cursor is further than zero
$loops % $synchronize_interval === 0 // time to synchronize!
) {
- $current_line = 1 + substr_count($html, $nl, 0, $cursor);
+ $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
}
$position_next_lt = strpos($html, '<', $cursor);
@@ -106,7 +106,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
);
if ($maintain_line_numbers) {
$token->line = $current_line;
- $current_line += substr_count($html, $nl, $cursor, $position_next_lt - $cursor);
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
}
$array[] = $token;
$cursor = $position_next_lt + 1;
@@ -150,7 +150,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// Check if it's a comment
if (
- strncmp('!--', $segment, 3) === 0
+ substr($segment, 0, 3) === '!--'
) {
// re-determine segment length, looking for -->
$position_comment_end = strpos($html, '-->', $cursor);
@@ -168,11 +168,13 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$segment = substr($html, $cursor, $strlen_segment);
$token = new
HTMLPurifier_Token_Comment(
- substr($segment, 3)
+ substr(
+ $segment, 3, $strlen_segment - 3
+ )
);
if ($maintain_line_numbers) {
$token->line = $current_line;
- $current_line += substr_count($html, $nl, $cursor, $strlen_segment);
+ $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
}
$array[] = $token;
$cursor = $end ? $position_comment_end : $position_comment_end + 3;
@@ -187,7 +189,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$token = new HTMLPurifier_Token_End($type);
if ($maintain_line_numbers) {
$token->line = $current_line;
- $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$inside_tag = false;
@@ -211,7 +213,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
);
if ($maintain_line_numbers) {
$token->line = $current_line;
- $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$cursor = $position_next_gt + 1;
@@ -240,7 +242,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
if ($maintain_line_numbers) {
$token->line = $current_line;
- $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$inside_tag = false;
@@ -272,7 +274,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
if ($maintain_line_numbers) {
$token->line = $current_line;
- $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
+ $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$cursor = $position_next_gt + 1;
@@ -300,6 +302,22 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
return $array;
}
+ /**
+ * Counts $needle in the $length bytes of $haystack starting at $offset; on PHP < 5.1 (substr_count() without offset/length) the window is cut out with substr() first.
+ */
+ protected function substrCount($haystack, $needle, $offset, $length) {
+ static $oldVersion;
+ if ($oldVersion === null) {
+ $oldVersion = version_compare(PHP_VERSION, '5.1', '<');
+ }
+ if ($oldVersion) {
+ $haystack = substr($haystack, $offset, $length);
+ return substr_count($haystack, $needle);
+ } else {
+ return substr_count($haystack, $needle, $offset, $length);
+ }
+ }
+
/**
* Takes the inside of an HTML tag and makes an assoc array of attributes.
*
diff --git a/maintenance/generate-standalone.php b/maintenance/generate-standalone.php
index bcd8340a..8c92544c 100755
--- a/maintenance/generate-standalone.php
+++ b/maintenance/generate-standalone.php
@@ -110,7 +110,6 @@ function replace_includes_callback($matches) {
shell_exec('php generate-includes.php');
chdir(dirname(__FILE__) . '/../library/');
-create_blank('HTMLPurifier.php');
echo 'Creating full file...';
$contents = replace_includes(file_get_contents('HTMLPurifier.includes.php'));
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index 257d0584..78e5a056 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -510,6 +510,26 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
}
function test_tokenizeHTML_() {
+ $extra = array(
+ // PH5P doesn't seem to like style tags
+ 'PH5P' => false,
+ // DirectLex defers to RemoveForeignElements for textification
+ 'DirectLex' => array(
+ new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
+ new HTMLPurifier_Token_Comment("\ndiv {}\n"),
+ new HTMLPurifier_Token_End('style'),
+ ),
+ );
+ if (!defined('LIBXML_VERSION') || LIBXML_VERSION < 20628) {
+ // libxml's behavior is wrong prior to this version, so make
+ // appropriate accommodations
+ // :NOTE: LIBXML_VERSION is missing in early versions of PHP
+ // prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise,
+ // this translates to 5.0.x). In such cases, we assume that an old
+ // version of libxml is being used, although that *might* not
+ // be the case (it's very unlikely though)
+ $extra['DOMLex'] = $extra['DirectLex'];
+ }
$this->assertTokenization(
'