mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-03 05:11:52 +00:00
[3.1.0] [BACKPORT] Fix bug with comments in styles, and some associated issues
- Restore printTokens()

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1570 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
fbc595ebed
commit
6c9c8f2380
6
NEWS
6
NEWS
@ -32,6 +32,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
$schema which defines what HTMLPurifier_ConfigSchema to use besides the
|
$schema which defines what HTMLPurifier_ConfigSchema to use besides the
|
||||||
global default.
|
global default.
|
||||||
- Fix bug with trusted script handling in libxml versions later than 2.6.28.
|
- Fix bug with trusted script handling in libxml versions later than 2.6.28.
|
||||||
|
- Fix bug in ExtractStyleBlocks with comments in style tags
|
||||||
|
- Fix bug in comment parsing for DirectLex
|
||||||
. Plugins now get their own changelogs according to project conventions.
|
. Plugins now get their own changelogs according to project conventions.
|
||||||
. Convert tokens to use instanceof, reducing memory footprint and
|
. Convert tokens to use instanceof, reducing memory footprint and
|
||||||
improving comparison speed.
|
improving comparison speed.
|
||||||
@ -53,6 +55,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. Debugger class is deprecated and will be removed soon.
|
. Debugger class is deprecated and will be removed soon.
|
||||||
. Command line argument parsing for testing scripts revamped, now --opt value
|
. Command line argument parsing for testing scripts revamped, now --opt value
|
||||||
format is supported.
|
format is supported.
|
||||||
|
. Smoketests now cleanup after magic quotes
|
||||||
|
. Generator now can output comments (however, comments are still stripped
|
||||||
|
from HTML Purifier output)
|
||||||
|
. substr_count PHP4 compatibility cludge removed
|
||||||
|
|
||||||
3.0.0, released 2008-01-06
|
3.0.0, released 2008-01-06
|
||||||
# HTML Purifier is PHP 5 only! The 2.1.x branch will be maintained
|
# HTML Purifier is PHP 5 only! The 2.1.x branch will be maintained
|
||||||
|
@ -72,6 +72,15 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
|||||||
} else {
|
} else {
|
||||||
$scopes = array();
|
$scopes = array();
|
||||||
}
|
}
|
||||||
|
// remove comments from CSS
|
||||||
|
$css = trim($css);
|
||||||
|
if (strncmp('<!--', $css, 4) === 0) {
|
||||||
|
$css = substr($css, 4);
|
||||||
|
}
|
||||||
|
if (strlen($css) > 3 && substr($css, -3) == '-->') {
|
||||||
|
$css = substr($css, 0, -3);
|
||||||
|
}
|
||||||
|
$css = trim($css);
|
||||||
$this->_tidy->parse($css);
|
$this->_tidy->parse($css);
|
||||||
$css_definition = $config->getDefinition('CSS');
|
$css_definition = $config->getDefinition('CSS');
|
||||||
foreach ($this->_tidy->css as $k => $decls) {
|
foreach ($this->_tidy->css as $k => $decls) {
|
||||||
|
@ -112,6 +112,8 @@ class HTMLPurifier_Generator
|
|||||||
} elseif ($token instanceof HTMLPurifier_Token_Text) {
|
} elseif ($token instanceof HTMLPurifier_Token_Text) {
|
||||||
return $this->escape($token->data);
|
return $this->escape($token->data);
|
||||||
|
|
||||||
|
} elseif ($token instanceof HTMLPurifier_Token_Comment) {
|
||||||
|
return '<!--' . $token->data . '-->';
|
||||||
} else {
|
} else {
|
||||||
return '';
|
return '';
|
||||||
|
|
||||||
|
@ -112,7 +112,6 @@ class HTMLPurifier_Lexer
|
|||||||
case 'DirectLex':
|
case 'DirectLex':
|
||||||
return new HTMLPurifier_Lexer_DirectLex();
|
return new HTMLPurifier_Lexer_DirectLex();
|
||||||
case 'PH5P':
|
case 'PH5P':
|
||||||
// experimental Lexer that must be manually included
|
|
||||||
return new HTMLPurifier_Lexer_PH5P();
|
return new HTMLPurifier_Lexer_PH5P();
|
||||||
default:
|
default:
|
||||||
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
|
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
|
||||||
|
@ -91,7 +91,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
$last = end($tokens);
|
$last = end($tokens);
|
||||||
$data = $node->data;
|
$data = $node->data;
|
||||||
// (note $node->tagname is already normalized)
|
// (note $node->tagname is already normalized)
|
||||||
if ($last instanceof HTMLPurifier_Token_Start && $last->name == 'script') {
|
if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) {
|
||||||
$new_data = trim($data);
|
$new_data = trim($data);
|
||||||
if (substr($new_data, 0, 4) === '<!--') {
|
if (substr($new_data, 0, 4) === '<!--') {
|
||||||
$data = substr($new_data, 4);
|
$data = substr($new_data, 4);
|
||||||
|
@ -81,7 +81,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
$cursor > 0 && // cursor is further than zero
|
$cursor > 0 && // cursor is further than zero
|
||||||
$loops % $synchronize_interval === 0 // time to synchronize!
|
$loops % $synchronize_interval === 0 // time to synchronize!
|
||||||
) {
|
) {
|
||||||
$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
|
$current_line = 1 + substr_count($html, $nl, 0, $cursor);
|
||||||
}
|
}
|
||||||
|
|
||||||
$position_next_lt = strpos($html, '<', $cursor);
|
$position_next_lt = strpos($html, '<', $cursor);
|
||||||
@ -106,7 +106,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
);
|
);
|
||||||
if ($maintain_line_numbers) {
|
if ($maintain_line_numbers) {
|
||||||
$token->line = $current_line;
|
$token->line = $current_line;
|
||||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
|
$current_line += substr_count($html, $nl, $cursor, $position_next_lt - $cursor);
|
||||||
}
|
}
|
||||||
$array[] = $token;
|
$array[] = $token;
|
||||||
$cursor = $position_next_lt + 1;
|
$cursor = $position_next_lt + 1;
|
||||||
@ -150,7 +150,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
|
|
||||||
// Check if it's a comment
|
// Check if it's a comment
|
||||||
if (
|
if (
|
||||||
substr($segment, 0, 3) === '!--'
|
strncmp('!--', $segment, 3) === 0
|
||||||
) {
|
) {
|
||||||
// re-determine segment length, looking for -->
|
// re-determine segment length, looking for -->
|
||||||
$position_comment_end = strpos($html, '-->', $cursor);
|
$position_comment_end = strpos($html, '-->', $cursor);
|
||||||
@ -168,13 +168,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
$segment = substr($html, $cursor, $strlen_segment);
|
$segment = substr($html, $cursor, $strlen_segment);
|
||||||
$token = new
|
$token = new
|
||||||
HTMLPurifier_Token_Comment(
|
HTMLPurifier_Token_Comment(
|
||||||
substr(
|
substr($segment, 3)
|
||||||
$segment, 3, $strlen_segment - 3
|
|
||||||
)
|
|
||||||
);
|
);
|
||||||
if ($maintain_line_numbers) {
|
if ($maintain_line_numbers) {
|
||||||
$token->line = $current_line;
|
$token->line = $current_line;
|
||||||
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
|
$current_line += substr_count($html, $nl, $cursor, $strlen_segment);
|
||||||
}
|
}
|
||||||
$array[] = $token;
|
$array[] = $token;
|
||||||
$cursor = $end ? $position_comment_end : $position_comment_end + 3;
|
$cursor = $end ? $position_comment_end : $position_comment_end + 3;
|
||||||
@ -189,7 +187,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
$token = new HTMLPurifier_Token_End($type);
|
$token = new HTMLPurifier_Token_End($type);
|
||||||
if ($maintain_line_numbers) {
|
if ($maintain_line_numbers) {
|
||||||
$token->line = $current_line;
|
$token->line = $current_line;
|
||||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||||
}
|
}
|
||||||
$array[] = $token;
|
$array[] = $token;
|
||||||
$inside_tag = false;
|
$inside_tag = false;
|
||||||
@ -213,7 +211,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
);
|
);
|
||||||
if ($maintain_line_numbers) {
|
if ($maintain_line_numbers) {
|
||||||
$token->line = $current_line;
|
$token->line = $current_line;
|
||||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||||
}
|
}
|
||||||
$array[] = $token;
|
$array[] = $token;
|
||||||
$cursor = $position_next_gt + 1;
|
$cursor = $position_next_gt + 1;
|
||||||
@ -242,7 +240,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
}
|
}
|
||||||
if ($maintain_line_numbers) {
|
if ($maintain_line_numbers) {
|
||||||
$token->line = $current_line;
|
$token->line = $current_line;
|
||||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||||
}
|
}
|
||||||
$array[] = $token;
|
$array[] = $token;
|
||||||
$inside_tag = false;
|
$inside_tag = false;
|
||||||
@ -274,7 +272,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
}
|
}
|
||||||
if ($maintain_line_numbers) {
|
if ($maintain_line_numbers) {
|
||||||
$token->line = $current_line;
|
$token->line = $current_line;
|
||||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||||
}
|
}
|
||||||
$array[] = $token;
|
$array[] = $token;
|
||||||
$cursor = $position_next_gt + 1;
|
$cursor = $position_next_gt + 1;
|
||||||
@ -302,22 +300,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
return $array;
|
return $array;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* PHP 4 compatible substr_count that implements offset and length
|
|
||||||
*/
|
|
||||||
protected function substrCount($haystack, $needle, $offset, $length) {
|
|
||||||
static $oldVersion;
|
|
||||||
if ($oldVersion === null) {
|
|
||||||
$oldVersion = version_compare(PHP_VERSION, '5.1', '<');
|
|
||||||
}
|
|
||||||
if ($oldVersion) {
|
|
||||||
$haystack = substr($haystack, $offset, $length);
|
|
||||||
return substr_count($haystack, $needle);
|
|
||||||
} else {
|
|
||||||
return substr_count($haystack, $needle, $offset, $length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes the inside of an HTML tag and makes an assoc array of attributes.
|
* Takes the inside of an HTML tag and makes an assoc array of attributes.
|
||||||
*
|
*
|
||||||
|
@ -15,3 +15,21 @@ function escapeHTML($string) {
|
|||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
|
||||||
|
function fix_magic_quotes(&$array) {
|
||||||
|
foreach ($array as $k => $val) {
|
||||||
|
if (!is_array($val)) {
|
||||||
|
$array[$k] = stripslashes($val);
|
||||||
|
} else {
|
||||||
|
fix_magic_quotes($array[$k]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fix_magic_quotes($_GET);
|
||||||
|
fix_magic_quotes($_POST);
|
||||||
|
fix_magic_quotes($_COOKIE);
|
||||||
|
fix_magic_quotes($_REQUEST);
|
||||||
|
fix_magic_quotes($_ENV);
|
||||||
|
fix_magic_quotes($_SERVER);
|
||||||
|
}
|
||||||
|
@ -55,18 +55,6 @@ function isInScopes($array = array()) {
|
|||||||
}
|
}
|
||||||
/**#@-*/
|
/**#@-*/
|
||||||
|
|
||||||
function printTokens($tokens, $index = null) {
|
|
||||||
$string = '<pre>';
|
|
||||||
$generator = new HTMLPurifier_Generator();
|
|
||||||
foreach ($tokens as $i => $token) {
|
|
||||||
if ($index === $i) $string .= '[<strong>';
|
|
||||||
$string .= "<sup>$i</sup>";
|
|
||||||
$string .= $generator->escape($generator->generateFromToken($token));
|
|
||||||
if ($index === $i) $string .= '</strong>]';
|
|
||||||
}
|
|
||||||
$string .= '</pre>';
|
|
||||||
echo $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The debugging singleton. Most interesting stuff happens here.
|
* The debugging singleton. Most interesting stuff happens here.
|
||||||
|
@ -168,6 +168,19 @@ text-align:right;
|
|||||||
|
|
||||||
p p div {
|
p p div {
|
||||||
text-align:left;
|
text-align:left;
|
||||||
|
}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_removeComments() {
|
||||||
|
$this->assertCleanCSS(
|
||||||
|
"<!--
|
||||||
|
div {
|
||||||
|
text-align:right;
|
||||||
|
}
|
||||||
|
-->",
|
||||||
|
"div {
|
||||||
|
text-align:right;
|
||||||
}"
|
}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -509,6 +509,29 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_tokenizeHTML_() {
|
||||||
|
$this->assertTokenization(
|
||||||
|
'<style type="text/css"><!--
|
||||||
|
div {}
|
||||||
|
--></style>',
|
||||||
|
array(
|
||||||
|
new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
|
||||||
|
new HTMLPurifier_Token_Text("\ndiv {}\n"),
|
||||||
|
new HTMLPurifier_Token_End('style'),
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
// PH5P doesn't seem to like style tags
|
||||||
|
'PH5P' => false,
|
||||||
|
// DirectLex defers to RemoveForeignElements for textification
|
||||||
|
'DirectLex' => array(
|
||||||
|
new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
|
||||||
|
new HTMLPurifier_Token_Comment("\ndiv {}\n"),
|
||||||
|
new HTMLPurifier_Token_End('style'),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
function test_tokenizeHTML_() {
|
function test_tokenizeHTML_() {
|
||||||
|
@ -159,4 +159,20 @@ function htmlpurifier_add_test($test, $test_file, $only_phpt = false) {
|
|||||||
default:
|
default:
|
||||||
trigger_error("$test_file is an invalid file for testing", E_USER_ERROR);
|
trigger_error("$test_file is an invalid file for testing", E_USER_ERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Debugging function that prints tokens in a user-friendly manner.
|
||||||
|
*/
|
||||||
|
function printTokens($tokens, $index = null) {
|
||||||
|
$string = '<pre>';
|
||||||
|
$generator = new HTMLPurifier_Generator();
|
||||||
|
foreach ($tokens as $i => $token) {
|
||||||
|
if ($index === $i) $string .= '[<strong>';
|
||||||
|
$string .= "<sup>$i</sup>";
|
||||||
|
$string .= $generator->escape($generator->generateFromToken($token));
|
||||||
|
if ($index === $i) $string .= '</strong>]';
|
||||||
|
}
|
||||||
|
$string .= '</pre>';
|
||||||
|
echo $string;
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user