mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-18 11:41:52 +00:00
[2.0.1] Improve special case handling for <script>
- DirectLex now honors comments with greater than or less than signs in them
- Comments are transformed into script elements, ending comments are scrapped
- Buggy generator code rewritten to be more error-proof
- AttrValidator checks if token has attributes before processing
- Remove invalid documentation from Scripting
- "Commenting" of script elements switched to the more advanced version

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1189 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
e55551ecdd
commit
bf0d659c47
3
NEWS
3
NEWS
@ -9,6 +9,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. Internal change
|
. Internal change
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
|
2.0.1, unknown release date
|
||||||
|
- Clean up special case code for <script> tags
|
||||||
|
|
||||||
2.0.0, released 2007-06-20
|
2.0.0, released 2007-06-20
|
||||||
# Completely refactored HTMLModuleManager, decentralizing safety
|
# Completely refactored HTMLModuleManager, decentralizing safety
|
||||||
information
|
information
|
||||||
|
@ -8,6 +8,8 @@ class HTMLPurifier_AttrValidator
|
|||||||
|
|
||||||
$definition = $config->getHTMLDefinition();
|
$definition = $config->getHTMLDefinition();
|
||||||
|
|
||||||
|
if ($token->type !== 'start' && $token->type !== 'empty') return $token;
|
||||||
|
|
||||||
// create alias to global definition array, see also $defs
|
// create alias to global definition array, see also $defs
|
||||||
// DEFINITION CALL
|
// DEFINITION CALL
|
||||||
$d_defs = $definition->info_global_attr;
|
$d_defs = $definition->info_global_attr;
|
||||||
|
@ -4,7 +4,7 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'Output', 'CommentScriptContents', true, 'bool',
|
'Output', 'CommentScriptContents', true, 'bool',
|
||||||
'Determines whether or not HTML Purifier should attempt to fix up '.
|
'Determines whether or not HTML Purifier should attempt to fix up '.
|
||||||
'the contents of script tags for legacy browsers with comments. This '.
|
'the contents of script tags for legacy browsers with comments. This '.
|
||||||
'directive was available since 1.7.'
|
'directive was available since 2.0.0.'
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigSchema::defineAlias('Core', 'CommentScriptContents', 'Output', 'CommentScriptContents');
|
HTMLPurifier_ConfigSchema::defineAlias('Core', 'CommentScriptContents', 'Output', 'CommentScriptContents');
|
||||||
|
|
||||||
@ -76,13 +76,17 @@ class HTMLPurifier_Generator
|
|||||||
|
|
||||||
if (!$tokens) return '';
|
if (!$tokens) return '';
|
||||||
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
|
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
|
||||||
if ($this->_scriptFix && $tokens[$i]->name === 'script') {
|
if ($this->_scriptFix && $tokens[$i]->name === 'script'
|
||||||
|
&& $i + 2 < $size && $tokens[$i+2]->type == 'end') {
|
||||||
// script special case
|
// script special case
|
||||||
|
// the contents of the script block must be ONE token
|
||||||
|
// for this to work
|
||||||
$html .= $this->generateFromToken($tokens[$i++]);
|
$html .= $this->generateFromToken($tokens[$i++]);
|
||||||
$html .= $this->generateScriptFromToken($tokens[$i++]);
|
$html .= $this->generateScriptFromToken($tokens[$i++]);
|
||||||
while ($tokens[$i]->name != 'script') {
|
// We're not going to do this: it wouldn't be valid anyway
|
||||||
$html .= $this->generateScriptFromToken($tokens[$i++]);
|
//while ($tokens[$i]->name != 'script') {
|
||||||
}
|
// $html .= $this->generateScriptFromToken($tokens[$i++]);
|
||||||
|
//}
|
||||||
}
|
}
|
||||||
$html .= $this->generateFromToken($tokens[$i]);
|
$html .= $this->generateFromToken($tokens[$i]);
|
||||||
}
|
}
|
||||||
@ -148,10 +152,12 @@ class HTMLPurifier_Generator
|
|||||||
* --> somewhere inside the script contents.
|
* --> somewhere inside the script contents.
|
||||||
*/
|
*/
|
||||||
function generateScriptFromToken($token) {
|
function generateScriptFromToken($token) {
|
||||||
if (!$token->type == 'text') return $this->generateFromToken($token);
|
if ($token->type != 'text') return $this->generateFromToken($token);
|
||||||
return '<!--' . PHP_EOL . $token->data . PHP_EOL . '// -->';
|
// return '<!--' . PHP_EOL . trim($token->data) . PHP_EOL . '// -->';
|
||||||
// more advanced version:
|
// more advanced version:
|
||||||
// return '<!--//--><![CDATA[//><!--' . PHP_EOL . $token->data . PHP_EOL . '//--><!]]>';
|
// thanks <http://lachy.id.au/log/2005/05/script-comments>
|
||||||
|
$data = preg_replace('#//\s*$#', '', $token->data);
|
||||||
|
return '<!--//--><![CDATA[//><!--' . PHP_EOL . trim($data) . PHP_EOL . '//--><!]]>';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -5,14 +5,6 @@
|
|||||||
WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING
|
WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING
|
||||||
INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!!
|
INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!!
|
||||||
|
|
||||||
Usage:
|
|
||||||
|
|
||||||
require_once 'HTMLPurifier/HTMLModule/Scripting.php';
|
|
||||||
$def =& $config->getHTMLDefinition(true); // get the raw version
|
|
||||||
$def->manager->addModule('Scripting');
|
|
||||||
|
|
||||||
This must come before any other calls to getHTMLDefinition()
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -63,6 +55,7 @@ class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
|
|||||||
);
|
);
|
||||||
$this->info['script']->content_model = '#PCDATA';
|
$this->info['script']->content_model = '#PCDATA';
|
||||||
$this->info['script']->content_model_type = 'optional';
|
$this->info['script']->content_model_type = 'optional';
|
||||||
|
$this->info['script']->attr_transform_pre['type'] =
|
||||||
$this->info['script']->attr_transform_post['type'] =
|
$this->info['script']->attr_transform_post['type'] =
|
||||||
new HTMLPurifier_AttrTransform_ScriptRequired();
|
new HTMLPurifier_AttrTransform_ScriptRequired();
|
||||||
}
|
}
|
||||||
|
@ -259,7 +259,19 @@ class HTMLPurifier_Lexer
|
|||||||
*/
|
*/
|
||||||
function escapeCDATA($string) {
|
function escapeCDATA($string) {
|
||||||
return preg_replace_callback(
|
return preg_replace_callback(
|
||||||
'/<!\[CDATA\[(.+?)\]\]>/',
|
'/<!\[CDATA\[(.+?)\]\]>/s',
|
||||||
|
array('HTMLPurifier_Lexer', 'CDATACallback'),
|
||||||
|
$string
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Special CDATA case that is especially convoluted for <script>
|
||||||
|
*/
|
||||||
|
function escapeCommentedCDATA($string) {
|
||||||
|
// <!--//--><![CDATA[//><!--
|
||||||
|
return preg_replace_callback(
|
||||||
|
'#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
|
||||||
array('HTMLPurifier_Lexer', 'CDATACallback'),
|
array('HTMLPurifier_Lexer', 'CDATACallback'),
|
||||||
$string
|
$string
|
||||||
);
|
);
|
||||||
@ -291,6 +303,11 @@ class HTMLPurifier_Lexer
|
|||||||
$html = $this->extractBody($html);
|
$html = $this->extractBody($html);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($config->get('HTML', 'Trusted')) {
|
||||||
|
// escape convoluted CDATA
|
||||||
|
$html = $this->escapeCommentedCDATA($html);
|
||||||
|
}
|
||||||
|
|
||||||
// escape CDATA
|
// escape CDATA
|
||||||
$html = $this->escapeCDATA($html);
|
$html = $this->escapeCDATA($html);
|
||||||
|
|
||||||
|
@ -83,10 +83,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
// intercept non element nodes. WE MUST catch all of them,
|
// intercept non element nodes. WE MUST catch all of them,
|
||||||
// but we're not getting the character reference nodes because
|
// but we're not getting the character reference nodes because
|
||||||
// those should have been preprocessed
|
// those should have been preprocessed
|
||||||
if ($node->nodeType === XML_TEXT_NODE ||
|
if ($node->nodeType === XML_TEXT_NODE) {
|
||||||
$node->nodeType === XML_CDATA_SECTION_NODE) {
|
|
||||||
$tokens[] = $this->factory->createText($node->data);
|
$tokens[] = $this->factory->createText($node->data);
|
||||||
return;
|
return;
|
||||||
|
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
|
||||||
|
// undo DOM's special treatment of <script> tags
|
||||||
|
$tokens[] = $this->factory->createText($this->parseData($node->data));
|
||||||
|
return;
|
||||||
} elseif ($node->nodeType === XML_COMMENT_NODE) {
|
} elseif ($node->nodeType === XML_COMMENT_NODE) {
|
||||||
$tokens[] = $this->factory->createComment($node->data);
|
$tokens[] = $this->factory->createComment($node->data);
|
||||||
return;
|
return;
|
||||||
|
@ -126,22 +126,34 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
|
|
||||||
// Check if it's a comment
|
// Check if it's a comment
|
||||||
if (
|
if (
|
||||||
substr($segment, 0, 3) == '!--' &&
|
substr($segment, 0, 3) == '!--'
|
||||||
substr($segment, $strlen_segment-2, 2) == '--'
|
|
||||||
) {
|
) {
|
||||||
|
// re-determine segment length, looking for -->
|
||||||
|
$position_comment_end = strpos($html, '-->', $cursor);
|
||||||
|
if ($position_comment_end === false) {
|
||||||
|
// uh oh, we have a comment that extends to
|
||||||
|
// infinity. Can't be helped: set comment
|
||||||
|
// end position to end of string
|
||||||
|
$position_comment_end = strlen($html);
|
||||||
|
$end = true;
|
||||||
|
} else {
|
||||||
|
$end = false;
|
||||||
|
}
|
||||||
|
$strlen_segment = $position_comment_end - $cursor;
|
||||||
|
$segment = substr($html, $cursor, $strlen_segment);
|
||||||
$token = new
|
$token = new
|
||||||
HTMLPurifier_Token_Comment(
|
HTMLPurifier_Token_Comment(
|
||||||
substr(
|
substr(
|
||||||
$segment, 3, $strlen_segment - 5
|
$segment, 3, $strlen_segment - 3
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
if ($maintain_line_numbers) {
|
if ($maintain_line_numbers) {
|
||||||
$token->line = $current_line;
|
$token->line = $current_line;
|
||||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
|
||||||
}
|
}
|
||||||
$array[] = $token;
|
$array[] = $token;
|
||||||
|
$cursor = $end ? $position_comment_end : $position_comment_end + 3;
|
||||||
$inside_tag = false;
|
$inside_tag = false;
|
||||||
$cursor = $position_next_gt + 1;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,9 +17,11 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'RemoveScriptContents', true, 'bool', '
|
'Core', 'RemoveScriptContents', true, 'bool', '
|
||||||
This directive enables HTML Purifier to remove not only script tags
|
<p>
|
||||||
but all of their contents. This directive has been available since 2.0.0,
|
This directive enables HTML Purifier to remove not only script tags
|
||||||
revert to pre-2.0.0 behavior by setting to false.
|
but all of their contents. This directive has been available since 2.0.0,
|
||||||
|
revert to pre-2.0.0 behavior by setting to false.
|
||||||
|
</p>
|
||||||
'
|
'
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -48,6 +50,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
// removes tokens until it reaches a closing tag with its value
|
// removes tokens until it reaches a closing tag with its value
|
||||||
$remove_until = false;
|
$remove_until = false;
|
||||||
|
|
||||||
|
// converts comments into text tokens when this is equal to a tag name
|
||||||
|
$textify_comments = false;
|
||||||
|
|
||||||
foreach($tokens as $token) {
|
foreach($tokens as $token) {
|
||||||
if ($remove_until) {
|
if ($remove_until) {
|
||||||
if (empty($token->is_tag) || $token->name !== $remove_until) {
|
if (empty($token->is_tag) || $token->name !== $remove_until) {
|
||||||
@ -88,6 +93,13 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
$token->armor['ValidateAttributes'] = true;
|
$token->armor['ValidateAttributes'] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CAN BE GENERICIZED
|
||||||
|
if ($token->name == 'script' && $token->type == 'start') {
|
||||||
|
$textify_comments = $token->name;
|
||||||
|
} elseif ($token->name === $textify_comments && $token->type == 'end') {
|
||||||
|
$textify_comments = false;
|
||||||
|
}
|
||||||
|
|
||||||
} elseif ($escape_invalid_tags) {
|
} elseif ($escape_invalid_tags) {
|
||||||
// invalid tag, generate HTML and insert in
|
// invalid tag, generate HTML and insert in
|
||||||
$token = new HTMLPurifier_Token_Text(
|
$token = new HTMLPurifier_Token_Text(
|
||||||
@ -108,8 +120,14 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
} elseif ($token->type == 'comment') {
|
} elseif ($token->type == 'comment') {
|
||||||
|
// textify comments in script tags when they are allowed
|
||||||
|
if ($textify_comments !== false) {
|
||||||
|
$data = $token->data;
|
||||||
|
$token = new HTMLPurifier_Token_Text($data);
|
||||||
|
} else {
|
||||||
// strip comments
|
// strip comments
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
} elseif ($token->type == 'text') {
|
} elseif ($token->type == 'text') {
|
||||||
} else {
|
} else {
|
||||||
continue;
|
continue;
|
||||||
|
@ -141,9 +141,31 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
|
|||||||
new HTMLPurifier_Token_Text('alert(3 < 5);'),
|
new HTMLPurifier_Token_Text('alert(3 < 5);'),
|
||||||
new HTMLPurifier_Token_End('script')
|
new HTMLPurifier_Token_End('script')
|
||||||
),
|
),
|
||||||
"<script><!--\nalert(3 < 5);\n// --></script>"
|
"<script><!--//--><![CDATA[//><!--\nalert(3 < 5);\n//--><!]]></script>"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// if missing close tag, don't do anything
|
||||||
|
$this->assertGeneration(
|
||||||
|
array(
|
||||||
|
new HTMLPurifier_Token_Start('script'),
|
||||||
|
new HTMLPurifier_Token_Text('alert(3 < 5);'),
|
||||||
|
),
|
||||||
|
"<script>alert(3 < 5);"
|
||||||
|
);
|
||||||
|
|
||||||
|
// if two script blocks, don't do anything
|
||||||
|
$this->assertGeneration(
|
||||||
|
array(
|
||||||
|
new HTMLPurifier_Token_Start('script'),
|
||||||
|
new HTMLPurifier_Token_Text('alert(3 < 5);'),
|
||||||
|
new HTMLPurifier_Token_Text('foo();'),
|
||||||
|
new HTMLPurifier_Token_End('script')
|
||||||
|
),
|
||||||
|
"<script>alert(3 < 5);foo();</script>"
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
$this->config = HTMLPurifier_Config::createDefault();
|
$this->config = HTMLPurifier_Config::createDefault();
|
||||||
$this->config->set('Core', 'CommentScriptContents', false);
|
$this->config->set('Core', 'CommentScriptContents', false);
|
||||||
|
|
||||||
|
@ -18,6 +18,14 @@ class HTMLPurifier_HTMLModule_ScriptingTest extends HTMLPurifier_HTMLModuleHarne
|
|||||||
array('HTML.Trusted' => true)
|
array('HTML.Trusted' => true)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// CDATA
|
||||||
|
$this->assertResult(
|
||||||
|
'//<![CDATA[
|
||||||
|
alert("<This is compatible with XHTML>");
|
||||||
|
//]]> ', true,
|
||||||
|
array('HTML.Trusted' => true)
|
||||||
|
);
|
||||||
|
|
||||||
// max
|
// max
|
||||||
$this->assertResult(
|
$this->assertResult(
|
||||||
'<script
|
'<script
|
||||||
|
@ -299,6 +299,22 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
|||||||
$sax_expect[19] = false; // SAX drops the < character
|
$sax_expect[19] = false; // SAX drops the < character
|
||||||
$dom_expect[19] = false; // DOM drops the entire pseudo-tag
|
$dom_expect[19] = false; // DOM drops the entire pseudo-tag
|
||||||
|
|
||||||
|
// test comment parsing with funky characters inside
|
||||||
|
$input[20] = '<!-- This >< comment --><br />';
|
||||||
|
$expect[20] = array(
|
||||||
|
new HTMLPurifier_Token_Comment(' This >< comment '),
|
||||||
|
new HTMLPurifier_Token_Empty('br')
|
||||||
|
);
|
||||||
|
$sax_expect[20] = false;
|
||||||
|
|
||||||
|
// test comment parsing of missing end
|
||||||
|
$input[21] = '<!-- This >< comment';
|
||||||
|
$expect[21] = array(
|
||||||
|
new HTMLPurifier_Token_Comment(' This >< comment')
|
||||||
|
);
|
||||||
|
$sax_expect[21] = false;
|
||||||
|
$dom_expect[21] = false;
|
||||||
|
|
||||||
$default_config = HTMLPurifier_Config::createDefault();
|
$default_config = HTMLPurifier_Config::createDefault();
|
||||||
$default_context = new HTMLPurifier_Context();
|
$default_context = new HTMLPurifier_Context();
|
||||||
foreach($input as $i => $discard) {
|
foreach($input as $i => $discard) {
|
||||||
|
@ -79,6 +79,18 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
|
|||||||
array('HTML.Allowed' => 'div')
|
array('HTML.Allowed' => 'div')
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// text-ify commented script contents ( the trailing comment gets
|
||||||
|
// removed during generation )
|
||||||
|
$this->assertResult(
|
||||||
|
'<script type="text/javascript"><!--
|
||||||
|
alert(<b>bold</b>);
|
||||||
|
// --></script>',
|
||||||
|
'<script type="text/javascript">
|
||||||
|
alert(<b>bold</b>);
|
||||||
|
// </script>',
|
||||||
|
array('HTML.Trusted' => true, 'Output.CommentScriptContents' => false)
|
||||||
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -12,9 +12,9 @@ class HTMLPurifierTest extends UnitTestCase
|
|||||||
$this->purifier = new HTMLPurifier();
|
$this->purifier = new HTMLPurifier();
|
||||||
}
|
}
|
||||||
|
|
||||||
function assertPurification($input, $expect = null) {
|
function assertPurification($input, $expect = null, $config = array()) {
|
||||||
if ($expect === null) $expect = $input;
|
if ($expect === null) $expect = $input;
|
||||||
$result = $this->purifier->purify($input);
|
$result = $this->purifier->purify($input, $config);
|
||||||
$this->assertIdentical($expect, $result);
|
$this->assertIdentical($expect, $result);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,6 +97,41 @@ class HTMLPurifierTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function testScript() {
|
||||||
|
$this->purifier = new HTMLPurifier(array('HTML.Trusted' => true));
|
||||||
|
$ideal = '<script type="text/javascript"><!--//--><![CDATA[//><!--
|
||||||
|
alert("<This is compatible with XHTML>");
|
||||||
|
//--><!]]></script>';
|
||||||
|
|
||||||
|
$this->assertPurification($ideal);
|
||||||
|
|
||||||
|
$this->assertPurification(
|
||||||
|
'<script type="text/javascript"><![CDATA[
|
||||||
|
alert("<This is compatible with XHTML>");
|
||||||
|
]]></script>',
|
||||||
|
$ideal
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertPurification(
|
||||||
|
'<script type="text/javascript">alert("<This is compatible with XHTML>");</script>',
|
||||||
|
$ideal
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertPurification(
|
||||||
|
'<script type="text/javascript"><!--
|
||||||
|
alert("<This is compatible with XHTML>");
|
||||||
|
//--></script>',
|
||||||
|
$ideal
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertPurification(
|
||||||
|
'<script type="text/javascript"><![CDATA[
|
||||||
|
alert("<This is compatible with XHTML>");
|
||||||
|
//]]></script>',
|
||||||
|
$ideal
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
Loading…
Reference in New Issue
Block a user