0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-03 05:11:52 +00:00

[2.0.1] Fix DirectLex's incomprehension of un-armored script contents as CDATA using custom preg_replace_callback

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1244 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-26 16:08:42 +00:00
parent ae90bb919d
commit 275932ec05
4 changed files with 28 additions and 8 deletions

View File

@ -274,7 +274,6 @@ class HTMLPurifier_Lexer
* Special CDATA case that is especially convoluted for <script> * Special CDATA case that is especially convoluted for <script>
*/ */
function escapeCommentedCDATA($string) { function escapeCommentedCDATA($string) {
// <!--//--><![CDATA[//><!--
return preg_replace_callback( return preg_replace_callback(
'#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s', '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s',
array('HTMLPurifier_Lexer', 'CDATACallback'), array('HTMLPurifier_Lexer', 'CDATACallback'),

View File

@ -35,8 +35,25 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
*/ */
var $_whitespace = "\x20\x09\x0D\x0A"; var $_whitespace = "\x20\x09\x0D\x0A";
/**
 * Callback for the script CDATA fudge: HTML-escapes the contents captured
 * between a script element's tags and leaves both tags untouched.
 * @param $matches Regex match array; index 1 is the opening tag, index 2
 *                 the contents and index 3 the closing tag
 * @return String made of the opening tag, the escaped contents and the
 *         closing tag, in that order
 * @static
 */
function scriptCallback($matches) {
    $openingTag = $matches[1];
    // ENT_COMPAT converts double quotes but leaves single quotes as they are
    $escapedContents = htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8');
    $closingTag = $matches[3];
    return $openingTag . $escapedContents . $closingTag;
}
function tokenizeHTML($html, $config, &$context) { function tokenizeHTML($html, $config, &$context) {
// special normalization for script tags without any armor
// our "armor" heuristic is a < sign any number of whitespaces after
// the first script tag
if ($config->get('HTML', 'Trusted')) {
$html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
array('HTMLPurifier_Lexer_DirectLex', 'scriptCallback'), $html);
}
$html = $this->normalize($html, $config, $context); $html = $this->normalize($html, $config, $context);
$cursor = 0; // our location in the text $cursor = 0; // our location in the text

View File

@ -46,13 +46,6 @@ alert("<This is compatible with XHTML>");
array('HTML.Trusted' => true, 'Core.CommentScriptContents' => false) array('HTML.Trusted' => true, 'Core.CommentScriptContents' => false)
); );
// invalid children
$this->assertResult(
'<script type="text/javascript">PCDATA<span</script>',
'<script type="text/javascript">PCDATA</script>',
array('HTML.Trusted' => true, 'Core.CommentScriptContents' => false)
);
} }
} }

View File

@ -315,6 +315,17 @@ class HTMLPurifier_LexerTest extends UnitTestCase
$sax_expect[21] = false; $sax_expect[21] = false;
$dom_expect[21] = false; $dom_expect[21] = false;
// test CDATA tags
$input[22] = '<script>alert("<foo>");</script>';
$expect[22] = array(
new HTMLPurifier_Token_Start('script')
,new HTMLPurifier_Token_Text('alert("<foo>");')
,new HTMLPurifier_Token_End('script')
);
$config[22] = HTMLPurifier_Config::create(array('HTML.Trusted' => true));
$sax_expect[22] = false;
//$dom_expect[22] = false;
$default_config = HTMLPurifier_Config::createDefault(); $default_config = HTMLPurifier_Config::createDefault();
$default_context = new HTMLPurifier_Context(); $default_context = new HTMLPurifier_Context();
foreach($input as $i => $discard) { foreach($input as $i => $discard) {