0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-20 12:31:53 +00:00

Fix #67, don't use <body> tags in comments for %Core.ConvertDocumentToFragment

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
Edward Z. Yang 2016-03-27 15:19:32 -07:00
parent f14076dc3e
commit b4981c3395
3 changed files with 21 additions and 4 deletions

2
NEWS
View File

@@ -24,6 +24,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
   ul/ol without allowing li.
 - On some versions of PHP, the Serializer DefinitionCache could
   infinite loop when the directory exists but is not listable. (#49)
+- Don't match for <body> inside comments with
+  %Core.ConvertDocumentToFragment. (#67)
 
 4.7.0, released 2015-08-04
 # opacity is now considered a "tricky" CSS property rather than a

View File

@@ -345,13 +345,18 @@ class HTMLPurifier_Lexer
     public function extractBody($html)
     {
         $matches = array();
-        $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches);
+        $result = preg_match('|(.*?)<body[^>]*>(.*)</body>|is', $html, $matches);
         if ($result) {
-            return $matches[1];
-        } else {
-            return $html;
+            // Make sure it's not in a comment
+            $comment_start = strrpos($matches[1], '<!--');
+            $comment_end = strrpos($matches[1], '-->');
+            if ($comment_start === false ||
+                ($comment_end !== false && $comment_end > $comment_start)) {
+                return $matches[2];
+            }
         }
+        return $html;
     }
 }
 
 // vim: et sw=4 sts=4

View File

@@ -169,6 +169,16 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
         $this->assertExtractBody('<body>foo</body>bar</body>', 'foo</body>bar');
     }
 
+    public function test_extractBody_ignoreCommented()
+    {
+        $this->assertExtractBody('$<!-- <body>foo</body> -->^');
+    }
+
+    public function test_extractBody_butCanStillWork()
+    {
+        $this->assertExtractBody('<!-- b --><body>a</body>', 'a');
+    }
+
     // HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------
 
     public function assertTokenization($input, $expect, $alt_expect = array())