0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

Implement %HTML.AllowedComments and %HTML.AllowedCommentsRegexp

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang 2011-12-26 15:34:42 +08:00
parent e41af46a8b
commit 6b643ede02
8 changed files with 76 additions and 12 deletions

2
NEWS
View File

@ -18,6 +18,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
! Properly handle sub-lists directly nested inside of lists in
a standards compliant way, by moving them into the preceding <li>
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
limited allowed comments in untrusted situations.
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
<yramirez-htmlpurifier@adicio.com> for reporting.
- Explicitly initialize anonModule variable to null.

View File

@ -14,7 +14,7 @@
<line>348</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>47</line>
<line>50</line>
</file>
</directive>
<directive id="CSS.MaxImgLength">
@ -478,14 +478,24 @@
<line>19</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<directive id="HTML.AllowedComments">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>24</line>
</file>
</directive>
<directive id="HTML.AllowedCommentsRegexp">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>25</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>28</line>
</file>
</directive>
<directive id="Core.HiddenElements">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>26</line>
<line>29</line>
</file>
</directive>
<directive id="URI.HostBlacklist">

View File

@ -0,0 +1,10 @@
HTML.AllowedComments
TYPE: lookup
VERSION: 4.3.1
DEFAULT: array()
--DESCRIPTION--
A whitelist which indicates what explicit comment bodies should be
allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp
(these directives are union'ed together, so a comment is considered
valid if any directive deems it valid.)
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,15 @@
HTML.AllowedCommentsRegexp
TYPE: string/null
VERSION: 4.3.1
DEFAULT: NULL
--DESCRIPTION--
A regexp, which if it matches the body of a comment, indicates that
it should be allowed. Trailing and leading spaces are removed prior
to running this regular expression.
<strong>Warning:</strong> Make sure you specify
correct anchor metacharacters <code>^regex$</code>, otherwise you may accept
comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
is probably not sufficiently strict, since it also allows <code>foobar</code>.
See also %HTML.AllowedComments (these directives are union'ed together,
so a comment is considered valid if any directive deems it valid.)
--# vim: et sw=4 sts=4

View File

@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// currently only used to determine if comments should be kept
$trusted = $config->get('HTML.Trusted');
$comment_lookup = $config->get('HTML.AllowedComments');
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
$remove_script_contents = $config->get('Core.RemoveScriptContents');
$hidden_elements = $config->get('Core.HiddenElements');
@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if ($textify_comments !== false) {
$data = $token->data;
$token = new HTMLPurifier_Token_Text($data);
} elseif ($trusted) {
// keep, but perform comment cleaning
} elseif ($trusted || $check_comments) {
// always cleanup comments
$trailing_hyphen = false;
if ($e) {
// perform check whether or not there's a trailing hyphen
if (substr($token->data, -1) == '-') {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
$trailing_hyphen = true;
}
}
$token->data = rtrim($token->data, '-');
$found_double_hyphen = false;
while (strpos($token->data, '--') !== false) {
if ($e && !$found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
$found_double_hyphen = true; // prevent double-erroring
$found_double_hyphen = true;
$token->data = str_replace('--', '-', $token->data);
}
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
// OK good
if ($e) {
if ($trailing_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
}
if ($found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
}
} else {
if ($e) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
}
continue;
}
} else {
// strip comments
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');

View File

@ -100,6 +100,16 @@ alert(&lt;b&gt;bold&lt;/b&gt;);
$this->assertResult('<!-- bo --- asdf--as -->', '<!-- bo - asdf-as -->');
}
function testPreserveCommentsWithLookup() {
$this->config->set('HTML.AllowedComments', array('allowed'));
$this->assertResult('<!-- allowed --><!-- not allowed -->', '<!-- allowed -->');
}
function testPreserveCommentsWithRegexp() {
$this->config->set('HTML.AllowedCommentsRegexp', '/^allowed[1-9]$/');
$this->assertResult('<!-- allowed1 --><!-- not allowed -->', '<!-- allowed1 -->');
}
}
// vim: et sw=4 sts=4

View File

@ -48,14 +48,14 @@ class HTMLPurifier_Strategy_RemoveForeignElements_ErrorsTest extends HTMLPurifie
function testTrailingHyphenInCommentRemoved() {
$this->config->set('HTML.Trusted', true);
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test --', 1));
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test ', 1));
$this->invoke('<!-- test ---->');
}
function testDoubleHyphenInCommentRemoved() {
$this->config->set('HTML.Trusted', true);
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test --- test -- test ', 1));
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test - test - test ', 1));
$this->invoke('<!-- test --- test -- test -->');
}