mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 15:28:40 +00:00
Implement %HTML.AllowedComments and %HTML.AllowedCommentsRegexp
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
parent
e41af46a8b
commit
6b643ede02
2
NEWS
2
NEWS
@ -18,6 +18,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
|
||||
! Properly handle sub-lists directly nested inside of lists in
|
||||
a standards compliant way, by moving them into the preceding <li>
|
||||
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
|
||||
limited allowed comments in untrusted situations.
|
||||
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
||||
<yramirez-htmlpurifier@adicio.com> for reporting.
|
||||
- Explicitly initialize anonModule variable to null.
|
||||
|
@ -14,7 +14,7 @@
|
||||
<line>348</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>47</line>
|
||||
<line>50</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.MaxImgLength">
|
||||
@ -478,14 +478,24 @@
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveScriptContents">
|
||||
<directive id="HTML.AllowedComments">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>24</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.AllowedCommentsRegexp">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>25</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveScriptContents">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>28</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.HiddenElements">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>26</line>
|
||||
<line>29</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.HostBlacklist">
|
||||
|
Binary file not shown.
@ -0,0 +1,10 @@
|
||||
HTML.AllowedComments
|
||||
TYPE: lookup
|
||||
VERSION: 4.3.1
|
||||
DEFAULT: array()
|
||||
--DESCRIPTION--
|
||||
A whitelist which indicates what explicit comment bodies should be
|
||||
allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp
|
||||
(these directives are union'ed together, so a comment is considered
|
||||
valid if either directive deems it valid).
|
||||
--# vim: et sw=4 sts=4
|
@ -0,0 +1,15 @@
|
||||
HTML.AllowedCommentsRegexp
|
||||
TYPE: string/null
|
||||
VERSION: 4.3.1
|
||||
DEFAULT: NULL
|
||||
--DESCRIPTION--
|
||||
A regexp which, if it matches the body of a comment, indicates that
|
||||
the comment should be allowed. Leading and trailing whitespace is removed prior
|
||||
to running this regular expression.
|
||||
<strong>Warning:</strong> Make sure you specify
|
||||
correct anchor metacharacters <code>^regex$</code>, otherwise you may accept
|
||||
comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
|
||||
is probably not sufficiently strict, since it also allows <code>foobar</code>.
|
||||
See also %HTML.AllowedComments (these directives are union'ed together,
|
||||
so a comment is considered valid if either directive deems it valid).
|
||||
--# vim: et sw=4 sts=4
|
@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
|
||||
// currently only used to determine if comments should be kept
|
||||
$trusted = $config->get('HTML.Trusted');
|
||||
$comment_lookup = $config->get('HTML.AllowedComments');
|
||||
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
|
||||
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
|
||||
|
||||
$remove_script_contents = $config->get('Core.RemoveScriptContents');
|
||||
$hidden_elements = $config->get('Core.HiddenElements');
|
||||
@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
if ($textify_comments !== false) {
|
||||
$data = $token->data;
|
||||
$token = new HTMLPurifier_Token_Text($data);
|
||||
} elseif ($trusted) {
|
||||
// keep, but perform comment cleaning
|
||||
} elseif ($trusted || $check_comments) {
|
||||
// always cleanup comments
|
||||
$trailing_hyphen = false;
|
||||
if ($e) {
|
||||
// perform check whether or not there's a trailing hyphen
|
||||
if (substr($token->data, -1) == '-') {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
$trailing_hyphen = true;
|
||||
}
|
||||
}
|
||||
$token->data = rtrim($token->data, '-');
|
||||
$found_double_hyphen = false;
|
||||
while (strpos($token->data, '--') !== false) {
|
||||
if ($e && !$found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
$found_double_hyphen = true; // prevent double-erroring
|
||||
$found_double_hyphen = true;
|
||||
$token->data = str_replace('--', '-', $token->data);
|
||||
}
|
||||
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
|
||||
// OK good
|
||||
if ($e) {
|
||||
if ($trailing_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
}
|
||||
if ($found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ($e) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// strip comments
|
||||
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
|
@ -100,6 +100,16 @@ alert(<b>bold</b>);
|
||||
$this->assertResult('<!-- bo --- asdf--as -->', '<!-- bo - asdf-as -->');
|
||||
}
|
||||
|
||||
function testPreserveCommentsWithLookup() {
|
||||
$this->config->set('HTML.AllowedComments', array('allowed'));
|
||||
$this->assertResult('<!-- allowed --><!-- not allowed -->', '<!-- allowed -->');
|
||||
}
|
||||
|
||||
function testPreserveCommentsWithRegexp() {
|
||||
$this->config->set('HTML.AllowedCommentsRegexp', '/^allowed[1-9]$/');
|
||||
$this->assertResult('<!-- allowed1 --><!-- not allowed -->', '<!-- allowed1 -->');
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@ -48,14 +48,14 @@ class HTMLPurifier_Strategy_RemoveForeignElements_ErrorsTest extends HTMLPurifie
|
||||
function testTrailingHyphenInCommentRemoved() {
|
||||
$this->config->set('HTML.Trusted', true);
|
||||
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test --', 1));
|
||||
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test ', 1));
|
||||
$this->invoke('<!-- test ---->');
|
||||
}
|
||||
|
||||
function testDoubleHyphenInCommentRemoved() {
|
||||
$this->config->set('HTML.Trusted', true);
|
||||
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test --- test -- test ', 1));
|
||||
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test - test - test ', 1));
|
||||
$this->invoke('<!-- test --- test -- test -->');
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user