0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-05 06:01:52 +00:00

[1.7.0] Contents between <script> tags are now completely removed if <script> is not allowed

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1145 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-16 19:31:45 +00:00
parent 7d4b532d6b
commit e840564228
6 changed files with 47 additions and 7 deletions

2
NEWS
View File

@ -23,6 +23,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
# Definitions (esp. HTMLDefinition) are now cached for a significant # Definitions (esp. HTMLDefinition) are now cached for a significant
performance boost. You can disable caching by setting %Core.DefinitionCache performance boost. You can disable caching by setting %Core.DefinitionCache
to null. to null.
# Contents between <script> tags are now completely removed if <script>
is not allowed
! HTML Purifier now works in PHP 4.3.2. ! HTML Purifier now works in PHP 4.3.2.
! Configuration form-editing API makes tweaking HTMLPurifier_Config a ! Configuration form-editing API makes tweaking HTMLPurifier_Config a
breeze! breeze!

5
TODO
View File

@ -19,16 +19,13 @@ TODO List
- Implement IDREF support (harder than it seems, since you cannot have - Implement IDREF support (harder than it seems, since you cannot have
IDREFs to non-existent IDs) IDREFs to non-existent IDs)
- Allow non-ASCII characters in font names - Allow non-ASCII characters in font names
- Genericize special cases in RemoveForeignElements
1.9 release [Error'ed] 1.9 release [Error'ed]
# Error logging for filtering/cleanup procedures # Error logging for filtering/cleanup procedures
- Requires I18N facilities to be created first (COMPLEX) - Requires I18N facilities to be created first (COMPLEX)
- XSS-attempt detection - XSS-attempt detection
- More fine-grained control over escaping behavior - More fine-grained control over escaping behavior
- Silently drop content in between SCRIPT tags (can be generalized to allow
specification of elements that, when detected as foreign, trigger removal
of children, although unbalanced tags could wreak havoc (or at least
delete the rest of the document)).
1.10 release [Do What I Mean, Not What I Say] 1.10 release [Do What I Mean, Not What I Say]
# Additional support for poorly written HTML # Additional support for poorly written HTML

View File

@ -13,6 +13,14 @@ HTMLPurifier_ConfigSchema::define(
'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.' 'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
); );
HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveScriptContents', true, 'bool', '
This directive enables HTML Purifier to remove not only script tags
but all of their contents. This directive has been available since 1.7.0,
revert to pre-1.7.0 behavior by setting to false.
'
);
/** /**
* Removes all unrecognized tags from the list of tokens. * Removes all unrecognized tags from the list of tokens.
* *
@ -28,9 +36,20 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$definition = $config->getHTMLDefinition(); $definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator(); $generator = new HTMLPurifier_Generator();
$result = array(); $result = array();
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$remove_invalid_img = $config->get('Core', 'RemoveInvalidImg'); $remove_invalid_img = $config->get('Core', 'RemoveInvalidImg');
$remove_script_contents = $config->get('Core', 'RemoveScriptContents');
// when set to a tag name, tokens are skipped until a tag token with that name is reached
$remove_until = false;
foreach($tokens as $token) { foreach($tokens as $token) {
if ($remove_until) {
if (empty($token->is_tag) || $token->name !== $remove_until) {
continue;
}
}
if (!empty( $token->is_tag )) { if (!empty( $token->is_tag )) {
// DEFINITION CALL // DEFINITION CALL
if (isset($definition->info[$token->name])) { if (isset($definition->info[$token->name])) {
@ -68,6 +87,17 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$generator->generateFromToken($token, $config, $context) $generator->generateFromToken($token, $config, $context)
); );
} else { } else {
// check if we need to destroy all of the tag's children
// CAN BE GENERICIZED
if ($token->name == 'script' && $remove_script_contents) {
if ($token->type == 'start') {
$remove_until = $token->name;
} elseif ($token->type == 'empty') {
// do nothing: we're still looking
} else {
$remove_until = false;
}
}
continue; continue;
} }
} elseif ($token->type == 'comment') { } elseif ($token->type == 'comment') {

View File

@ -7,9 +7,9 @@ class HTMLPurifier_HTMLModule_ScriptingTest extends HTMLPurifier_HTMLModuleHarne
function test() { function test() {
// default // default (remove everything)
$this->assertResult( $this->assertResult(
'<script type="text/javascript">foo();</script>', 'foo();' '<script type="text/javascript">foo();</script>', ''
); );
// enabled // enabled

View File

@ -25,6 +25,17 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
'BlingBong' 'BlingBong'
); );
$this->assertResult(
'<script>alert();</script>',
''
);
$this->assertResult(
'<script>alert();</script>',
'alert();',
array('Core.RemoveScriptContents' => false)
);
$this->assertResult( $this->assertResult(
'<menu><li>Item 1</li></menu>', '<menu><li>Item 1</li></menu>',
'<ul><li>Item 1</li></ul>' '<ul><li>Item 1</li></ul>'

View File

@ -76,7 +76,7 @@ class HTMLPurifierTest extends UnitTestCase
$this->purifier->purifyArray( $this->purifier->purifyArray(
array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>') array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>')
), ),
array('Good', '<b>Sketchy</b>', 'foo' => 'bad') array('Good', '<b>Sketchy</b>', 'foo' => '')
); );
$this->assertIsA($this->purifier->context, 'array'); $this->assertIsA($this->purifier->context, 'array');