0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-12 16:38:40 +00:00

[2.1.0] Genericize element contents removal. This is done in a slightly hacky way since ElementDef is not available, but should be sufficient.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1313 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-07-11 20:42:58 +00:00
parent 732fe5cad7
commit 89622c964e
5 changed files with 56 additions and 14 deletions

3
NEWS
View File

@ -31,6 +31,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
%HTML.Allowed %HTML.Allowed
. ConfigForm generates textareas instead of text inputs for lists, hashes, . ConfigForm generates textareas instead of text inputs for lists, hashes,
lookups, text and itext fields lookups, text and itext fields
. Hidden element content removal genericized: %Core.HiddenElements can
be used to customize this behavior; by default, <script> and <style> are
hidden
2.0.1, released 2007-06-27 2.0.1, released 2007-06-27
! Tag auto-closing now based on a ChildDef heuristic rather than a ! Tag auto-closing now based on a ChildDef heuristic rather than a

View File

@ -28,7 +28,7 @@ $messages = array(
'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text', 'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text',
'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed', 'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed',
'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed', 'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed',
'Strategy_RemoveForeignElements: Script removed' => 'Script removed', 'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed',
'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element were removed to end', 'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element were removed to end',
'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', 'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed',

View File

@ -8,19 +8,38 @@ require_once 'HTMLPurifier/TagTransform.php';
require_once 'HTMLPurifier/AttrValidator.php'; require_once 'HTMLPurifier/AttrValidator.php';
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveInvalidImg', true, 'bool', 'Core', 'RemoveInvalidImg', true, 'bool', '
'This directive enables pre-emptive URI checking in <code>img</code> '. <p>
'tags, as the attribute validation strategy is not authorized to '. This directive enables pre-emptive URI checking in <code>img</code>
'remove elements from the document. This directive has been available '. tags, as the attribute validation strategy is not authorized to
'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.' remove elements from the document. This directive has been available
since 1.3.0, revert to pre-1.3.0 behavior by setting to false.
</p>
'
); );
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveScriptContents', true, 'bool', ' 'Core', 'RemoveScriptContents', null, 'bool/null', '
<p> <p>
This directive enables HTML Purifier to remove not only script tags This directive enables HTML Purifier to remove not only script tags
but all of their contents. This directive has been available since 2.0.0, but all of their contents. This directive has been deprecated since 2.1.0,
revert to pre-2.0.0 behavior by setting to false. and when not set the value of %Core.HiddenElements will take
precedence. This directive has been available since 2.0.0, and can be used to
revert to pre-2.0.0 behavior by setting it to false.
</p>
'
);
HTMLPurifier_ConfigSchema::define(
'Core', 'HiddenElements', array('script' => true, 'style' => true), 'lookup', '
<p>
This directive is a lookup array of elements which should have their
contents removed when they are not allowed by the HTML definition.
For example, the contents of a <code>script</code> tag are not
normally shown in a document, so if script tags are to be removed,
their contents should be removed too. This is opposed to a <code>b</code>
tag, which defines some presentational changes but does not hide its
contents.
</p> </p>
' '
); );
@ -43,7 +62,16 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$remove_invalid_img = $config->get('Core', 'RemoveInvalidImg'); $remove_invalid_img = $config->get('Core', 'RemoveInvalidImg');
$remove_script_contents = $config->get('Core', 'RemoveScriptContents'); $remove_script_contents = $config->get('Core', 'RemoveScriptContents');
$hidden_elements = $config->get('Core', 'HiddenElements');
// remove script contents compatibility
if ($remove_script_contents === true) {
$hidden_elements['script'] = true;
} elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
unset($hidden_elements['script']);
}
$attr_validator = new HTMLPurifier_AttrValidator(); $attr_validator = new HTMLPurifier_AttrValidator();
@ -107,7 +135,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} }
// CAN BE GENERICIZED // CAN BE GENERICIZED
if ($token->name == 'script' && $token->type == 'start') { if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
$textify_comments = $token->name; $textify_comments = $token->name;
} elseif ($token->name === $textify_comments && $token->type == 'end') { } elseif ($token->name === $textify_comments && $token->type == 'end') {
$textify_comments = false; $textify_comments = false;
@ -122,7 +150,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} else { } else {
// check if we need to destroy all of the tag's children // check if we need to destroy all of the tag's children
// CAN BE GENERICIZED // CAN BE GENERICIZED
if (($token->name == 'script' && $remove_script_contents) || $token->name == 'style') { if (isset($hidden_elements[$token->name])) {
if ($token->type == 'start') { if ($token->type == 'start') {
$remove_until = $token->name; $remove_until = $token->name;
} elseif ($token->type == 'empty') { } elseif ($token->type == 'empty') {
@ -130,7 +158,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} else { } else {
$remove_until = false; $remove_until = false;
} }
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Script removed'); if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
} else { } else {
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed'); if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
} }

View File

@ -30,12 +30,23 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
'' ''
); );
$this->assertResult(
'<style>.foo {blink;}</style>',
''
);
$this->assertResult( $this->assertResult(
'<script>alert();</script>', '<script>alert();</script>',
'alert();', 'alert();',
array('Core.RemoveScriptContents' => false) array('Core.RemoveScriptContents' => false)
); );
$this->assertResult(
'<script>alert();</script>',
'alert();',
array('Core.HiddenElements' => array())
);
$this->assertResult( $this->assertResult(
'<menu><li>Item 1</li></menu>', '<menu><li>Item 1</li></menu>',
'<ul><li>Item 1</li></ul>' '<ul><li>Item 1</li></ul>'

View File

@ -48,8 +48,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements_ErrorsTest extends HTMLPurifie
$this->invoke('<!-- test -->'); $this->invoke('<!-- test -->');
} }
function testScriptRemoved() { function testForeignMetaElementRemoved() {
$this->collector->expectAt(0, 'send', array(E_ERROR, 'Strategy_RemoveForeignElements: Script removed')); $this->collector->expectAt(0, 'send', array(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed'));
$this->collector->expectContextAt(0, 'CurrentToken', new HTMLPurifier_Token_Start('script', array(), 1)); $this->collector->expectContextAt(0, 'CurrentToken', new HTMLPurifier_Token_Start('script', array(), 1));
$this->collector->expectAt(1, 'send', array(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', 'script')); $this->collector->expectAt(1, 'send', array(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', 'script'));
$this->invoke('<script>asdf'); $this->invoke('<script>asdf');