mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-23 05:41:53 +00:00
cf7a50163c
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1172 48356398-32a2-884e-a903-53898d9a118a
121 lines
4.7 KiB
PHP
121 lines
4.7 KiB
PHP
<?php
|
|
|
|
require_once 'HTMLPurifier/Strategy.php';
|
|
require_once 'HTMLPurifier/HTMLDefinition.php';
|
|
require_once 'HTMLPurifier/Generator.php';
|
|
require_once 'HTMLPurifier/TagTransform.php';
|
|
|
|
// Registers the Core.RemoveInvalidImg directive. The strategy class in this
// file reads it through $config->get('Core', 'RemoveInvalidImg').
// NOTE(review): `true` and 'bool' are presumably the default value and the
// value type; confirm against HTMLPurifier_ConfigSchema::define().
HTMLPurifier_ConfigSchema::define(
    'Core', 'RemoveInvalidImg', true, 'bool',
    'This directive enables pre-emptive URI checking in <code>img</code> '.
    'tags, as the attribute validation strategy is not authorized to '.
    'remove elements from the document. This directive has been available '.
    'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
);

// Registers the Core.RemoveScriptContents directive. The strategy class in
// this file reads it through $config->get('Core', 'RemoveScriptContents').
// The description is one multi-line single-quoted literal, so its lines are
// kept flush left: any leading characters would become part of the string.
HTMLPurifier_ConfigSchema::define(
    'Core', 'RemoveScriptContents', true, 'bool', '
This directive enables HTML Purifier to remove not only script tags
but all of their contents. This directive has been available since 2.0.0,
revert to pre-2.0.0 behavior by setting to false.
'
);
|
|
|
|
/**
 * Strategy that drops tokens the HTML definition does not know about.
 *
 * Each tag token is first passed through the tag transform registered for
 * its name, if one exists. The (possibly transformed) tag is then kept when
 * the definition lists its name; otherwise it is either turned into a text
 * token holding its generated markup, or discarded. Text tokens are kept;
 * comments and every other non-tag token are discarded.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a list of tokens and returns the ones that survive.
     *
     * @param $tokens  Token objects to filter; each is read through its
     *                 is_tag, name, type and attr members.
     * @param $config  Configuration object; supplies the HTML definition
     *                 and the Core.EscapeInvalidTags, Core.RemoveInvalidImg
     *                 and Core.RemoveScriptContents settings.
     * @param $context Context, taken by reference and forwarded unchanged
     *                 to the tag transform, the generator and the src
     *                 attribute validator.
     * @return Array of retained tokens, in their original order.
     */
    function execute($tokens, $config, &$context) {
        $definition = $config->getHTMLDefinition();
        $generator  = new HTMLPurifier_Generator();

        $escape_invalid_tags    = $config->get('Core', 'EscapeInvalidTags');
        $remove_invalid_img     = $config->get('Core', 'RemoveInvalidImg');
        $remove_script_contents = $config->get('Core', 'RemoveScriptContents');

        $kept = array();

        // While this holds a tag name, every token is skipped except tags
        // carrying exactly that name; false means nothing is being skipped.
        $skip_until = false;

        foreach ($tokens as $token) {
            $is_tag = !empty($token->is_tag);

            if ($skip_until && (!$is_tag || $token->name !== $skip_until)) {
                continue;
            }

            if (!$is_tag) {
                if ($token->type == 'comment') {
                    // comments are always stripped
                    continue;
                }
                if ($token->type == 'text') {
                    $kept[] = $token;
                }
                // any other non-tag token type is dropped
                continue;
            }

            // DEFINITION CALL
            // Give a registered transform the chance to rewrite the tag
            // before it is looked up in the definition.
            if (isset($definition->info_tag_transform[$token->name])) {
                $token = $definition->info_tag_transform[$token->name]->transform(
                    $token, $config, $context
                );
            }

            if (!isset($definition->info[$token->name])) {
                if ($escape_invalid_tags) {
                    // unknown tag: keep it as text containing its own markup
                    $kept[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                    continue;
                }

                // Unknown tag that is being removed. For script tags the
                // contents may have to go as well. CAN BE GENERICIZED
                if ($token->name == 'script' && $remove_script_contents) {
                    if ($token->type == 'start') {
                        $skip_until = $token->name;
                    } elseif ($token->type != 'empty') {
                        // neither start nor empty: stop skipping
                        $skip_until = false;
                    }
                    // an empty script tag leaves the skip state as it is
                }
                continue;
            }

            // Known element: left untouched, apart from the hard-coded img
            // case, where the src attribute is validated up front and the
            // whole token is dropped when that is not possible or fails.
            if ($remove_invalid_img && $token->name == 'img') {
                if (!isset($token->attr['src'])) {
                    continue;
                }
                if (!isset($definition->info['img']->attr['src'])) {
                    continue;
                }
                $token->attr['src'] = $definition->info['img']->attr['src']->validate(
                    $token->attr['src'], $config, $context
                );
                if ($token->attr['src'] === false) {
                    continue;
                }
            }

            $kept[] = $token;
        }

        return $kept;
    }

}
|
|
|
|
?>
|