0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-25 14:49:59 +00:00
htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php

117 lines
4.6 KiB
PHP

<?php
require_once 'HTMLPurifier/Strategy.php';
require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/TagTransform.php';
// Registers the 'Core' / 'RemoveInvalidImg' directive, which execute() below
// reads to decide whether img tokens with a missing or invalid src are dropped.
// NOTE(review): the arguments appear to be namespace, directive name, default
// value, type and description -- confirm against HTMLPurifier_ConfigSchema.
HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveInvalidImg', true, 'bool',
'This directive enables pre-emptive URI checking in <code>img</code> '.
'tags, as the attribute validation strategy is not authorized to '.
'remove elements from the document. This directive has been available '.
'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
);
// Registers the 'Core' / 'RemoveScriptContents' directive, which execute()
// below reads to decide whether everything between an unrecognized script
// start tag and its matching end tag is discarded along with the tags.
// The description is a multi-line string literal; its line breaks are part
// of the runtime value, so do not re-indent it.
HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveScriptContents', true, 'bool', '
This directive enables HTML Purifier to remove not only script tags
but all of their contents. This directive has been available since 1.7.0,
revert to pre-1.7.0 behavior by setting to false.
'
);
/**
 * Strategy that filters out tokens the HTML definition does not recognize.
 *
 * Every token is inspected in order. Tags known to the definition are kept,
 * tags with a registered tag transform are replaced by the transform's
 * result, and any other tag is either re-emitted as a text token (when
 * Core.EscapeInvalidTags is on) or dropped. Comment tokens are always
 * dropped; text tokens are always kept unless they sit inside a script
 * element that is being removed.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to the tokens allowed by the HTML definition.
     *
     * @param $tokens  Token objects to filter; each is read through its
     *                 is_tag / type members and, for tags, name / attr.
     * @param $config  Configuration object supplying the HTML definition and
     *                 the Core.EscapeInvalidTags, Core.RemoveInvalidImg and
     *                 Core.RemoveScriptContents directives.
     * @param $context Context, handed on by reference to the attribute
     *                 validator, the tag transforms and the generator.
     * @return Array of retained (possibly transformed) tokens, in the order
     *         they were encountered.
     */
    function execute($tokens, $config, &$context) {

        $html_def            = $config->getHTMLDefinition();
        $generator           = new HTMLPurifier_Generator();
        $kept                = array();
        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
        $drop_invalid_img    = $config->get('Core', 'RemoveInvalidImg');
        $strip_script_body   = $config->get('Core', 'RemoveScriptContents');

        // While non-false, holds the tag name we are waiting for; every
        // token that is not a tag of that name is discarded in the meantime.
        $skip_until_tag = false;

        foreach ($tokens as $token) {

            $is_tag = !empty($token->is_tag);

            if ($skip_until_tag && !($is_tag && $token->name === $skip_until_tag)) {
                continue;
            }

            // --- non-tag tokens: only text survives -----------------------
            if (!$is_tag) {
                if ($token->type == 'comment') {
                    continue;
                }
                if ($token->type == 'text') {
                    $kept[] = $token;
                }
                continue;
            }

            $name = $token->name;

            // --- tag known to the definition ------------------------------
            if (isset($html_def->info[$name])) {
                if ($name == 'img' && $drop_invalid_img) {
                    // An image needs a src, and the definition must know how
                    // to validate it; otherwise the tag is dropped outright.
                    if (!isset($token->attr['src'], $html_def->info['img']->attr['src'])) {
                        continue;
                    }
                    // The token's src is overwritten with the validator's
                    // result before that result is checked for failure.
                    $token->attr['src'] = $html_def->info['img']->attr['src']->validate(
                        $token->attr['src'], $config, $context
                    );
                    if ($token->attr['src'] === false) {
                        continue;
                    }
                }
                $kept[] = $token;
                continue;
            }

            // --- unknown tag with a registered transformation -------------
            if (isset($html_def->info_tag_transform[$name])) {
                $kept[] = $html_def->info_tag_transform[$name]->transform(
                    $token, $config, $context
                );
                continue;
            }

            // --- unknown tag, escaping requested: emit its markup as text --
            if ($escape_invalid_tags) {
                $kept[] = new HTMLPurifier_Token_Text(
                    $generator->generateFromToken($token, $config, $context)
                );
                continue;
            }

            // --- unknown tag, to be removed --------------------------------
            // A script start tag switches on skipping of its contents; an
            // empty script tag leaves the skipping state alone; any other
            // script tag switches it off again.
            if ($name == 'script' && $strip_script_body) {
                if ($token->type == 'start') {
                    $skip_until_tag = $name;
                } elseif ($token->type != 'empty') {
                    $skip_until_tag = false;
                }
            }
            // Nothing is appended here, so the tag itself is dropped.
        }

        return $kept;
    }

}
?>