2006-07-24 01:50:02 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
require_once 'HTMLPurifier/Strategy.php';
|
2006-08-14 21:22:49 +00:00
|
|
|
require_once 'HTMLPurifier/HTMLDefinition.php';
|
2006-07-24 01:50:02 +00:00
|
|
|
require_once 'HTMLPurifier/Generator.php';
|
2006-08-02 02:24:03 +00:00
|
|
|
require_once 'HTMLPurifier/TagTransform.php';
|
2006-07-24 01:50:02 +00:00
|
|
|
|
2007-06-20 21:39:28 +00:00
|
|
|
require_once 'HTMLPurifier/AttrValidator.php';
|
|
|
|
|
2006-11-23 23:59:20 +00:00
|
|
|
// Registers the boolean directive %Core.RemoveInvalidImg (default: true).
// The last argument is the directive's HTML description.
HTMLPurifier_ConfigSchema::define(
    'Core',
    'RemoveInvalidImg',
    true,
    'bool',
    '
<p>
This directive enables pre-emptive URI checking in <code>img</code>
tags, as the attribute validation strategy is not authorized to
remove elements from the document. This directive has been available
since 1.3.0, revert to pre-1.3.0 behavior by setting to false.
</p>
'
);
|
|
|
|
|
2007-06-16 19:31:45 +00:00
|
|
|
// Registers the directive %Core.RemoveScriptContents, typed 'bool/null' with
// a default of null. The last argument is the directive's HTML description.
HTMLPurifier_ConfigSchema::define(
    'Core',
    'RemoveScriptContents',
    null,
    'bool/null',
    '
<p>
This directive enables HTML Purifier to remove not only script tags
but all of their contents. This directive has been deprecated since 2.1.0,
and when not set the value of %Core.HiddenElements will take
precedence. This directive has been available since 2.0.0, and can be used to
revert to pre-2.0.0 behavior by setting it to false.
</p>
'
);
|
|
|
|
|
|
|
|
// Registers the lookup directive %Core.HiddenElements. By default the lookup
// contains 'script' and 'style'. The last argument is the directive's HTML
// description (user-facing text).
HTMLPurifier_ConfigSchema::define(
    'Core',
    'HiddenElements',
    array('script' => true, 'style' => true),
    'lookup',
    '
<p>
This directive is a lookup array of elements which should have their
contents removed when they are not allowed by the HTML definition.
For example, the contents of a <code>script</code> tag are not
normally shown in a document, so if script tags are to be removed,
their contents should be removed too. This is opposed to a <code>b</code>
tag, which defines some presentational changes but does not hide its
contents.
</p>
'
);
|
|
|
|
|
2006-07-30 19:11:18 +00:00
|
|
|
/**
 * Strategy that drops tokens the HTML definition does not recognize.
 *
 * Tokens are inspected one at a time, in order. A tag that has an entry in
 * the definition's info_tag_transform table is transformed first. A tag that
 * is still unknown to the definition afterwards is either converted into a
 * text token holding its generated markup (when %Core.EscapeInvalidTags is
 * on) or removed. When a removed start tag is listed in %Core.HiddenElements,
 * every following token is skipped until another tag with the same name is
 * reached. Comments are stripped, except while inside an allowed hidden
 * element, where they are converted to text tokens instead.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a list of tokens according to the HTML definition.
     *
     * @param $tokens  Array of token objects to filter.
     * @param $config  Configuration object; supplies the HTML definition and
     *                 the Core directives read below.
     * @param $context Context object (by reference). 'CurrentToken' is
     *                 registered for the duration of the call and destroyed
     *                 before returning; 'ErrorCollector' is fetched from it
     *                 when %Core.CollectErrors is enabled.
     * @return Array of the tokens that were kept, in their original order.
     */
    function execute($tokens, $config, &$context) {

        $definition = $config->getHTMLDefinition();
        $generator  = new HTMLPurifier_Generator();
        $kept       = array();

        $escape_invalid_tags    = $config->get('Core', 'EscapeInvalidTags');
        $remove_invalid_img     = $config->get('Core', 'RemoveInvalidImg');
        $remove_script_contents = $config->get('Core', 'RemoveScriptContents');
        $hidden_elements        = $config->get('Core', 'HiddenElements');

        // Compatibility with the deprecated %Core.RemoveScriptContents: an
        // explicit true/false overrides the 'script' entry of the lookup,
        // while null leaves %Core.HiddenElements untouched.
        if ($remove_script_contents === true) {
            $hidden_elements['script'] = true;
        } elseif ($remove_script_contents === false) {
            if (isset($hidden_elements['script'])) {
                unset($hidden_elements['script']);
            }
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // Tag name we are skipping towards; false when not skipping.
        $remove_until = false;

        // Name of the allowed hidden element we are currently inside; while
        // set, comments become text tokens. false when outside such elements.
        $textify_comments = false;

        // $token is handed to the context before the loop so that the loop
        // variable below is the very variable the context was given.
        $token = false;
        $context->register('CurrentToken', $token);

        $collector = false;
        if ($config->get('Core', 'CollectErrors')) {
            $collector =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {

            $is_tag = !empty($token->is_tag);

            // While skipping, ignore everything except a tag carrying the
            // name we are waiting for.
            if ($remove_until && (!$is_tag || $token->name !== $remove_until)) {
                continue;
            }

            // ---- non-tag tokens -------------------------------------------
            if (!$is_tag) {
                if ($token->type == 'comment') {
                    if ($textify_comments === false) {
                        // ordinary comment: strip it
                        if ($collector) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                    // comment inside an allowed hidden element: keep as text
                    $comment_text = $token->data;
                    $token = new HTMLPurifier_Token_Text($comment_text);
                } elseif ($token->type != 'text') {
                    // neither tag, comment nor text: drop
                    continue;
                }
                $kept[] = $token;
                continue;
            }

            // ---- tag tokens -----------------------------------------------

            // DEFINITION CALL
            // Give a registered tag transform the first shot at the token.
            if (isset($definition->info_tag_transform[$token->name])) {
                $original_name = $token->name;
                $token = $definition->info_tag_transform[$token->name]->transform($token, $config, $context);
                if ($collector) {
                    $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                }
            }

            // Element unknown to the definition: escape it or remove it.
            if (!isset($definition->info[$token->name])) {

                if ($escape_invalid_tags) {
                    // keep the tag, but as text holding its generated markup
                    if ($collector) {
                        $collector->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                    $kept[] = $token;
                    continue;
                }

                if (isset($hidden_elements[$token->name])) {
                    // Hidden element: a start tag begins skipping, an empty
                    // tag changes nothing, any other tag type ends skipping.
                    if ($token->type == 'start') {
                        $remove_until = $token->name;
                    } elseif ($token->type != 'empty') {
                        $remove_until = false;
                    }
                    if ($collector) {
                        $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                    }
                } else {
                    if ($collector) {
                        $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                    }
                }
                continue;
            }

            // Element is known. Start/empty tags whose element declares
            // required attributes are validated right away, and the tag is
            // dropped when any required attribute is absent afterwards.
            $is_opening = ($token->type === 'start' || $token->type === 'empty');
            if (
                $is_opening &&
                $definition->info[$token->name]->required_attr &&
                ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
            ) {
                $attr_validator->validateToken($token, $config, $context);

                $all_present = true;
                foreach ($definition->info[$token->name]->required_attr as $attr_name) {
                    if (!isset($token->attr[$attr_name])) {
                        $all_present = false;
                        break; // $attr_name now names the missing attribute
                    }
                }

                if (!$all_present) {
                    if ($collector) {
                        $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $attr_name);
                    }
                    continue;
                }

                // flag the token as already attribute-validated
                $token->armor['ValidateAttributes'] = true;
            }

            // Track entry into / exit from an allowed hidden element so that
            // comments inside it can be turned into text.
            if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
                $textify_comments = $token->name;
            } elseif ($token->name === $textify_comments && $token->type == 'end') {
                $textify_comments = false;
            }

            $kept[] = $token;
        }

        // Still skipping at the end of input: the awaited tag never came.
        if ($remove_until && $collector) {
            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
        }

        $context->destroy('CurrentToken');

        return $kept;
    }

}
|
|
|
|
|