diff --git a/NEWS b/NEWS index 9fcf645a..a9f437af 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! Config object gives more friendly error messages when things go wrong ! Advanced API implemented: easy functions for creating elements (addElement) and attributes (addAttribute) on HTMLDefinition +! Add native support for required attributes - Deprecated and removed EnableRedundantUTF8Cleaning. It didn't even work! - DOMLex will not emit errors when a custom error handler that does not honor error_reporting is used @@ -63,6 +64,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . DirectLex can now track line-numbers . Preliminary error collector is in place, although no code actually reports errors yet +. Factor out most of ValidateAttributes to new AttrValidator class 1.6.1, released 2007-05-05 ! Support for more deprecated attributes via transformations: diff --git a/library/HTMLPurifier/AttrCollections.php b/library/HTMLPurifier/AttrCollections.php index da5445db..61bce40e 100644 --- a/library/HTMLPurifier/AttrCollections.php +++ b/library/HTMLPurifier/AttrCollections.php @@ -82,19 +82,47 @@ class HTMLPurifier_AttrCollections * @param $attr_types HTMLPurifier_AttrTypes instance */ function expandIdentifiers(&$attr, $attr_types) { + + // because foreach will process new elements we add, make sure we + // skip duplicates + $processed = array(); + foreach ($attr as $def_i => $def) { + // skip inclusions if ($def_i === 0) continue; - if (!is_string($def)) continue; + + if (isset($processed[$def_i])) continue; + + // determine whether or not attribute is required + if ($required = (strpos($def_i, '*') !== false)) { + // rename the definition + unset($attr[$def_i]); + $def_i = trim($def_i, '*'); + $attr[$def_i] = $def; + } + + $processed[$def_i] = true; + + // if we've already got a literal object, move on + if (is_object($def)) { + // preserve previous required + $attr[$def_i]->required = ($required || $attr[$def_i]->required); 
+ continue; + } + if ($def === false) { unset($attr[$def_i]); continue; } + if ($t = $attr_types->get($def)) { $attr[$def_i] = $t; + $attr[$def_i]->required = $required; } else { unset($attr[$def_i]); } } + } } diff --git a/library/HTMLPurifier/AttrDef.php b/library/HTMLPurifier/AttrDef.php index 3efc8e69..d9d2d944 100644 --- a/library/HTMLPurifier/AttrDef.php +++ b/library/HTMLPurifier/AttrDef.php @@ -14,11 +14,17 @@ class HTMLPurifier_AttrDef { /** - * Tells us whether or not an HTML attribute is minimized. Only the - * boolean attribute vapourware would use this. + * Tells us whether or not an HTML attribute is minimized. Has no + * meaning in other contexts. */ var $minimized = false; + /** + * Tells us whether or not an HTML attribute is required. Has no + * meaning in other contexts + */ + var $required = false; + /** * Validates and cleans passed string according to a definition. * diff --git a/library/HTMLPurifier/AttrTransform/ImgRequired.php b/library/HTMLPurifier/AttrTransform/ImgRequired.php index 4ff356d8..159afd2f 100644 --- a/library/HTMLPurifier/AttrTransform/ImgRequired.php +++ b/library/HTMLPurifier/AttrTransform/ImgRequired.php @@ -20,7 +20,10 @@ HTMLPurifier_ConfigSchema::define( ); /** - * Post-transform that ensures the required attrs of img (alt and src) are set + * Transform that supplies default values for the src and alt attributes + * in img tags, as well as prevents the img tag from being removed + * because of a missing alt tag. This needs to be registered as both + * a pre and post attribute transform. 
*/ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform { @@ -29,6 +32,7 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform $src = true; if (!isset($attr['src'])) { + if ($config->get('Core', 'RemoveInvalidImg')) return $attr; $attr['src'] = $config->get('Attr', 'DefaultInvalidImage'); $src = false; } diff --git a/library/HTMLPurifier/AttrValidator.php b/library/HTMLPurifier/AttrValidator.php new file mode 100644 index 00000000..d6a1a563 --- /dev/null +++ b/library/HTMLPurifier/AttrValidator.php @@ -0,0 +1,105 @@ +getHTMLDefinition(); + + // create alias to global definition array, see also $defs + // DEFINITION CALL + $d_defs = $definition->info_global_attr; + + // copy out attributes for easy manipulation + $attr = $token->attr; + + // do global transformations (pre) + // nothing currently utilizes this + foreach ($definition->info_attr_transform_pre as $transform) { + $attr = $transform->transform($attr, $config, $context); + } + + // do local transformations only applicable to this element (pre) + // ex.
to
+ foreach ($definition->info[$token->name]->attr_transform_pre
+ as $transform
+ ) {
+ $attr = $transform->transform($attr, $config, $context);
+ }
+
+ // create alias to this element's attribute definition array, see
+ // also $d_defs (global attribute definition array)
+ // DEFINITION CALL
+ $defs = $definition->info[$token->name]->attr;
+
+ // iterate through all the attribute keypairs
+ // Watch out for name collisions: $key has previously been used
+ foreach ($attr as $attr_key => $value) {
+
+ // call the definition
+ if ( isset($defs[$attr_key]) ) {
+ // there is a local definition defined
+ if ($defs[$attr_key] === false) {
+ // We've explicitly been told not to allow this attribute.
+ // This is usually when there's a global definition
+ // that must be overridden.
+ // Theoretically speaking, we could have a
+ // AttrDef_DenyAll, but this is faster!
+ $result = false;
+ } else {
+ // validate according to the element's definition
+ $result = $defs[$attr_key]->validate(
+ $value, $config, $context
+ );
+ }
+ } elseif ( isset($d_defs[$attr_key]) ) {
+ // there is a global definition defined, validate according
+ // to the global definition
+ $result = $d_defs[$attr_key]->validate(
+ $value, $config, $context
+ );
+ } else {
+ // system never heard of the attribute? DELETE!
+ $result = false;
+ }
+
+ // put the results into effect
+ if ($result === false || $result === null) {
+ // remove the attribute
+ unset($attr[$attr_key]);
+ } elseif (is_string($result)) {
+ // simple substitution
+ $attr[$attr_key] = $result;
+ }
+
+ // we'd also want slightly more complicated substitution
+ // involving an array as the return value,
+ // although we're not sure how colliding attributes would
+ // resolve (certain ones would be completely overridden,
+ // others would prepend themselves).
+ }
+
+ // post transforms
+
+ // ex. to
- foreach ($definition->info[$token->name]->attr_transform_pre
- as $transform
- ) {
- $attr = $transform->transform($attr, $config, $context);
- }
-
- // create alias to this element's attribute definition array, see
- // also $d_defs (global attribute definition array)
- // DEFINITION CALL
- $defs = $definition->info[$token->name]->attr;
-
- // iterate through all the attribute keypairs
- // Watch out for name collisions: $key has previously been used
- foreach ($attr as $attr_key => $value) {
-
- // call the definition
- if ( isset($defs[$attr_key]) ) {
- // there is a local definition defined
- if ($defs[$attr_key] === false) {
- // We've explicitly been told not to allow this element.
- // This is usually when there's a global definition
- // that must be overridden.
- // Theoretically speaking, we could have a
- // AttrDef_DenyAll, but this is faster!
- $result = false;
- } else {
- // validate according to the element's definition
- $result = $defs[$attr_key]->validate(
- $value, $config, $context
- );
- }
- } elseif ( isset($d_defs[$attr_key]) ) {
- // there is a global definition defined, validate according
- // to the global definition
- $result = $d_defs[$attr_key]->validate(
- $value, $config, $context
- );
- } else {
- // system never heard of the attribute? DELETE!
- $result = false;
- }
-
- // put the results into effect
- if ($result === false || $result === null) {
- // remove the attribute
- unset($attr[$attr_key]);
- } elseif (is_string($result)) {
- // simple substitution
- $attr[$attr_key] = $result;
- }
-
- // we'd also want slightly more complicated substitution
- // involving an array as the return value,
- // although we're not sure how colliding attributes would
- // resolve (certain ones would be completely overriden,
- // others would prepend themselves).
- }
-
- // post transforms
-
- // ex. img
'.
@@ -41,6 +43,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$remove_invalid_img = $config->get('Core', 'RemoveInvalidImg');
$remove_script_contents = $config->get('Core', 'RemoveScriptContents');
+ $attr_validator = new HTMLPurifier_AttrValidator();
+
// removes tokens until it reaches a closing tag with its value
$remove_until = false;
@@ -65,24 +69,23 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
if (isset($definition->info[$token->name])) {
- // leave untouched, except for a few special cases:
- // hard-coded image special case, pre-emptively drop
- // if not available. Probably not abstract-able
- if ( $token->name == 'img' && $remove_invalid_img ) {
- if (!isset($token->attr['src'])) {
- continue;
+ // mostly everything's good, but
+ // we need to make sure required attributes are in order
+ if (
+ $definition->info[$token->name]->required_attr &&
+ ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
+ ) {
+ $token = $attr_validator->validateToken($token, $config, $context);
+ $ok = true;
+ foreach ($definition->info[$token->name]->required_attr as $name) {
+ if (!isset($token->attr[$name])) {
+ $ok = false;
+ break;
+ }
}
- if (!isset($definition->info['img']->attr['src'])) {
- continue;
- }
- $token->attr['src'] =
- $definition->
- info['img']->
- attr['src']->
- validate($token->attr['src'],
- $config, $context);
- if ($token->attr['src'] === false) continue;
+ if (!$ok) continue;
+ $token->armor['ValidateAttributes'] = true;
}
} elseif ($escape_invalid_tags) {
diff --git a/library/HTMLPurifier/Strategy/ValidateAttributes.php b/library/HTMLPurifier/Strategy/ValidateAttributes.php
index 07744f80..1c9e09b3 100644
--- a/library/HTMLPurifier/Strategy/ValidateAttributes.php
+++ b/library/HTMLPurifier/Strategy/ValidateAttributes.php
@@ -4,6 +4,8 @@ require_once 'HTMLPurifier/Strategy.php';
require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/IDAccumulator.php';
+require_once 'HTMLPurifier/AttrValidator.php';
+
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDBlacklist', array(), 'list',
'Array of IDs not allowed in the document.');
@@ -17,16 +19,13 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
function execute($tokens, $config, &$context) {
- $definition = $config->getHTMLDefinition();
-
// setup id_accumulator context
$id_accumulator = new HTMLPurifier_IDAccumulator();
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
$context->register('IDAccumulator', $id_accumulator);
- // create alias to global definition array, see also $defs
- // DEFINITION CALL
- $d_defs = $definition->info_global_attr;
+ // setup validator
+ $validator = new HTMLPurifier_AttrValidator();
foreach ($tokens as $key => $token) {
@@ -34,91 +33,12 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// namely start and empty tags
if ($token->type !== 'start' && $token->type !== 'empty') continue;
- // copy out attributes for easy manipulation
- $attr = $token->attr;
+ // skip tokens that are armored
+ if (!empty($token->armor['ValidateAttributes'])) continue;
- // do global transformations (pre)
- // nothing currently utilizes this
- foreach ($definition->info_attr_transform_pre as $transform) {
- $attr = $transform->transform($attr, $config, $context);
- }
-
- // do local transformations only applicable to this element (pre)
- // ex.