From bae5b0c02255972aa1a4bf73957f5056cbab2f6e Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Wed, 14 Feb 2007 02:54:41 +0000 Subject: [PATCH] Move out SetParent and TweakSubtractiveWhitelist. Move out some other configurations, disable ID references. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@743 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/AttrDef/ID.php | 39 +++--- library/HTMLPurifier/AttrTypes.php | 2 +- library/HTMLPurifier/HTMLDefinition.php | 126 +++--------------- library/HTMLPurifier/HTMLModule/SetParent.php | 31 +++++ .../HTMLModule/TweakSubtractiveWhitelist.php | 68 ++++++++++ tests/HTMLPurifier/AttrDef/IDTest.php | 4 +- 6 files changed, 146 insertions(+), 124 deletions(-) create mode 100644 library/HTMLPurifier/HTMLModule/SetParent.php create mode 100644 library/HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php diff --git a/library/HTMLPurifier/AttrDef/ID.php b/library/HTMLPurifier/AttrDef/ID.php index 63d30394..a4c54e4f 100644 --- a/library/HTMLPurifier/AttrDef/ID.php +++ b/library/HTMLPurifier/AttrDef/ID.php @@ -3,6 +3,22 @@ require_once 'HTMLPurifier/AttrDef.php'; require_once 'HTMLPurifier/IDAccumulator.php'; +HTMLPurifier_ConfigSchema::define( + 'Attr', 'EnableID', false, 'bool', + 'Allows the ID attribute in HTML. This is disabled by default '. + 'due to the fact that without proper configuration user input can '. + 'easily break the validation of a webpage by specifying an ID that is '. + 'already on the surrounding HTML. If you don\'t mind throwing caution to '. + 'the wind, enable this directive, but I strongly recommend you also '. + 'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '. + 'user supplied IDs (%Attr.IDPrefix). This directive has been available '. + 'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '. + 'versions.' +); +HTMLPurifier_ConfigSchema::defineAlias( + 'HTML', 'EnableAttrID', 'Attr', 'EnableID' +); + HTMLPurifier_ConfigSchema::define( 'Attr', 'IDPrefix', '', 'string', 'String to prefix to IDs. If you have no idea what IDs your pages '. @@ -39,22 +55,13 @@ HTMLPurifier_ConfigSchema::define( class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef { - /** - * Is the ID an actual ID, or a reference to one? - * @note IDAccumulator checking is disabled for references - * @bool - */ - var $ref = false; - - /** - * @param $ref bool indication if it's ID or IDREF - */ - function HTMLPurifier_AttrDef_ID($ref = false) { - $this->ref = $ref; - } + // ref functionality disabled, since we also have to verify + // whether or not the ID it refers to exists function validate($id, $config, &$context) { + if (!$config->get('Attr', 'EnableID')) return false; + $id = trim($id); // trim it first if ($id === '') return false; @@ -69,10 +76,10 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef '%Attr.IDPrefix is set', E_USER_WARNING); } - if (!$this->ref) { + //if (!$this->ref) { $id_accumulator =& $context->get('IDAccumulator'); if (isset($id_accumulator->ids[$id])) return false; - } + //} // we purposely avoid using regex, hopefully this is faster @@ -87,7 +94,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef $result = ($trim === ''); } - if (!$this->ref && $result) $id_accumulator->add($id); + if (/*!$this->ref && */$result) $id_accumulator->add($id); // if no change was made to the ID, return the result // else, return the new id if stripping whitespace made it diff --git a/library/HTMLPurifier/AttrTypes.php b/library/HTMLPurifier/AttrTypes.php index c942c856..29be5288 100644 --- a/library/HTMLPurifier/AttrTypes.php +++ b/library/HTMLPurifier/AttrTypes.php @@ -38,4 +38,4 @@ class HTMLPurifier_AttrTypes } } -?> \ No newline at end of file +?> diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 0b66fc2f..3e71dab7 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -38,22 +38,14 @@ require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php'; require_once 'HTMLPurifier/HTMLModule/Legacy.php'; +// config modules +require_once 'HTMLPurifier/HTMLModule/SetParent.php'; + // tweak modules +require_once 'HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php'; // this definition and its modules MUST NOT define configuration directives // outside of the HTML or Attr namespaces -HTMLPurifier_ConfigSchema::define( - 'HTML', 'EnableAttrID', false, 'bool', - 'Allows the ID attribute in HTML. This is disabled by default '. - 'due to the fact that without proper configuration user input can '. - 'easily break the validation of a webpage by specifying an ID that is '. - 'already on the surrounding HTML. If you don\'t mind throwing caution to '. - 'the wind, enable this directive, but I strongly recommend you also '. - 'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '. - 'user supplied IDs (%Attr.IDPrefix). This directive has been available '. - 'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '. - 'versions.' -); HTMLPurifier_ConfigSchema::define( 'HTML', 'Strict', false, 'bool', @@ -72,39 +64,6 @@ HTMLPurifier_ConfigSchema::define( 'This directive has been available since 1.3.0.' ); -HTMLPurifier_ConfigSchema::define( - 'HTML', 'Parent', 'div', 'string', - 'String name of element that HTML fragment passed to library will be '. - 'inserted in. An interesting variation would be using span as the '. - 'parent element, meaning that only inline tags would be allowed. '. - 'This directive has been available since 1.3.0.' -); - -HTMLPurifier_ConfigSchema::define( - 'HTML', 'AllowedElements', null, 'lookup/null', - 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '. - 'can overload it with your own list of tags to allow. Note that this '. - 'method is subtractive: it does its job by taking away from HTML Purifier '. - 'usual feature set, so you cannot add a tag that HTML Purifier never '. - 'supported in the first place (like embed, form or head). If you change this, you '. - 'probably also want to change %HTML.AllowedAttributes. '. - 'Warning: If another directive conflicts with the '. - 'elements here, that directive will win and override. '. - 'This directive has been available since 1.3.0.' -); - -HTMLPurifier_ConfigSchema::define( - 'HTML', 'AllowedAttributes', null, 'lookup/null', - 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. - 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. - '(style, id, class, dir, lang, xml:lang).'. - 'Warning: If another directive conflicts with the '. - 'elements here, that directive will win and override. For '. - 'example, %HTML.EnableAttrID will take precedence over *.id in this '. - 'directive. You must set that directive to true before you can use '. - 'IDs at all. This directive has been available since 1.3.0.' -); - /** * Definition of the purified HTML that describes allowed children, * attributes, and many other things. @@ -246,11 +205,19 @@ class HTMLPurifier_HTMLDefinition * @public */ var $order_keywords = array( - 'setup' => 10, - 'early' => 20, - 'main' => 30, - 'late' => 40, - 'cleanup' => 50, + 'begin' => 10, + 'setup' => 20, + + 'pre' => 30, + + 'early' => 40, + 'main' => 50, + 'late' => 60, + + 'post' => 70, + + 'cleanup' => 80, + 'end' => 90 ); /** @@ -277,23 +244,16 @@ class HTMLPurifier_HTMLDefinition // modules - // early - - // main $main_modules = array('Text', 'Hypertext', 'List', 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute'); foreach ($main_modules as $module) $this->addModule($module, 'main'); - // late if (!$this->strict) $this->addModule('Legacy', 'late'); - // cleanup - $this->addModule('TransformToStrict', 'cleanup'); + $this->addModule('SetParent', 'post'); - // remove ID module (refactor to module) - if (!$config->get('HTML', 'EnableAttrID')) { - $this->attr_collections->info['Core']['id'] = false; - } + $this->addModule('TransformToStrict', 'cleanup'); + $this->addModule('TweakSubtractiveWhitelist', 'cleanup'); } @@ -341,8 +301,6 @@ class HTMLPurifier_HTMLDefinition $this->processModules(); $this->setupAttrTransform(); $this->setupBlockWrapper(); - $this->setupParent(); - $this->setupCompat(); unset($this->config); @@ -453,50 +411,6 @@ class HTMLPurifier_HTMLDefinition } } - /** - * Sets up parent of fragment based on config - */ - function setupParent() { - $parent = $this->config->get('HTML', 'Parent'); - if (isset($this->info[$parent])) { - $this->info_parent = $parent; - } else { - trigger_error('Cannot use unrecognized element as parent.', - E_USER_ERROR); - } - $this->info_parent_def = $this->info[$this->info_parent]; - } - - /** - * Sets up compat code from HTMLDefinition that has not been - * delegated to modules yet - */ - function setupCompat() { - - // setup allowed elements, SubtractiveWhitelist module - $allowed_elements = $this->config->get('HTML', 'AllowedElements'); - if (is_array($allowed_elements)) { - foreach ($this->info as $name => $d) { - if(!isset($allowed_elements[$name])) unset($this->info[$name]); - } - } - $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes'); - if (is_array($allowed_attributes)) { - foreach ($this->info_global_attr as $attr_key => $info) { - if (!isset($allowed_attributes["*.$attr_key"])) { - unset($this->info_global_attr[$attr_key]); - } - } - foreach ($this->info as $tag => $info) { - foreach ($info->attr as $attr => $attr_info) { - if (!isset($allowed_attributes["$tag.$attr"])) { - unset($this->info[$tag]->attr[$attr]); - } - } - } - } - - } } diff --git a/library/HTMLPurifier/HTMLModule/SetParent.php b/library/HTMLPurifier/HTMLModule/SetParent.php new file mode 100644 index 00000000..0ca1ddfb --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/SetParent.php @@ -0,0 +1,31 @@ +config->get('HTML', 'Parent'); + if (isset($definition->info[$parent])) { + $definition->info_parent = $parent; + } else { + trigger_error('Cannot use unrecognized element as parent.', + E_USER_ERROR); + } + $definition->info_parent_def = $definition->info[$definition->info_parent]; + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php b/library/HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php new file mode 100644 index 00000000..47f8e32b --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php @@ -0,0 +1,68 @@ +Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedAttributes', null, 'lookup/null', + 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. + 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. + '(style, id, class, dir, lang, xml:lang).'. + 'Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override. For '. + 'example, %HTML.EnableAttrID will take precedence over *.id in this '. + 'directive. You must set that directive to true before you can use '. + 'IDs at all. This directive has been available since 1.3.0.' +); + +/** + * Proprietary module that further narrows down allowed elements and + * attributes that were allowed to a user-defined whitelist. + * @warning This module cannot ADD elements or attributes, you must + * implement full definitions yourself! + */ + +class HTMLPurifier_HTMLModule_TweakSubtractiveWhitelist extends HTMLPurifier_HTMLModule +{ + + function postProcess(&$definition) { + + // setup allowed elements, SubtractiveWhitelist module + $allowed_elements = $definition->config->get('HTML', 'AllowedElements'); + if (is_array($allowed_elements)) { + foreach ($definition->info as $name => $d) { + if(!isset($allowed_elements[$name])) unset($definition->info[$name]); + } + } + $allowed_attributes = $definition->config->get('HTML', 'AllowedAttributes'); + if (is_array($allowed_attributes)) { + foreach ($definition->info_global_attr as $attr_key => $info) { + if (!isset($allowed_attributes["*.$attr_key"])) { + unset($definition->info_global_attr[$attr_key]); + } + } + foreach ($definition->info as $tag => $info) { + foreach ($info->attr as $attr => $attr_info) { + if (!isset($allowed_attributes["$tag.$attr"]) && + !isset($allowed_attributes["*.$attr"])) { + unset($definition->info[$tag]->attr[$attr]); + } + } + } + } + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/AttrDef/IDTest.php b/tests/HTMLPurifier/AttrDef/IDTest.php index 7fba690f..42be7e13 100644 --- a/tests/HTMLPurifier/AttrDef/IDTest.php +++ b/tests/HTMLPurifier/AttrDef/IDTest.php @@ -12,6 +12,7 @@ class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness $id_accumulator = new HTMLPurifier_IDAccumulator(); $this->context->register('IDAccumulator', $id_accumulator); + $this->config->set('Attr', 'EnableID', true); $this->def = new HTMLPurifier_AttrDef_ID(); } @@ -74,7 +75,8 @@ class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness } - function testIDReference() { + // reference functionality is disabled for now + function disabled_testIDReference() { $this->def = new HTMLPurifier_AttrDef_ID(true);