From 84aa2ca3903aa9aebcb7d38a23bcb6544b4afe97 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sat, 26 Apr 2008 03:14:01 +0000 Subject: [PATCH] [3.1.0] Implement tag@attr for Allowed and Forbidden - Fix (or null) bug in configdoc git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1695 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 8 + TODO | 2 + configdoc/usage.xml | 4 +- .../HTMLPurifier/ConfigSchema/Builder/Xml.php | 6 +- .../schema/HTML.ForbiddenAttributes.txt | 14 +- library/HTMLPurifier/HTMLDefinition.php | 101 ++++++++---- tests/HTMLPurifier/HTMLDefinitionTest.php | 148 +++++++++++++++++- tests/HTMLPurifier/Harness.php | 16 +- tests/HTMLPurifierTest.php | 61 +++----- 9 files changed, 276 insertions(+), 84 deletions(-) diff --git a/NEWS b/NEWS index d20c92b0..68b3ad6e 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier + HTMLPurifier_HTMLModule->addBlankElement() + HTMLPurifier_LanguageFactory::instance() # Printer_ConfigForm's get*() functions were static-ified +# %HTML.ForbiddenAttributes requires attribute declarations to be in the + form of tag@attr, NOT tag.attr (which will throw an error and won't do + anything). This is for forwards compatibility with XML; you'd do best + to migrate an %HTML.AllowedAttributes directives to this syntax too. ! Allow index to be false for config from form creation ! Added HTMLPurifier::VERSION constant - InterchangeBuilder now alphabetizes its lists @@ -33,10 +37,14 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier - HTMLPurifier::instance() created for consistency, is equivalent to getInstance() - Fixed and revamped broken ConfigForm smoketest - Bug with bool/null fields in Printer_ConfigForm fixed +- Improved error messages for allowed and forbidden HTML elements and attributes +- Missing (or null) in configdoc documentation restored . Out-of-date documentation revised . UTF-8 encoding check optimization as suggested by Diego . HTMLPurifier_Error removed in favor of exceptions . More copy() function removed; should use clone instead +. More extensive unit tests for HTMLDefinition +. assertPurification moved to central harness 3.1.0rc1, released 2008-04-22 # Autoload support added. Internal require_once's removed in favor of an diff --git a/TODO b/TODO index 9ed7dd8e..9af74f95 100644 --- a/TODO +++ b/TODO @@ -52,6 +52,8 @@ FUTURE VERSIONS Also, enable disabling of directionality 5.0 release [To XML and Beyond] + - AllowedAttributes and ForbiddenAttributes step on the toes of XML by + using periods; this needs to be changed. - Extended HTML capabilities based on namespacing and tag transforms (COMPLEX) - Hooks for adding custom processors to custom namespaced tags and attributes, offer default implementation diff --git a/configdoc/usage.xml b/configdoc/usage.xml index c2eb4413..800180cc 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -131,12 +131,12 @@ - 303 + 326 - 304 + 327 diff --git a/library/HTMLPurifier/ConfigSchema/Builder/Xml.php b/library/HTMLPurifier/ConfigSchema/Builder/Xml.php index f5aae325..3c398d66 100644 --- a/library/HTMLPurifier/ConfigSchema/Builder/Xml.php +++ b/library/HTMLPurifier/ConfigSchema/Builder/Xml.php @@ -71,8 +71,10 @@ class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter $this->startElement('constraints'); if ($directive->version) $this->writeElement('version', $directive->version); - $this->writeElement('type', $directive->type); - if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes'); + $this->startElement('type'); + if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes'); + $this->text($directive->type); + $this->endElement(); // type if ($directive->allowed) { $this->startElement('allowed'); foreach ($directive->allowed as $value => $x) $this->writeElement('value', $value); diff --git a/library/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt b/library/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt index 2a626e41..68c49cd6 100644 --- a/library/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt +++ b/library/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt @@ -4,7 +4,17 @@ VERSION: 3.1.0 DEFAULT: array() --DESCRIPTION--

- This directive complements %HTML.ForbiddenElements and is the inverse of - %HTML.AllowedAttributes. Please see the former for a discussion of why you + While this directive is similar to %HTML.AllowedAttributes, for + forwards-compatibility with XML, this attribute has a different syntax. Instead of + tag.attr, use tag@attr. To disallow href + attributes in a tags, set this directive to + a@href. You can also disallow an attribute globally with + attr or *@attr (either syntax is fine; the latter + is provided for consistency with %HTML.AllowedAttributes). +

+

+ Warning: This directive complements %HTML.ForbiddenElements, + accordingly, check + out that directive for a discussion of why you should think twice before using this directive.

diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 33768396..3310bbc2 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -233,10 +233,10 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition $support = "(for information on implementing this, see the ". "support forums) "; - // setup allowed elements + // setup allowed elements ----------------------------------------- $allowed_elements = $config->get('HTML', 'AllowedElements'); - $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); + $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); // retrieve early if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { $allowed = $config->get('HTML', 'Allowed'); @@ -252,55 +252,80 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition } // emit errors foreach ($allowed_elements as $element => $d) { - // :TODO: Is this htmlspecialchars() call really necessary? - $element = htmlspecialchars($element); + $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful! trigger_error("Element '$element' is not supported $support", E_USER_WARNING); } } + // setup allowed attributes --------------------------------------- + $allowed_attributes_mutable = $allowed_attributes; // by copy! if (is_array($allowed_attributes)) { - foreach ($this->info_global_attr as $attr_key => $info) { - if (!isset($allowed_attributes["*.$attr_key"])) { - unset($this->info_global_attr[$attr_key]); - } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) { - unset($allowed_attributes_mutable["*.$attr_key"]); + + // This actually doesn't do anything, since we went away from + // global attributes. It's possible that userland code uses + // it, but HTMLModuleManager doesn't! + foreach ($this->info_global_attr as $attr => $x) { + $keys = array($attr, "*@$attr", "*.$attr"); + $delete = true; + foreach ($keys as $key) { + if ($delete && isset($allowed_attributes[$key])) { + $delete = false; + } + if (isset($allowed_attributes_mutable[$key])) { + unset($allowed_attributes_mutable[$key]); + } } + if ($delete) unset($this->info_global_attr[$attr]); } + foreach ($this->info as $tag => $info) { - foreach ($info->attr as $attr => $attr_info) { - if (!isset($allowed_attributes["$tag.$attr"]) && - !isset($allowed_attributes["*.$attr"])) { - unset($this->info[$tag]->attr[$attr]); - } else { - if (isset($allowed_attributes_mutable["$tag.$attr"])) { - unset($allowed_attributes_mutable["$tag.$attr"]); - } elseif (isset($allowed_attributes_mutable["*.$attr"])) { - unset($allowed_attributes_mutable["*.$attr"]); + foreach ($info->attr as $attr => $x) { + $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr"); + $delete = true; + foreach ($keys as $key) { + if ($delete && isset($allowed_attributes[$key])) { + $delete = false; + } + if (isset($allowed_attributes_mutable[$key])) { + unset($allowed_attributes_mutable[$key]); } } + if ($delete) unset($this->info[$tag]->attr[$attr]); } } // emit errors foreach ($allowed_attributes_mutable as $elattr => $d) { - list($element, $attribute) = explode('.', $elattr); - // :TODO: Is this htmlspecialchars() call really necessary? - $element = htmlspecialchars($element); - $attribute = htmlspecialchars($attribute); - if ($element == '*') { - trigger_error("Global attribute '$attribute' is not ". - "supported in any elements $support", - E_USER_WARNING); - } else { - trigger_error("Attribute '$attribute' in element '$element' not supported $support", - E_USER_WARNING); + $bits = preg_split('/[.@]/', $elattr, 2); + $c = count($bits); + switch ($c) { + case 2: + if ($bits[0] !== '*') { + $element = htmlspecialchars($bits[0]); + $attribute = htmlspecialchars($bits[1]); + if (!isset($this->info[$element])) { + trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support"); + } else { + trigger_error("Attribute '$attribute' in element '$element' not supported $support", + E_USER_WARNING); + } + break; + } + // otherwise fall through + case 1: + $attribute = htmlspecialchars($bits[0]); + trigger_error("Global attribute '$attribute' is not ". + "supported in any elements $support", + E_USER_WARNING); + break; } } } - // setup forbidden elements - $forbidden_elements = $config->get('HTML', 'ForbiddenElements'); + // setup forbidden elements --------------------------------------- + + $forbidden_elements = $config->get('HTML', 'ForbiddenElements'); $forbidden_attributes = $config->get('HTML', 'ForbiddenAttributes'); foreach ($this->info as $tag => $info) { @@ -308,10 +333,18 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition unset($this->info[$tag]); continue; } - foreach ($info->attr as $name => $def) { - if (isset($forbidden_attributes["$tag.$name"])) { - unset($this->info[$tag]->attr[$name]); + foreach ($info->attr as $attr => $x) { + if ( + isset($forbidden_attributes["$tag@$attr"]) || + isset($forbidden_attributes["*@$attr"]) || + isset($forbidden_attributes[$attr]) + ) { + unset($this->info[$tag]->attr[$attr]); continue; + } // this segment might get removed eventually + elseif (isset($forbidden_attributes["$tag.$attr"])) { + // $tag.$attr are not user supplied, so no worries! + trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING); } } } diff --git a/tests/HTMLPurifier/HTMLDefinitionTest.php b/tests/HTMLPurifier/HTMLDefinitionTest.php index 42010f58..5b4f5e77 100644 --- a/tests/HTMLPurifier/HTMLDefinitionTest.php +++ b/tests/HTMLPurifier/HTMLDefinitionTest.php @@ -59,7 +59,7 @@ a[href|title] $config1 = HTMLPurifier_Config::create(array( 'HTML.AllowedElements' => array('b', 'i', 'p', 'a'), - 'HTML.AllowedAttributes' => array('a.href', '*.id') + 'HTML.AllowedAttributes' => array('a@href', '*@id') )); $config2 = HTMLPurifier_Config::create(array( @@ -70,6 +70,150 @@ a[href|title] } + function assertPurification_AllowedElements_p() { + $this->assertPurification('

Jelly

', '

Jelly

'); + } + + function test_AllowedElements() { + $this->config->set('HTML', 'AllowedElements', 'p'); + $this->assertPurification_AllowedElements_p(); + } + + function test_AllowedElements_multiple() { + $this->config->set('HTML', 'AllowedElements', 'p,div'); + $this->assertPurification('

Jelly

', '

Jelly

'); + } + + function test_AllowedElements_invalidElement() { + $this->config->set('Cache', 'DefinitionImpl', null); // Necessary to ensure error is thrown + $this->config->set('HTML', 'AllowedElements', 'obviously_invalid,p'); + $this->expectError(new PatternExpectation("/Element 'obviously_invalid' is not supported/")); + $this->assertPurification_AllowedElements_p(); + } + + function test_AllowedElements_invalidElement_xssAttempt() { + $this->config->set('Cache', 'DefinitionImpl', null); + $this->config->set('HTML', 'AllowedElements', '') @@ -111,23 +92,24 @@ class HTMLPurifierTest extends HTMLPurifier_Harness } - function testEnableAttrID() { - - $this->purifier = new HTMLPurifier(); + function testAttrIDDisabledByDefault() { $this->assertPurification( 'foobar', 'foobar' ); - $this->purifier = new HTMLPurifier(array('Attr.EnableID' => true)); + } + + function testEnableAttrID() { + $this->config->set('Attr', 'EnableID', true); $this->assertPurification('foobar'); $this->assertPurification('Omigosh!'); - } function testScript() { - $this->purifier = new HTMLPurifier(array('HTML.Trusted' => true)); + $this->config->set('HTML', 'Trusted', true); + $ideal = ''; @@ -168,24 +150,21 @@ alert(""); } function testMakeAbsolute() { + $this->config->set('URI', 'Base', 'http://example.com/bar/baz.php'); + $this->config->set('URI', 'MakeAbsolute', true); $this->assertPurification( 'Foobar', - 'Foobar', - array( - 'URI.Base' => 'http://example.com/bar/baz.php', - 'URI.MakeAbsolute' => true - ) + 'Foobar' ); } function test_addFilter_deprecated() { - $purifier = new HTMLPurifier(); $this->expectError('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom'); generate_mock_once('HTMLPurifier_Filter'); - $purifier->addFilter($mock = new HTMLPurifier_FilterMock()); + $this->purifier->addFilter($mock = new HTMLPurifier_FilterMock()); $mock->expectOnce('preFilter'); $mock->expectOnce('postFilter'); - $purifier->purify('foo'); + $this->purifier->purify('foo'); } }