diff --git a/NEWS b/NEWS index a93723f9..8e46860e 100644 --- a/NEWS +++ b/NEWS @@ -21,6 +21,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! Partial support for 'border-radius' properties when %CSS.AllowProprietary is true. The slash syntax, i.e., 'border-radius: 2em 1em 4em / 0.5em 3em' is not yet supported. +! %Attr.ID.HTML5 turns on HTML5-style ID handling. - alt truncation could result in malformed UTF-8 sequence. Don't truncate. Thanks Brandon Farber for reporting. - Linkify regex is smarter, based off of Gruber's regex. diff --git a/configdoc/usage.xml b/configdoc/usage.xml index d59b6b1c..69f9b3a5 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -355,9 +355,14 @@ 58 + + + 75 + + - 89 + 97 diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php index 3d86efb4..4ba45610 100644 --- a/library/HTMLPurifier/AttrDef/HTML/ID.php +++ b/library/HTMLPurifier/AttrDef/HTML/ID.php @@ -72,18 +72,26 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef // we purposely avoid using regex, hopefully this is faster - if (ctype_alpha($id)) { - $result = true; - } else { - if (!ctype_alpha(@$id[0])) { + if ($config->get('Attr.ID.HTML5') === true) { + if (preg_match('/[\t\n\x0b\x0c ]/', $id)) { return false; } - // primitive style of regexps, I suppose - $trim = trim( - $id, - 'A..Za..z0..9:-._' - ); - $result = ($trim === ''); + } else { + if (ctype_alpha($id)) { + // OK + } else { + if (!ctype_alpha(@$id[0])) { + return false; + } + // primitive style of regexps, I suppose + $trim = trim( + $id, + 'A..Za..z0..9:-._' + ); + if ($trim !== '') { + return false; + } + } } $regexp = $config->get('Attr.IDBlacklistRegexp'); @@ -91,14 +99,14 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef return false; } - if (!$this->selector && $result) { + if (!$this->selector) { $id_accumulator->add($id); } // if no change was made to the ID, return the result // else, return the new id if stripping whitespace 
made it // valid, or return false. - return $result ? $id : false; + return $id; } } diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser index 9ea0978d..0a7a406e 100644 Binary files a/library/HTMLPurifier/ConfigSchema/schema.ser and b/library/HTMLPurifier/ConfigSchema/schema.ser differ diff --git a/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt b/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt new file mode 100644 index 00000000..735d4b7a --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt @@ -0,0 +1,10 @@ +Attr.ID.HTML5 +TYPE: bool/null +DEFAULT: null +VERSION: 4.8.0 +--DESCRIPTION-- +In HTML5, restrictions on the format of the id attribute have been significantly +relaxed, such that any string is valid so long as it contains no whitespace and +is at least one character long. In lieu of a general HTML5 compatibility flag, +set this configuration directive to true to use the relaxed rules. +--# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/AttrDef/HTML/IDTest.php b/tests/HTMLPurifier/AttrDef/HTML/IDTest.php index 31870d22..92f071a7 100644 --- a/tests/HTMLPurifier/AttrDef/HTML/IDTest.php +++ b/tests/HTMLPurifier/AttrDef/HTML/IDTest.php @@ -105,6 +105,17 @@ class HTMLPurifier_AttrDef_HTML_IDTest extends HTMLPurifier_AttrDefHarness } + public function testRelaxed() + { + $this->config->set('Attr.ID.HTML5', true); + + $this->assertDef('123'); + $this->assertDef('x[1]'); + $this->assertDef('not ok', false); + $this->assertDef(' ', false); + $this->assertDef('', false); + } + } // vim: et sw=4 sts=4