diff --git a/NEWS b/NEWS
index a93723f9..8e46860e 100644
--- a/NEWS
+++ b/NEWS
@@ -21,6 +21,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Partial support for 'border-radius' properties when %CSS.AllowProprietary is true.
The slash syntax, i.e., 'border-radius: 2em 1em 4em / 0.5em 3em' is not
yet supported.
+! %Attr.ID.HTML5 turns on HTML5-style ID handling.
- alt truncation could result in malformed UTF-8 sequence. Don't
truncate. Thanks Brandon Farber for reporting.
- Linkify regex is smarter, based off of Gruber's regex.
diff --git a/configdoc/usage.xml b/configdoc/usage.xml
index d59b6b1c..69f9b3a5 100644
--- a/configdoc/usage.xml
+++ b/configdoc/usage.xml
@@ -355,9 +355,14 @@
58
+
+
+ 75
+
+
- 89
+ 97
diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php
index 3d86efb4..4ba45610 100644
--- a/library/HTMLPurifier/AttrDef/HTML/ID.php
+++ b/library/HTMLPurifier/AttrDef/HTML/ID.php
@@ -72,18 +72,26 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
// we purposely avoid using regex, hopefully this is faster
- if (ctype_alpha($id)) {
- $result = true;
- } else {
- if (!ctype_alpha(@$id[0])) {
+ if ($config->get('Attr.ID.HTML5') === true) {
+ if (preg_match('/[\t\n\x0b\x0c ]/', $id)) {
return false;
}
- // primitive style of regexps, I suppose
- $trim = trim(
- $id,
- 'A..Za..z0..9:-._'
- );
- $result = ($trim === '');
+ } else {
+ if (ctype_alpha($id)) {
+ // OK
+ } else {
+ if (!ctype_alpha(@$id[0])) {
+ return false;
+ }
+ // primitive style of regexps, I suppose
+ $trim = trim(
+ $id,
+ 'A..Za..z0..9:-._'
+ );
+ if ($trim !== '') {
+ return false;
+ }
+ }
}
$regexp = $config->get('Attr.IDBlacklistRegexp');
@@ -91,14 +99,14 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
return false;
}
- if (!$this->selector && $result) {
+ if (!$this->selector) {
$id_accumulator->add($id);
}
// if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it
// valid, or return false.
- return $result ? $id : false;
+ return $id;
}
}
diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser
index 9ea0978d..0a7a406e 100644
Binary files a/library/HTMLPurifier/ConfigSchema/schema.ser and b/library/HTMLPurifier/ConfigSchema/schema.ser differ
diff --git a/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt b/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt
new file mode 100644
index 00000000..735d4b7a
--- /dev/null
+++ b/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt
@@ -0,0 +1,10 @@
+Attr.ID.HTML5
+TYPE: bool/null
+DEFAULT: null
+VERSION: 4.8.0
+--DESCRIPTION--
+In HTML5, restrictions on the format of the id attribute have been significantly
+relaxed, such that any string is valid so long as it contains no whitespace and
+is at least one character long. In lieu of a general HTML5 compatibility flag,
+set this configuration directive to true to use the relaxed rules.
+--# vim: et sw=4 sts=4
diff --git a/tests/HTMLPurifier/AttrDef/HTML/IDTest.php b/tests/HTMLPurifier/AttrDef/HTML/IDTest.php
index 31870d22..92f071a7 100644
--- a/tests/HTMLPurifier/AttrDef/HTML/IDTest.php
+++ b/tests/HTMLPurifier/AttrDef/HTML/IDTest.php
@@ -105,6 +105,17 @@ class HTMLPurifier_AttrDef_HTML_IDTest extends HTMLPurifier_AttrDefHarness
}
+ public function testRelaxed()
+ {
+ $this->config->set('Attr.ID.HTML5', true);
+ // Relaxed (HTML5) mode: '123' and 'x[1]' should validate; 'not ok', ' ' and '' are passed with false (presumably: expected to be rejected).
+ $this->assertDef('123');
+ $this->assertDef('x[1]');
+ $this->assertDef('not ok', false);
+ $this->assertDef(' ', false);
+ $this->assertDef('', false);
+ }
+
}
// vim: et sw=4 sts=4