0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-09 15:28:40 +00:00

Relax allowed values of class for certain doctypes, see %Attr.ClassUseCDATA

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2009-05-26 01:07:40 -04:00
parent 10e2d32a79
commit 84abae08f5
8 changed files with 64 additions and 7 deletions

5
NEWS
View File

@ -18,8 +18,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
%FilterParam.ExtractStyleBlocksEscaping -> %Filter.ExtractStyleBlocks.Escaping
%FilterParam.ExtractStyleBlocksScope -> %Filter.ExtractStyleBlocks.Scope
%FilterParam.ExtractStyleBlocksTidyImpl -> %Filter.ExtractStyleBlocks.TidyImpl
As usual, the old directive names will still work, but will through E_NOTICE
As usual, the old directive names will still work, but will throw E_NOTICE
errors.
# The allowed values for class have been relaxed to allow all of CDATA for
doctypes that are not XHTML 1.1 or XHTML 2.0. For old behavior, set
%Attr.ClassUseCDATA to false.
! More robust support for name="" and id=""
! HTMLPurifier_Config::inherit($config) allows you to inherit one
configuration, and have changes to that configuration be propagated

2
TODO
View File

@ -18,8 +18,6 @@ afraid to cast your vote for the next feature to be implemented!
http://htmlpurifier.org/phorum/read.php?3,3491,3548
- Fix ImgRequired to handle data correctly
- Think about allowing explicit order of operations hooks for transforms
- Allow more relaxed "class" definition than NMTOKENS for appropriate
doctypes
FUTURE VERSIONS
---------------

View File

@ -5,6 +5,15 @@
*/
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
protected function split($string, $config, $context) {
// really, this twiddle should be lazy loaded
$name = $config->getDefinition('HTML')->doctype->name;
if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
return parent::split($string, $config, $context);
} else {
return preg_split('/\s+/', $string);
}
}
protected function filter($tokens, $config, $context) {
$allowed = $config->get('Attr.AllowedClasses');
$forbidden = $config->get('Attr.ForbiddenClasses');
@ -14,9 +23,9 @@ class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
($allowed === null || isset($allowed[$token])) &&
!isset($forbidden[$token])
) {
$ret[] = $token;
$ret[$token] = true;
}
}
return $ret;
return array_keys($ret);
}
}

View File

@ -13,7 +13,7 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
// early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false;
$tokens = $this->split($string);
$tokens = $this->split($string, $config, $context);
$tokens = $this->filter($tokens, $config, $context);
if (empty($tokens)) return false;
return implode(' ', $tokens);
@ -23,7 +23,7 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
/**
* Splits a space separated list of tokens into its constituent parts.
*/
protected function split($string) {
protected function split($string, $config, $context) {
// OPTIMIZABLE!
// do the preg_match, capture all subpatterns for reformulation

View File

@ -0,0 +1,19 @@
Attr.ClassUseCDATA
TYPE: bool/null
DEFAULT: null
VERSION: 4.0.0
--DESCRIPTION--
If null, class will auto-detect the doctype and, if matching XHTML 1.1 or
XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise,
it will use a relaxed CDATA definition. If true, the relaxed CDATA definition
is forced; if false, the NMTOKENS definition is forced. To get behavior
of HTML Purifier prior to 4.0.0, set this directive to false.
Some rational behind the auto-detection:
in previous versions of HTML Purifier, it was assumed that the form of
class was NMTOKENS, as specified by the XHTML Modularization (representing
XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however
specify class as CDATA. HTML 5 effectively defines it as CDATA, but
with the additional constraint that each name should be unique (this is not
explicitly outlined in previous specifications).
--# vim: et sw=4 sts=4

View File

@ -18,4 +18,31 @@ class HTMLPurifier_AttrDef_HTML_ClassTest extends HTMLPurifier_AttrDef_HTML_Nmto
$this->assertDef('bar', false);
$this->assertDef('foo bar', 'foo');
}
function testDefault() {
$this->assertDef('valid');
$this->assertDef('a0-_');
$this->assertDef('-valid');
$this->assertDef('_valid');
$this->assertDef('double valid');
$this->assertDef('0stillvalid');
$this->assertDef('-0');
// test conditional replacement
$this->assertDef('validassoc 0valid', 'validassoc 0valid');
// test whitespace leniency
$this->assertDef(" double\nvalid\r", 'double valid');
// test case sensitivity
$this->assertDef('VALID');
// test duplicate removal
$this->assertDef('valid valid', 'valid');
}
function testXHTML11Behavior() {
$this->config->set('HTML.Doctype', 'XHTML 1.1');
$this->assertDef('0invalid', false);
$this->assertDef('valid valid', 'valid');
}
}

View File

@ -32,6 +32,7 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
}
function testSelectivelyRemoveInvalidClasses() {
$this->config->set('HTML.Doctype', 'XHTML 1.1');
$this->assertResult(
'<div class="valid 0invalid">Keep valid.</div>',
'<div class="valid">Keep valid.</div>'