mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 15:28:40 +00:00
Relax allowed values of class for certain doctypes, see %Attr.ClassUseCDATA
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
parent
10e2d32a79
commit
84abae08f5
5
NEWS
5
NEWS
@ -18,8 +18,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
%FilterParam.ExtractStyleBlocksEscaping -> %Filter.ExtractStyleBlocks.Escaping
|
||||
%FilterParam.ExtractStyleBlocksScope -> %Filter.ExtractStyleBlocks.Scope
|
||||
%FilterParam.ExtractStyleBlocksTidyImpl -> %Filter.ExtractStyleBlocks.TidyImpl
|
||||
As usual, the old directive names will still work, but will through E_NOTICE
|
||||
As usual, the old directive names will still work, but will throw E_NOTICE
|
||||
errors.
|
||||
# The allowed values for class have been relaxed to allow all of CDATA for
|
||||
doctypes that are not XHTML 1.1 or XHTML 2.0. For old behavior, set
|
||||
%Attr.ClassUseCDATA to false.
|
||||
! More robust support for name="" and id=""
|
||||
! HTMLPurifier_Config::inherit($config) allows you to inherit one
|
||||
configuration, and have changes to that configuration be propagated
|
||||
|
2
TODO
2
TODO
@ -18,8 +18,6 @@ afraid to cast your vote for the next feature to be implemented!
|
||||
http://htmlpurifier.org/phorum/read.php?3,3491,3548
|
||||
- Fix ImgRequired to handle data correctly
|
||||
- Think about allowing explicit order of operations hooks for transforms
|
||||
- Allow more relaxed "class" definition than NMTOKENS for appropriate
|
||||
doctypes
|
||||
|
||||
FUTURE VERSIONS
|
||||
---------------
|
||||
|
@ -5,6 +5,15 @@
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
|
||||
{
|
||||
/**
 * Splits a class attribute value into candidate tokens.
 *
 * The tokenization mode is selected by %Attr.ClassUseCDATA:
 *  - true:  force the relaxed CDATA definition (split on whitespace only)
 *  - false: force the parent's NMTOKENS splitting (pre-4.0.0 behavior)
 *  - null:  auto-detect; XHTML 1.1 and XHTML 2.0 use the parent's
 *           splitting, every other doctype uses the relaxed CDATA form
 *
 * @param string $string Raw attribute value
 * @param HTMLPurifier_Config $config Configuration; supplies the directive
 *        and the HTML definition used for doctype detection
 * @param HTMLPurifier_Context $context Passed through to the parent
 * @return array List of tokens; further filtering happens in filter()
 */
protected function split($string, $config, $context) {
    $use_cdata = $config->get('Attr.ClassUseCDATA');
    if ($use_cdata === null) {
        // No explicit preference: decide from the doctype.
        // really, this twiddle should be lazy loaded
        $name = $config->getDefinition('HTML')->doctype->name;
        $use_cdata = !($name == "XHTML 1.1" || $name == "XHTML 2.0");
    }
    if (!$use_cdata) {
        return parent::split($string, $config, $context);
    }
    // PREG_SPLIT_NO_EMPTY keeps leading/trailing whitespace from producing
    // empty-string tokens that would otherwise survive into the output.
    return preg_split('/\s+/', $string, -1, PREG_SPLIT_NO_EMPTY);
}
|
||||
protected function filter($tokens, $config, $context) {
|
||||
$allowed = $config->get('Attr.AllowedClasses');
|
||||
$forbidden = $config->get('Attr.ForbiddenClasses');
|
||||
@ -14,9 +23,9 @@ class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
|
||||
($allowed === null || isset($allowed[$token])) &&
|
||||
!isset($forbidden[$token])
|
||||
) {
|
||||
$ret[] = $token;
|
||||
$ret[$token] = true;
|
||||
}
|
||||
}
|
||||
return $ret;
|
||||
return array_keys($ret);
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
// early abort: '' and '0' (strings that convert to false) are invalid
|
||||
if (!$string) return false;
|
||||
|
||||
$tokens = $this->split($string);
|
||||
$tokens = $this->split($string, $config, $context);
|
||||
$tokens = $this->filter($tokens, $config, $context);
|
||||
if (empty($tokens)) return false;
|
||||
return implode(' ', $tokens);
|
||||
@ -23,7 +23,7 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Splits a space separated list of tokens into its constituent parts.
|
||||
*/
|
||||
protected function split($string) {
|
||||
protected function split($string, $config, $context) {
|
||||
// OPTIMIZABLE!
|
||||
// do the preg_match, capture all subpatterns for reformulation
|
||||
|
||||
|
Binary file not shown.
@ -0,0 +1,19 @@
|
||||
Attr.ClassUseCDATA
|
||||
TYPE: bool/null
|
||||
DEFAULT: null
|
||||
VERSION: 4.0.0
|
||||
--DESCRIPTION--
|
||||
If null, class will auto-detect the doctype and, if matching XHTML 1.1 or
|
||||
XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise,
|
||||
it will use a relaxed CDATA definition. If true, the relaxed CDATA definition
|
||||
is forced; if false, the NMTOKENS definition is forced. To get behavior
|
||||
of HTML Purifier prior to 4.0.0, set this directive to false.
|
||||
|
||||
Some rationale behind the auto-detection:
|
||||
in previous versions of HTML Purifier, it was assumed that the form of
|
||||
class was NMTOKENS, as specified by the XHTML Modularization (representing
|
||||
XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however,
|
||||
specify class as CDATA. HTML 5 effectively defines it as CDATA, but
|
||||
with the additional constraint that each name should be unique (this is not
|
||||
explicitly outlined in previous specifications).
|
||||
--# vim: et sw=4 sts=4
|
@ -18,4 +18,31 @@ class HTMLPurifier_AttrDef_HTML_ClassTest extends HTMLPurifier_AttrDef_HTML_Nmto
|
||||
$this->assertDef('bar', false);
|
||||
$this->assertDef('foo bar', 'foo');
|
||||
}
|
||||
/**
 * Checks class validation under the default configuration: tokens that
 * would be rejected as NMTOKENS (e.g. a leading digit) are accepted,
 * whitespace is normalized, case is preserved and duplicates are dropped.
 */
function testDefault() {
    // each of these is asserted with no explicit expected value
    $passthrough = array(
        'valid',
        'a0-_',
        '-valid',
        '_valid',
        'double valid',
        '0stillvalid',
        '-0',
    );
    foreach ($passthrough as $class) {
        $this->assertDef($class);
    }

    // a token with a leading digit is kept alongside an ordinary one
    $mixed = 'validassoc 0valid';
    $this->assertDef($mixed, $mixed);

    // surrounding and mixed whitespace collapses to single spaces
    $this->assertDef(" double\nvalid\r", 'double valid');

    // upper-case names are asserted with no explicit expected value
    $this->assertDef('VALID');

    // a repeated name is emitted only once
    $this->assertDef('valid valid', 'valid');
}
|
||||
/**
 * With the XHTML 1.1 doctype selected, a class starting with a digit is
 * rejected, while duplicate removal still applies.
 */
function testXHTML11Behavior() {
    $this->config->set('HTML.Doctype', 'XHTML 1.1');

    // input => expected result (false means the value is rejected)
    $cases = array(
        '0invalid'    => false,
        'valid valid' => 'valid',
    );
    foreach ($cases as $input => $expected) {
        $this->assertDef($input, $expected);
    }
}
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
|
||||
}
|
||||
|
||||
function testSelectivelyRemoveInvalidClasses() {
|
||||
$this->config->set('HTML.Doctype', 'XHTML 1.1');
|
||||
$this->assertResult(
|
||||
'<div class="valid 0invalid">Keep valid.</div>',
|
||||
'<div class="valid">Keep valid.</div>'
|
||||
|
Loading…
Reference in New Issue
Block a user