0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

[3.0.0] Add global scoping support for ExtractStyleBlocks; scoped="" attribute bumped off for some 'other' time.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1478 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2008-01-05 19:19:55 +00:00
parent a7fab00cdd
commit 8779b46fc4
6 changed files with 182 additions and 28 deletions

4
NEWS
View File

@ -21,7 +21,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! New HTMLPurifier_Filter_ExtractStyleBlocks for extracting <style> from
documents and cleaning their contents up. Requires the CSSTidy library
<http://csstidy.sourceforge.net/>. You can access the blocks with the
'StyleBlocks' Context variable ($purifier->context->get('StyleBlocks'))
'StyleBlocks' Context variable ($purifier->context->get('StyleBlocks')).
The output CSS can also be "scoped" for a specific element, use:
%Filter.ExtractStyleBlocksScope
! Experimental support for some proprietary CSS attributes allowed:
opacity (and all of the browser-specific equivalents) and scrollbar colors.
Enable by setting %CSS.Proprietary to true.

13
TODO
View File

@ -11,12 +11,6 @@ If no interest is expressed for a feature that may require a considerable
amount of effort to implement, it may get endlessly delayed. Do not be
afraid to cast your vote for the next feature to be implemented!
3.0 release [Go PHP5!]
- Allow extracted CSS blocks to have a bounding selector prepended to all
of their declarations. There are two types: a global type and a HTML5
scoped type. This will allow for <style> while minimizing the risk of
disruption of other parts of site layout,
3.1 release [Error'ed]
# Error logging for filtering/cleanup procedures
- XSS-attempt detection
@ -81,7 +75,6 @@ Unknown release (on a scratch-an-itch basis)
- Abstract ChildDef_BlockQuote to work with all elements that only
allow blocks in them, required or optional
- Reorganize Unit Tests
- Reorganize configuration directives (Create more namespaces! Get messy!)
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
- Implement lenient <ruby> child validation
- Explain how to use HTML Purifier in non-PHP languages / create
@ -89,6 +82,12 @@ Unknown release (on a scratch-an-itch basis)
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
- Automatically add non-breaking spaces to empty table cells when
empty-cells:show is applied to have compatibility with Internet Explorer
- Distinguish between default settings and explicitly set settings, so
configurations can be merged
- Nested configuration namespaces
- Allow scoped="scoped" attribute in <style> tags; may be troublesome
because regular CSS has no way of uniquely identifying nodes, so we'd
have to generate IDs
Requested

View File

@ -73,6 +73,7 @@ class HTMLPurifier_ConfigSchema {
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
$this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)');
$this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality');
$this->defineNamespace('Filter', 'Configuration for filters');
$this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
$this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');

View File

@ -2,32 +2,76 @@
require_once 'HTMLPurifier/Filter.php';
// Registers the %Filter.ExtractStyleBlocksEscaping directive. cleanCSS()
// reads it to decide whether <, > and & in the printed CSS are replaced
// with CSS escape codes.
HTMLPurifier_ConfigSchema::define(
'Filter', 'ExtractStyleBlocksEscaping', true, 'bool', '
<p>
Whether or not to escape the dangerous characters &lt;, &gt; and &amp;
as \3C, \3E and \26, respectively. This can be safely set to false
if the contents of StyleBlocks will be placed in an external stylesheet,
where there is no risk of it being interpreted as HTML. This directive
has been available since 3.0.0.
</p>
'
);
// Registers the %Filter.ExtractStyleBlocksScope directive. cleanCSS()
// reads it, splits it on commas, and prepends each resulting scope to
// every selector extracted from the stylesheet. The description string
// below is the user-facing documentation for the directive.
HTMLPurifier_ConfigSchema::define(
'Filter', 'ExtractStyleBlocksScope', null, 'string/null', '
<p>
If you would like users to be able to define external stylesheets, but
only allow them to specify CSS declarations for a specific node and
prevent them from fiddling with other elements, use this directive.
It accepts any valid CSS selector, and will prepend this to any
CSS declaration extracted from the document. For example, if this
directive is set to <code>#user-content</code> and a user uses the
selector <code>a:hover</code>, the final selector will be
<code>#user-content a:hover</code>.
</p>
<p>
The comma shorthand may be used; consider the above example, with
<code>#user-content, #user-content2</code>, the final selector will
be <code>#user-content a:hover, #user-content2 a:hover</code>.
</p>
<p>
<strong>Warning:</strong> It is possible for users to bypass this measure
using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML
Purifier, and I am working to get it fixed. Until then, HTML Purifier
performs a basic check to prevent this.
</p>
<p>
This directive has been available since 3.0.0.
</p>
'
);
/**
* This filter extracts <style> blocks from input HTML, cleans them up
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
* so they can be used elsewhere in the document.
* @note See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
* @todo Allow for selectors to be munged/checked
* @todo Expose CSSTidy configuration so that custom changes can be made
*
* @note
* See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for
* sample usage.
*
* @note
* This filter can also be used on stylesheets not included in the
* document--something purists would probably prefer. Just directly
* call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS()
*/
class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
{
public $name = 'ExtractStyleBlocks';
private $_styleMatches = array();
private $_tidy, $_disableCharacterEscaping;
private $_tidy;
/**
* @param $tidy Instance of csstidy to use, false to turn off cleaning,
* and null to automatically instantiate
* @param $disable_character_escaping Whether or not to stop munging
* <, > and &. This can be set to true if the CSS will
* be placed in an external style and not inline.
* @param $tidy
* Instance of csstidy to use, false to turn off cleaning,
* and null to automatically instantiate
*/
public function __construct($tidy = null, $disable_character_escaping = false) {
public function __construct($tidy = null) {
if ($tidy === null) $tidy = new csstidy();
$this->_tidy = $tidy;
$this->_disableCharacterEscaping = $disable_character_escaping;
}
/**
@ -58,17 +102,45 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
/**
* Takes CSS (the stuff found in <style>) and cleans it.
* @warning Requires CSSTidy <http://csstidy.sourceforge.net/>
* @param $css CSS styling to clean
* @param $config Instance of HTMLPurifier_Config
* @param $css CSS styling to clean
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return Cleaned CSS
*/
public function cleanCSS($css, $config, $context) {
// prepare scope
$scope = $config->get('Filter', 'ExtractStyleBlocksScope');
if ($scope !== null) {
$scopes = array_map('trim', explode(',', $scope));
} else {
$scopes = array();
}
$this->_tidy->parse($css);
$css_definition = $config->getDefinition('CSS');
foreach ($this->_tidy->css as &$decls) {
foreach ($this->_tidy->css as $k => $decls) {
// $decls are all CSS declarations inside an @ selector
foreach ($decls as &$style) {
$new_decls = array();
foreach ($decls as $selector => $style) {
$selector = trim($selector);
if ($selector === '') continue; // should not happen
if ($selector[0] === '+') {
while ($selector !== '' && $selector[0] === '+') {
// we need to perform this multiple times
// to prevent +++ from getting through
$selector = trim(substr($selector, 1));
}
if ($selector === '') continue;
}
if (!empty($scopes)) {
$new_selector = array(); // because multiple ones are possible
$selectors = array_map('trim', explode(',', $selector));
foreach ($scopes as $s1) {
foreach ($selectors as $s2) {
$new_selector[] = "$s1 $s2";
}
}
$selector = implode(', ', $new_selector); // now it's a string
}
foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) {
unset($style[$name]);
@ -79,7 +151,9 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
if ($ret === false) unset($style[$name]);
else $style[$name] = $ret;
}
$new_decls[$selector] = $style;
}
$this->_tidy->css[$k] = $new_decls;
}
// remove stuff that shouldn't be used, could be reenabled
// after security risks are analyzed
@ -90,7 +164,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
$css = $printer->plain();
// we are going to escape any special characters <>& to ensure
// that no funny business occurs (i.e. </style> in a font-family prop).
if (!$this->_disableCharacterEscaping) {
if ($config->get('Filter', 'ExtractStyleBlocksEscaping')) {
$css = str_replace(
array('<', '>', '&'),
array('\3C ', '\3E ', '\26 '),

View File

@ -65,6 +65,8 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
/**
 * Cleans $input with a fresh ExtractStyleBlocks filter and asserts that
 * the result is identical to the expected CSS. Line endings of both the
 * input and the expectation are normalized before the comparison.
 *
 * @param $input  CSS to pass to cleanCSS()
 * @param $expect Expected output; true means "same as the input"
 */
function assertCleanCSS($input, $expect = true) {
    // Resolve the "unchanged" shorthand before either string is normalized.
    $expected = ($expect === true) ? $input : $expect;
    $this->normalize($input);
    $this->normalize($expected);
    $cleaner = new HTMLPurifier_Filter_ExtractStyleBlocks();
    $actual = $cleaner->cleanCSS($input, $this->config, $this->context);
    $this->assertIdentical($actual, $expected);
}
@ -103,10 +105,79 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
}
function test_cleanCSS_noEscapeCodes() {
$filter = new HTMLPurifier_Filter_ExtractStyleBlocks(null, true);
$input = ".class {\nfont-family:'</style>';\n}";
$result = $filter->cleanCSS($input, $this->config, $this->context);
$this->assertIdentical($result, $input);
$this->config->set('Filter', 'ExtractStyleBlocksEscaping', false);
$this->assertCleanCSS(
".class {\nfont-family:'</style>';\n}"
);
}
/**
 * A single scope is prepended to a single plain selector.
 */
function test_cleanCSS_scope() {
    $scope = '#foo';
    $this->config->set('Filter', 'ExtractStyleBlocksScope', $scope);
    $css = "p {\ntext-indent:1em;\n}";
    $scoped = "#foo p {\ntext-indent:1em;\n}";
    $this->assertCleanCSS($css, $scoped);
}
/**
 * With a comma-separated selector list, the scope is prepended to each
 * selector in the list.
 */
function test_cleanCSS_scopeWithSelectorCommas() {
    $scope = '#foo';
    $this->config->set('Filter', 'ExtractStyleBlocksScope', $scope);
    $css = "b, i {\ntext-decoration:underline;\n}";
    $scoped = "#foo b, #foo i {\ntext-decoration:underline;\n}";
    $this->assertCleanCSS($css, $scoped);
}
/**
 * A selector starting with a + combinator has the + removed, so the
 * scoped result is the same as for the plain selector.
 */
function test_cleanCSS_scopeWithNaughtySelector() {
    $scope = '#foo';
    $this->config->set('Filter', 'ExtractStyleBlocksScope', $scope);
    $css = " + p {\ntext-indent:1em;\n}";
    $scoped = "#foo p {\ntext-indent:1em;\n}";
    $this->assertCleanCSS($css, $scoped);
}
/**
 * Repeated leading + combinators, with or without spaces between them,
 * are all removed before the scope is applied.
 */
function test_cleanCSS_scopeWithMultipleNaughtySelectors() {
    $scope = '#foo';
    $this->config->set('Filter', 'ExtractStyleBlocksScope', $scope);
    $css = " ++ ++ p {\ntext-indent:1em;\n}";
    $scoped = "#foo p {\ntext-indent:1em;\n}";
    $this->assertCleanCSS($css, $scoped);
}
/**
 * A comma-separated scope produces one scoped selector per scope.
 */
function test_cleanCSS_scopeWithCommas() {
    $scope = '#foo, .bar';
    $this->config->set('Filter', 'ExtractStyleBlocksScope', $scope);
    $css = "p {\ntext-indent:1em;\n}";
    $scoped = "#foo p, .bar p {\ntext-indent:1em;\n}";
    $this->assertCleanCSS($css, $scoped);
}
/**
 * Comma lists in both the scope and the selector yield every
 * scope/selector combination, grouped by scope.
 */
function test_cleanCSS_scopeAllWithCommas() {
    $scope = '#foo, .bar';
    $this->config->set('Filter', 'ExtractStyleBlocksScope', $scope);
    $css = "p, div {\ntext-indent:1em;\n}";
    $scoped = "#foo p, #foo div, .bar p, .bar div {\ntext-indent:1em;\n}";
    $this->assertCleanCSS($css, $scoped);
}
/**
 * When scoping one rule produces a selector that already exists as
 * another rule in the input ("div" scoped by "p" becomes "p div"),
 * both rules are kept with their own declarations.
 */
function test_cleanCSS_scopeWithConflicts() {
    $scope = 'p';
    $this->config->set('Filter', 'ExtractStyleBlocksScope', $scope);
    $css = "div {
text-align:right;
}
p div {
text-align:left;
}";
    $scoped = "p div {
text-align:right;
}
p p div {
text-align:left;
}";
    $this->assertCleanCSS($css, $scoped);
}
}

View File

@ -39,6 +39,13 @@ class HTMLPurifier_Harness extends UnitTestCase
return array(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context);
}
/**
 * Rewrites Windows (\r\n) and bare carriage return (\r) line endings
 * as Unix (\n) line endings. The argument is modified in place;
 * nothing is returned.
 * @param $string String to normalize, passed by reference
 */
function normalize(&$string) {
    // "\r\n" is listed first so a CRLF pair collapses to a single "\n".
    $foreign_endings = array("\r\n", "\r");
    $unix_ending = "\n";
    $string = str_replace($foreign_endings, $unix_ending, $string);
}
/**
* If $expect is false, ignore $result and check if status failed.
* Otherwise, check if $status if true and $result === $expect.