mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 08:21:52 +00:00
Tighter CSS selector validation.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
parent
9de0785448
commit
1c7fedff5a
7
NEWS
7
NEWS
@ -15,6 +15,13 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
to http. Reported by Neike Taika-Tessaro.
|
||||
# Core.EscapeNonASCIICharacters now always transforms entities to
|
||||
entities, even if target encoding is UTF-8.
|
||||
# Tighten up selector validation in ExtractStyleBlocks.
|
||||
Non-syntactically valid selectors are now rejected, along with
|
||||
some of the more obscure ones such as attribute selectors, the
|
||||
:lang pseudoselector, and anything not in CSS2.1. Furthermore,
|
||||
ID and class selectors now work properly with the relevant
|
||||
configuration attributes. Also, mute errors when parsing CSS
|
||||
with CSS Tidy.
|
||||
! Added support for 'scope' attribute on tables.
|
||||
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
|
||||
! Properly handle sub-lists directly nested inside of lists in
|
||||
|
@ -323,23 +323,23 @@
|
||||
</directive>
|
||||
<directive id="Attr.EnableID">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>20</line>
|
||||
<line>30</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDPrefix">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>26</line>
|
||||
<line>36</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDPrefixLocal">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>28</line>
|
||||
<line>31</line>
|
||||
<line>38</line>
|
||||
<line>41</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDBlacklistRegexp">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>54</line>
|
||||
<line>64</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.">
|
||||
@ -411,17 +411,17 @@
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>41</line>
|
||||
<line>53</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.Scope">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>65</line>
|
||||
<line>77</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.Escaping">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>123</line>
|
||||
<line>275</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeIframe">
|
||||
@ -513,7 +513,7 @@
|
||||
</directive>
|
||||
<directive id="URI.HostBlacklist">
|
||||
<file name="HTMLPurifier/URIFilter/HostBlacklist.php">
|
||||
<line>8</line>
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.MungeResources">
|
||||
|
@ -91,6 +91,7 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
|
@ -85,6 +85,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
|
24
library/HTMLPurifier/AttrDef/CSS/Ident.php
Normal file
24
library/HTMLPurifier/AttrDef/CSS/Ident.php
Normal file
@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Validates based on {ident} CSS grammar production
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = trim($string);
|
||||
|
||||
// early abort: '' and '0' (strings that convert to false) are invalid
|
||||
if (!$string) return false;
|
||||
|
||||
$pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
|
||||
if (!preg_match($pattern, $string)) return false;
|
||||
return $string;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@ -12,12 +12,22 @@
|
||||
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
// ref functionality disabled, since we also have to verify
|
||||
// whether or not the ID it refers to exists
|
||||
// selector is NOT a valid thing to use for IDREFs, because IDREFs
|
||||
// *must* target IDs that exist, whereas selector #ids do not.
|
||||
|
||||
/**
|
||||
* Determines whether or not we're validating an ID in a CSS
|
||||
* selector context.
|
||||
*/
|
||||
protected $selector;
|
||||
|
||||
public function __construct($selector = false) {
|
||||
$this->selector = $selector;
|
||||
}
|
||||
|
||||
public function validate($id, $config, $context) {
|
||||
|
||||
if (!$config->get('Attr.EnableID')) return false;
|
||||
if (!$this->selector && !$config->get('Attr.EnableID')) return false;
|
||||
|
||||
$id = trim($id); // trim it first
|
||||
|
||||
@ -33,10 +43,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||
}
|
||||
|
||||
//if (!$this->ref) {
|
||||
if (!$this->selector) {
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
//}
|
||||
}
|
||||
|
||||
// we purposely avoid using regex, hopefully this is faster
|
||||
|
||||
@ -56,7 +66,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
return false;
|
||||
}
|
||||
|
||||
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
||||
if (!$this->selector && $result) $id_accumulator->add($id);
|
||||
|
||||
// if no change was made to the ID, return the result
|
||||
// else, return the new id if stripping whitespace made it
|
||||
|
@ -21,10 +21,22 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
||||
private $_styleMatches = array();
|
||||
private $_tidy;
|
||||
|
||||
private $_id_attrdef;
|
||||
private $_class_attrdef;
|
||||
private $_enum_attrdef;
|
||||
|
||||
public function __construct() {
|
||||
$this->_tidy = new csstidy();
|
||||
$this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
|
||||
$this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
|
||||
$this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus'));
|
||||
}
|
||||
|
||||
/**
|
||||
* Error-handler that mutes errors, alternative to shut-up operator.
|
||||
*/
|
||||
public static function muteErrorHandler() {}
|
||||
|
||||
/**
|
||||
* Save the contents of CSS blocks to style matches
|
||||
* @param $matches preg_replace style $matches array
|
||||
@ -77,27 +89,166 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
||||
$css = substr($css, 0, -3);
|
||||
}
|
||||
$css = trim($css);
|
||||
set_error_handler(array('HTMLPurifier_Filter_ExtractStyleBlocks', 'muteErrorHandler'));
|
||||
$this->_tidy->parse($css);
|
||||
restore_error_handler();
|
||||
$css_definition = $config->getDefinition('CSS');
|
||||
$html_definition = $config->getDefinition('HTML');
|
||||
$new_css = array();
|
||||
foreach ($this->_tidy->css as $k => $decls) {
|
||||
// $decls are all CSS declarations inside an @ selector
|
||||
$new_decls = array();
|
||||
foreach ($decls as $selector => $style) {
|
||||
$selector = trim($selector);
|
||||
if ($selector === '') continue; // should not happen
|
||||
if ($selector[0] === '+') {
|
||||
if ($selector !== '' && $selector[0] === '+') continue;
|
||||
}
|
||||
if (!empty($scopes)) {
|
||||
$new_selector = array(); // because multiple ones are possible
|
||||
// Parse the selector
|
||||
// Here is the relevant part of the CSS grammar:
|
||||
//
|
||||
// ruleset
|
||||
// : selector [ ',' S* selector ]* '{' ...
|
||||
// selector
|
||||
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
|
||||
// combinator
|
||||
// : '+' S*
|
||||
// : '>' S*
|
||||
// simple_selector
|
||||
// : element_name [ HASH | class | attrib | pseudo ]*
|
||||
// | [ HASH | class | attrib | pseudo ]+
|
||||
// element_name
|
||||
// : IDENT | '*'
|
||||
// ;
|
||||
// class
|
||||
// : '.' IDENT
|
||||
// ;
|
||||
// attrib
|
||||
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
|
||||
// [ IDENT | STRING ] S* ]? ']'
|
||||
// ;
|
||||
// pseudo
|
||||
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
|
||||
// ;
|
||||
//
|
||||
// For reference, here are the relevant tokens:
|
||||
//
|
||||
// HASH #{name}
|
||||
// IDENT {ident}
|
||||
// INCLUDES ==
|
||||
// DASHMATCH |=
|
||||
// STRING {string}
|
||||
// FUNCTION {ident}\(
|
||||
//
|
||||
// And the lexical scanner tokens
|
||||
//
|
||||
// name {nmchar}+
|
||||
// nmchar [_a-z0-9-]|{nonascii}|{escape}
|
||||
// nonascii [\240-\377]
|
||||
// escape {unicode}|\\[^\r\n\f0-9a-f]
|
||||
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
|
||||
// ident -?{nmstart}{nmchar*}
|
||||
// nmstart [_a-z]|{nonascii}|{escape}
|
||||
// string {string1}|{string2}
|
||||
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
||||
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
|
||||
//
|
||||
// We'll implement a subset (in order to reduce attack
|
||||
// surface); in particular:
|
||||
//
|
||||
// - No Unicode support
|
||||
// - No escapes support
|
||||
// - No string support (by proxy no attrib support)
|
||||
// - element_name is matched against allowed
|
||||
// elements (some people might find this
|
||||
// annoying...)
|
||||
// - Pseudo-elements one of :first-child, :link,
|
||||
// :visited, :active, :hover, :focus
|
||||
|
||||
// handle ruleset
|
||||
$selectors = array_map('trim', explode(',', $selector));
|
||||
foreach ($scopes as $s1) {
|
||||
foreach ($selectors as $s2) {
|
||||
$new_selector[] = "$s1 $s2";
|
||||
$new_selectors = array();
|
||||
foreach ($selectors as $sel) {
|
||||
// split on +, > and spaces
|
||||
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
// even indices are chunks, odd indices are
|
||||
// delimiters
|
||||
$nsel = null;
|
||||
$delim = null; // guaranteed to be non-null after
|
||||
// two loop iterations
|
||||
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
|
||||
$x = $basic_selectors[$i];
|
||||
if ($i % 2) {
|
||||
// delimiter
|
||||
if ($x === ' ') {
|
||||
$delim = ' ';
|
||||
} else {
|
||||
$delim = ' ' . $x . ' ';
|
||||
}
|
||||
} else {
|
||||
// simple selector
|
||||
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
$sdelim = null;
|
||||
$nx = null;
|
||||
for ($j = 0, $cc = count($components); $j < $cc; $j ++) {
|
||||
$y = $components[$j];
|
||||
if ($j === 0) {
|
||||
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
|
||||
$nx = $y;
|
||||
} else {
|
||||
// $nx stays null; this matters
|
||||
// if we don't manage to find
|
||||
// any valid selector content,
|
||||
// in which case we ignore the
|
||||
// outer $delim
|
||||
}
|
||||
} elseif ($j % 2) {
|
||||
// set delimiter
|
||||
$sdelim = $y;
|
||||
} else {
|
||||
$attrdef = null;
|
||||
if ($sdelim === '#') {
|
||||
$attrdef = $this->_id_attrdef;
|
||||
} elseif ($sdelim === '.') {
|
||||
$attrdef = $this->_class_attrdef;
|
||||
} elseif ($sdelim === ':') {
|
||||
$attrdef = $this->_enum_attrdef;
|
||||
} else {
|
||||
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
|
||||
}
|
||||
$r = $attrdef->validate($y, $config, $context);
|
||||
if ($r !== false) {
|
||||
if ($r !== true) {
|
||||
$y = $r;
|
||||
}
|
||||
if ($nx === null) {
|
||||
$nx = '';
|
||||
}
|
||||
$nx .= $sdelim . $y;
|
||||
}
|
||||
}
|
||||
$selector = implode(', ', $new_selector); // now it's a string
|
||||
}
|
||||
if ($nx !== null) {
|
||||
if ($nsel === null) {
|
||||
$nsel = $nx;
|
||||
} else {
|
||||
$nsel .= $delim . $nx;
|
||||
}
|
||||
} else {
|
||||
// delimiters to the left of invalid
|
||||
// basic selector ignored
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($nsel !== null) {
|
||||
if (!empty($scopes)) {
|
||||
foreach ($scopes as $s) {
|
||||
$new_selectors[] = "$s $nsel";
|
||||
}
|
||||
} else {
|
||||
$new_selectors[] = $nsel;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (empty($new_selectors)) continue;
|
||||
$selector = implode(', ', $new_selectors);
|
||||
foreach ($style as $name => $value) {
|
||||
if (!isset($css_definition->info[$name])) {
|
||||
unset($style[$name]);
|
||||
@ -110,10 +261,11 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
||||
}
|
||||
$new_decls[$selector] = $style;
|
||||
}
|
||||
$this->_tidy->css[$k] = $new_decls;
|
||||
$new_css[$k] = $new_decls;
|
||||
}
|
||||
// remove stuff that shouldn't be used, could be reenabled
|
||||
// after security risks are analyzed
|
||||
$this->_tidy->css = $new_css;
|
||||
$this->_tidy->import = array();
|
||||
$this->_tidy->charset = null;
|
||||
$this->_tidy->namespace = null;
|
||||
|
@ -10,7 +10,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
|
||||
function test_tokenizeHTML_extractStyleBlocks() {
|
||||
$this->config->set('Filter.ExtractStyleBlocks', true);
|
||||
$purifier = new HTMLPurifier($this->config);
|
||||
$result = $purifier->purify('<style type="text/css">.foo {text-align:center;bogus:remove-me;}</style>Test<style>* {font-size:12pt;}</style>');
|
||||
$result = $purifier->purify('<style type="text/css">.foo {text-align:center;bogus:remove-me;} body.class[foo="attr"] {text-align:right;}</style>Test<style>* {font-size:12pt;}</style>');
|
||||
$this->assertIdentical($result, 'Test');
|
||||
$this->assertIdentical($purifier->context->get('StyleBlocks'),
|
||||
array(
|
||||
@ -153,7 +153,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
|
||||
$this->config->set('Filter.ExtractStyleBlocks.Scope', '#foo, .bar');
|
||||
$this->assertCleanCSS(
|
||||
"p, div {\ntext-indent:1em;\n}",
|
||||
"#foo p, #foo div, .bar p, .bar div {\ntext-indent:1em;\n}"
|
||||
"#foo p, .bar p, #foo div, .bar div {\ntext-indent:1em;\n}"
|
||||
);
|
||||
}
|
||||
|
||||
@ -191,6 +191,41 @@ text-align:right;
|
||||
);
|
||||
}
|
||||
|
||||
function test_atSelector() {
|
||||
$this->assertCleanCSS(
|
||||
"{
|
||||
b { text-align: center; }
|
||||
}",
|
||||
""
|
||||
);
|
||||
}
|
||||
|
||||
function test_selectorValidation() {
|
||||
$this->assertCleanCSS(
|
||||
"&, & {
|
||||
text-align: center;
|
||||
}",
|
||||
""
|
||||
);
|
||||
$this->assertCleanCSS(
|
||||
"&, b {
|
||||
text-align:center;
|
||||
}",
|
||||
"b {
|
||||
text-align:center;
|
||||
}"
|
||||
);
|
||||
$this->assertCleanCSS(
|
||||
"& a #foo:hover.bar +b > i {
|
||||
text-align:center;
|
||||
}",
|
||||
"a #foo:hover.bar + b \\3E i {
|
||||
text-align:center;
|
||||
}"
|
||||
);
|
||||
$this->assertCleanCSS("doesnt-exist { text-align:center }", "");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
Loading…
Reference in New Issue
Block a user