0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-08 15:11:51 +00:00

Rewrite CSS url() and font-family output logic.

The new logic is as follows:

* Given a URL to insert into url(), check that it is properly URL
  encoded (in particular, a doublequote and backslash never occurs
  within it) and then place it as url("http://example.com").

* Given a font name, if it is strictly alphanumeric, it is safe to omit
  quotes. Otherwise, wrap in double quotes and replace '"' with '\22 '
  (note trailing space) and '\' with '\5C ' (ditto).

We introduce expandCSSEscape() which is a hack for common parsing
idioms in CSS; this means that CSS escapes are now recognized inside
URLs as well as unquoted font names.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang 2010-05-21 11:53:52 -04:00
parent df3100b1b3
commit d3abcb90e3
15 changed files with 99 additions and 86 deletions

3
NEWS
View File

@ -10,6 +10,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
========================== ==========================
4.1.1, unknown release date 4.1.1, unknown release date
- Rewrite CSS output logic for font-family and url(). Thanks Mario
Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
- Emit an error for CollectErrors if a body is extracted - Emit an error for CollectErrors if a body is extracted
- Fix bug where in background-position for center keyword handling. - Fix bug where in background-position for center keyword handling.
- Fix infinite loop when a wrapper element is inserted in a context - Fix infinite loop when a wrapper element is inserted in a context

View File

@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
} }
/**
* Parses a possibly escaped CSS string and returns the "pure"
* version of it.
*/
protected function expandCSSEscape($string) {
// flexibly parse it
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
if ($string[$i] === '\\') {
$i++;
if ($i >= $c) {
$ret .= '\\';
break;
}
if (ctype_xdigit($string[$i])) {
$code = $string[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($string[$i])) break;
$code .= $string[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$ret .= $char;
if ($i < $c && trim($string[$i]) !== '') $i--;
continue;
}
if ($string[$i] === "\n") continue;
}
$ret .= $string[$i];
}
return $ret;
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$quote = $font[0]; $quote = $font[0];
if ($font[$length - 1] !== $quote) continue; if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2); $font = substr($font, 1, $length - 2);
$new_font = '';
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) break;
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$new_font .= $char;
if ($i < $c && trim($font[$i]) !== '') $i--;
continue;
}
if ($font[$i] === "\n") continue;
}
$new_font .= $font[$i];
} }
$font = $new_font; $font = $this->expandCSSEscape($font);
}
// $font is a pure representation of the font name // $font is a pure representation of the font name
if (ctype_alnum($font) && $font !== '') { if (ctype_alnum($font) && $font !== '') {
@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
continue; continue;
} }
// complicated font, requires quoting // bugger out on whitespace. form feed (0C) really
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// armor single quotes and new lines // These ugly transforms don't pose a security
$font = str_replace("\\", "\\\\", $font); // risk (as \\ and \" might). We could try to be clever and
$font = str_replace("'", "\\'", $font); // use single-quote wrapping when there is a double quote
$final .= "'$font', "; // present, but I have choosen not to implement that.
// (warning: this code relies on the selection of quotation
// mark below)
$font = str_replace('\\', '\\5C ', $font);
$font = str_replace('"', '\\22 ', $font);
// complicated font, requires quoting
$final .= "\"$font\", "; // note that this will later get turned into &quot;
} }
$final = rtrim($final, ', '); $final = rtrim($final, ', ');
if ($final === '') return false; if ($final === '') return false;

View File

@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
$uri = substr($uri, 1, $new_length - 1); $uri = substr($uri, 1, $new_length - 1);
} }
$keys = array( '(', ')', ',', ' ', '"', "'"); $uri = $this->expandCSSEscape($uri);
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
$uri = str_replace($values, $keys, $uri);
$result = parent::validate($uri, $config, $context); $result = parent::validate($uri, $config, $context);
if ($result === false) return false; if ($result === false) return false;
// escape necessary characters according to CSS spec // extra sanity check; should have been done by URI
// except for the comma, none of these should appear in the $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
// URI at all
$result = str_replace($keys, $values, $result);
return "url('$result')"; return "url(\"$result\")";
} }

View File

@ -8,12 +8,12 @@ class HTMLPurifier_AttrDef_CSS_BackgroundTest extends HTMLPurifier_AttrDefHarnes
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();
$this->def = new HTMLPurifier_AttrDef_CSS_Background($config); $this->def = new HTMLPurifier_AttrDef_CSS_Background($config);
$valid = '#333 url(\'chess.png\') repeat fixed 50% top'; $valid = '#333 url("chess.png") repeat fixed 50% top';
$this->assertDef($valid); $this->assertDef($valid);
$this->assertDef('url("chess.png") #333 50% top repeat fixed', $valid); $this->assertDef('url(\'chess.png\') #333 50% top repeat fixed', $valid);
$this->assertDef( $this->assertDef(
'rgb(34, 56, 33) url(chess.png) repeat fixed top', 'rgb(34, 56, 33) url(chess.png) repeat fixed top',
'rgb(34,56,33) url(\'chess.png\') repeat fixed top' 'rgb(34,56,33) url("chess.png") repeat fixed top'
); );
} }

View File

@ -8,29 +8,29 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes
$this->def = new HTMLPurifier_AttrDef_CSS_FontFamily(); $this->def = new HTMLPurifier_AttrDef_CSS_FontFamily();
$this->assertDef('Gill, Helvetica, sans-serif'); $this->assertDef('Gill, Helvetica, sans-serif');
$this->assertDef('\'Times New Roman\', serif'); $this->assertDef('"Times New Roman", serif');
$this->assertDef('"Times New Roman"', "'Times New Roman'"); $this->assertDef('\'Times New Roman\'', '"Times New Roman"');
$this->assertDef('01234'); $this->assertDef('01234');
$this->assertDef(',', false); $this->assertDef(',', false);
$this->assertDef('Times New Roman, serif', '\'Times New Roman\', serif'); $this->assertDef('Times New Roman, serif', '"Times New Roman", serif');
$this->assertDef($d = "'John\\'s Font'"); $this->assertDef($d = '"John\'s Font"');
$this->assertDef("John's Font", $d); $this->assertDef("John's Font", $d);
$this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'"); $this->assertDef($d = "\"\xE5\xAE\x8B\xE4\xBD\x93\"");
$this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d); $this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d);
$this->assertDef("'\\','f'", "'\\\\', f"); $this->assertDef("'\\','f'", "\"\\5C \", f");
$this->assertDef("'\\01'", "''"); $this->assertDef("'\\01'", "\"\"");
$this->assertDef("'\\20'", "' '"); $this->assertDef("'\\20'", "\" \"");
$this->assertDef("\\0020", "'\\\\0020'"); $this->assertDef("\\0020", "\" \"");
$this->assertDef("'\\000045'", "E"); $this->assertDef("'\\000045'", "E");
$this->assertDef("','", false); $this->assertDef("','", false);
$this->assertDef("',' foobar','", "' foobar'"); $this->assertDef("',' foobar','", "\" foobar\"");
$this->assertDef("'\\27'", "'\''"); $this->assertDef("'\\27'", "\"'\"");
$this->assertDef('"\\22"', "'\"'"); $this->assertDef('"\\22"', "\"\\22 \"");
$this->assertDef('"\\""', "'\"'"); $this->assertDef('"\\""', "\"\\22 \"");
$this->assertDef('"\'"', "'\\''"); $this->assertDef('"\'"', "\"'\"");
$this->assertDef("'\\000045a'", "Ea"); $this->assertDef("'\\000045a'", "Ea");
$this->assertDef("'\\00045 a'", "Ea"); $this->assertDef("'\\00045 a'", "Ea");
$this->assertDef("'\\00045 a'", "'E a'"); $this->assertDef("'\\00045 a'", "\"E a\"");
$this->assertDef("'\\\nf'", "f"); $this->assertDef("'\\\nf'", "f");
} }

View File

@ -11,10 +11,10 @@ class HTMLPurifier_AttrDef_CSS_FontTest extends HTMLPurifier_AttrDefHarness
// hodgepodge of usage cases from W3C spec, but " -> ' // hodgepodge of usage cases from W3C spec, but " -> '
$this->assertDef('12px/14px sans-serif'); $this->assertDef('12px/14px sans-serif');
$this->assertDef('80% sans-serif'); $this->assertDef('80% sans-serif');
$this->assertDef('x-large/110% \'New Century Schoolbook\', serif'); $this->assertDef('x-large/110% "New Century Schoolbook", serif');
$this->assertDef('bold italic large Palatino, serif'); $this->assertDef('bold italic large Palatino, serif');
$this->assertDef('normal small-caps 120%/120% fantasy'); $this->assertDef('normal small-caps 120%/120% fantasy');
$this->assertDef('300 italic 1.3em/1.7em \'FB Armada\', sans-serif'); $this->assertDef('300 italic 1.3em/1.7em "FB Armada", sans-serif');
$this->assertDef('600 9px Charcoal'); $this->assertDef('600 9px Charcoal');
$this->assertDef('600 9px/ 12px Charcoal', '600 9px/12px Charcoal'); $this->assertDef('600 9px/ 12px Charcoal', '600 9px/12px Charcoal');

View File

@ -13,14 +13,14 @@ class HTMLPurifier_AttrDef_CSS_ListStyleTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('circle outside'); $this->assertDef('circle outside');
$this->assertDef('inside'); $this->assertDef('inside');
$this->assertDef('none'); $this->assertDef('none');
$this->assertDef('url(\'foo.gif\')'); $this->assertDef('url("foo.gif")');
$this->assertDef('circle url(\'foo.gif\') inside'); $this->assertDef('circle url("foo.gif") inside');
// invalid values // invalid values
$this->assertDef('outside inside', 'outside'); $this->assertDef('outside inside', 'outside');
// ordering // ordering
$this->assertDef('url(foo.gif) none', 'none url(\'foo.gif\')'); $this->assertDef('url(foo.gif) none', 'none url("foo.gif")');
$this->assertDef('circle lower-alpha', 'circle'); $this->assertDef('circle lower-alpha', 'circle');
// the spec is ambiguous about what happens in these // the spec is ambiguous about what happens in these
// cases, so we're going off the W3C CSS validator // cases, so we're going off the W3C CSS validator

View File

@ -12,20 +12,16 @@ class HTMLPurifier_AttrDef_CSS_URITest extends HTMLPurifier_AttrDefHarness
// we could be nice but we won't be // we could be nice but we won't be
$this->assertDef('http://www.example.com/', false); $this->assertDef('http://www.example.com/', false);
// no quotes are used, since that's the most widely supported
// syntax
$this->assertDef('url(', false); $this->assertDef('url(', false);
$this->assertDef('url(\'\')', true); $this->assertDef('url("")', true);
$result = "url('http://www.example.com/')"; $result = 'url("http://www.example.com/")';
$this->assertDef('url(http://www.example.com/)', $result); $this->assertDef('url(http://www.example.com/)', $result);
$this->assertDef('url("http://www.example.com/")', $result); $this->assertDef('url("http://www.example.com/")', $result);
$this->assertDef("url('http://www.example.com/')", $result); $this->assertDef("url('http://www.example.com/')", $result);
$this->assertDef( $this->assertDef(
' url( "http://www.example.com/" ) ', $result); ' url( "http://www.example.com/" ) ', $result);
// escaping
$this->assertDef("url(http://www.example.com/foo,bar\))", $this->assertDef("url(http://www.example.com/foo,bar\))",
"url('http://www.example.com/foo\,bar\)')"); 'url("http://www.example.com/foo,bar)")');
} }
} }

View File

@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('text-transform:capitalize;'); $this->assertDef('text-transform:capitalize;');
$this->assertDef('background-color:rgb(0,0,255);'); $this->assertDef('background-color:rgb(0,0,255);');
$this->assertDef('background-color:transparent;'); $this->assertDef('background-color:transparent;');
$this->assertDef('background:#333 url(\'chess.png\') repeat fixed 50% top;'); $this->assertDef('background:#333 url("chess.png") repeat fixed 50% top;');
$this->assertDef('color:#F00;'); $this->assertDef('color:#F00;');
$this->assertDef('border-top-color:#F00;'); $this->assertDef('border-top-color:#F00;');
$this->assertDef('border-color:#F00 #FF0;'); $this->assertDef('border-color:#F00 #FF0;');
@ -62,7 +62,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('width:-50px;', false); $this->assertDef('width:-50px;', false);
$this->assertDef('text-decoration:underline;'); $this->assertDef('text-decoration:underline;');
$this->assertDef('font-family:sans-serif;'); $this->assertDef('font-family:sans-serif;');
$this->assertDef('font-family:Gill, \'Times New Roman\', sans-serif;'); $this->assertDef('font-family:Gill, "Times New Roman", sans-serif;');
$this->assertDef('font:12px serif;'); $this->assertDef('font:12px serif;');
$this->assertDef('border:1px solid #000;'); $this->assertDef('border:1px solid #000;');
$this->assertDef('border-bottom:2em double #FF00FA;'); $this->assertDef('border-bottom:2em double #FF00FA;');
@ -73,9 +73,9 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('vertical-align:12px;'); $this->assertDef('vertical-align:12px;');
$this->assertDef('vertical-align:50%;'); $this->assertDef('vertical-align:50%;');
$this->assertDef('table-layout:fixed;'); $this->assertDef('table-layout:fixed;');
$this->assertDef('list-style-image:url(\'nice.jpg\');'); $this->assertDef('list-style-image:url("nice.jpg");');
$this->assertDef('list-style:disc url(\'nice.jpg\') inside;'); $this->assertDef('list-style:disc url("nice.jpg") inside;');
$this->assertDef('background-image:url(\'foo.jpg\');'); $this->assertDef('background-image:url("foo.jpg");');
$this->assertDef('background-image:none;'); $this->assertDef('background-image:none;');
$this->assertDef('background-repeat:repeat-y;'); $this->assertDef('background-repeat:repeat-y;');
$this->assertDef('background-attachment:fixed;'); $this->assertDef('background-attachment:fixed;');

View File

@ -81,7 +81,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
function test_cleanCSS_angledBrackets() { function test_cleanCSS_angledBrackets() {
$this->assertCleanCSS( $this->assertCleanCSS(
".class {\nfont-family:'</style>';\n}", ".class {\nfont-family:'</style>';\n}",
".class {\nfont-family:'\\3C /style\\3E ';\n}" ".class {\nfont-family:\"\\3C /style\\3E \";\n}"
); );
} }
@ -99,14 +99,14 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
function test_cleanCSS_escapeCodes() { function test_cleanCSS_escapeCodes() {
$this->assertCleanCSS( $this->assertCleanCSS(
".class {\nfont-family:'\\3C /style\\3E ';\n}" ".class {\nfont-family:\"\\3C /style\\3E \";\n}"
); );
} }
function test_cleanCSS_noEscapeCodes() { function test_cleanCSS_noEscapeCodes() {
$this->config->set('Filter.ExtractStyleBlocks.Escaping', false); $this->config->set('Filter.ExtractStyleBlocks.Escaping', false);
$this->assertCleanCSS( $this->assertCleanCSS(
".class {\nfont-family:'</style>';\n}" ".class {\nfont-family:\"</style>\";\n}"
); );
} }

View File

@ -7,5 +7,5 @@ URI.MungeResources = true
<img src="http://example.com" style="background-image:url(http://example.com);" alt="example.com" /> <img src="http://example.com" style="background-image:url(http://example.com);" alt="example.com" />
--EXPECT-- --EXPECT--
<a href="/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=&amp;n=a&amp;m=href&amp;p=">Link</a> <a href="/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=&amp;n=a&amp;m=href&amp;p=">Link</a>
<img src="/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=1&amp;n=img&amp;m=src&amp;p=" style="background-image:url('/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=1&amp;n=img&amp;m=style&amp;p=background-image');" alt="example.com" /> <img src="/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=1&amp;n=img&amp;m=src&amp;p=" style="background-image:url(&quot;/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=1&amp;n=img&amp;m=style&amp;p=background-image&quot;);" alt="example.com" />
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4

View File

@ -4,5 +4,5 @@ if (!function_exists('iconv')) return true;
Core.Encoding = "Shift_JIS" Core.Encoding = "Shift_JIS"
Core.EscapeNonASCIICharacters = true Core.EscapeNonASCIICharacters = true
--HTML-- --HTML--
<b style="font-family:'&#165;';">111</b> <b style="font-family:&quot;&#165;&quot;;">111</b>
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4

View File

@ -3,7 +3,7 @@ if (!function_exists('iconv')) return true;
--INI-- --INI--
Core.Encoding = Shift_JIS Core.Encoding = Shift_JIS
--HTML-- --HTML--
<b style="font-family:'&#165;';">111</b> <b style="font-family:&quot;&#165;&quot;;">111</b>
--EXPECT-- --EXPECT--
<b style="font-family:'';">111</b> <b style="font-family:&quot;&quot;;">111</b>
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4

View File

@ -1,5 +1,5 @@
--HTML-- --HTML--
<table background="logo.png"><tr><td>asdf</td></tr></table> <table background="logo.png"><tr><td>asdf</td></tr></table>
--EXPECT-- --EXPECT--
<table style="background-image:url('logo.png');"><tr><td>asdf</td></tr></table> <table style="background-image:url(&quot;logo.png&quot;);"><tr><td>asdf</td></tr></table>
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4