mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 16:31:53 +00:00
Rewrite CSS url() and font-family output logic.
The new logic is as follows: * Given a URL to insert into url(), check that it is properly URL encoded (in particular, a doublequote and backslash never occurs within it) and then place it as url("http://example.com"). * Given a font name, if it is strictly alphanumeric, it is safe to omit quotes. Otherwise, wrap in double quotes and replace '"' with '\22 ' (note trailing space) and '\' with '\5C ' (ditto). We introduce expandCSSEscape() which is a hack for common parsing idioms in CSS; this means that CSS escapes are now recognized inside URLs as well as unquoted font names. Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
parent
df3100b1b3
commit
d3abcb90e3
3
NEWS
3
NEWS
@ -10,6 +10,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
==========================
|
||||
|
||||
4.1.1, unknown release date
|
||||
- Rewrite CSS output logic for font-family and url(). Thanks Mario
|
||||
Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
|
||||
Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
|
||||
- Emit an error for CollectErrors if a body is extracted
|
||||
- Fix bug where in background-position for center keyword handling.
|
||||
- Fix infinite loop when a wrapper element is inserted in a context
|
||||
|
@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
|
||||
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a possibly escaped CSS string and returns the "pure"
|
||||
* version of it.
|
||||
*/
|
||||
protected function expandCSSEscape($string) {
|
||||
// flexibly parse it
|
||||
$ret = '';
|
||||
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
|
||||
if ($string[$i] === '\\') {
|
||||
$i++;
|
||||
if ($i >= $c) {
|
||||
$ret .= '\\';
|
||||
break;
|
||||
}
|
||||
if (ctype_xdigit($string[$i])) {
|
||||
$code = $string[$i];
|
||||
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
|
||||
if (!ctype_xdigit($string[$i])) break;
|
||||
$code .= $string[$i];
|
||||
}
|
||||
// We have to be extremely careful when adding
|
||||
// new characters, to make sure we're not breaking
|
||||
// the encoding.
|
||||
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
|
||||
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
|
||||
$ret .= $char;
|
||||
if ($i < $c && trim($string[$i]) !== '') $i--;
|
||||
continue;
|
||||
}
|
||||
if ($string[$i] === "\n") continue;
|
||||
}
|
||||
$ret .= $string[$i];
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
$quote = $font[0];
|
||||
if ($font[$length - 1] !== $quote) continue;
|
||||
$font = substr($font, 1, $length - 2);
|
||||
|
||||
$new_font = '';
|
||||
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
|
||||
if ($font[$i] === '\\') {
|
||||
$i++;
|
||||
if ($i >= $c) {
|
||||
$new_font .= '\\';
|
||||
break;
|
||||
}
|
||||
if (ctype_xdigit($font[$i])) {
|
||||
$code = $font[$i];
|
||||
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
|
||||
if (!ctype_xdigit($font[$i])) break;
|
||||
$code .= $font[$i];
|
||||
}
|
||||
// We have to be extremely careful when adding
|
||||
// new characters, to make sure we're not breaking
|
||||
// the encoding.
|
||||
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
|
||||
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
|
||||
$new_font .= $char;
|
||||
if ($i < $c && trim($font[$i]) !== '') $i--;
|
||||
continue;
|
||||
}
|
||||
if ($font[$i] === "\n") continue;
|
||||
}
|
||||
$new_font .= $font[$i];
|
||||
}
|
||||
|
||||
$font = $new_font;
|
||||
}
|
||||
$font = $this->expandCSSEscape($font);
|
||||
|
||||
// $font is a pure representation of the font name
|
||||
|
||||
if (ctype_alnum($font) && $font !== '') {
|
||||
@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
continue;
|
||||
}
|
||||
|
||||
// complicated font, requires quoting
|
||||
// bugger out on whitespace. form feed (0C) really
|
||||
// shouldn't show up regardless
|
||||
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
|
||||
|
||||
// armor single quotes and new lines
|
||||
$font = str_replace("\\", "\\\\", $font);
|
||||
$font = str_replace("'", "\\'", $font);
|
||||
$final .= "'$font', ";
|
||||
// These ugly transforms don't pose a security
|
||||
// risk (as \\ and \" might). We could try to be clever and
|
||||
// use single-quote wrapping when there is a double quote
|
||||
// present, but I have choosen not to implement that.
|
||||
// (warning: this code relies on the selection of quotation
|
||||
// mark below)
|
||||
$font = str_replace('\\', '\\5C ', $font);
|
||||
$font = str_replace('"', '\\22 ', $font);
|
||||
|
||||
// complicated font, requires quoting
|
||||
$final .= "\"$font\", "; // note that this will later get turned into "
|
||||
}
|
||||
$final = rtrim($final, ', ');
|
||||
if ($final === '') return false;
|
||||
|
@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||
$uri = substr($uri, 1, $new_length - 1);
|
||||
}
|
||||
|
||||
$keys = array( '(', ')', ',', ' ', '"', "'");
|
||||
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
|
||||
$uri = str_replace($values, $keys, $uri);
|
||||
$uri = $this->expandCSSEscape($uri);
|
||||
|
||||
$result = parent::validate($uri, $config, $context);
|
||||
|
||||
if ($result === false) return false;
|
||||
|
||||
// escape necessary characters according to CSS spec
|
||||
// except for the comma, none of these should appear in the
|
||||
// URI at all
|
||||
$result = str_replace($keys, $values, $result);
|
||||
// extra sanity check; should have been done by URI
|
||||
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
|
||||
|
||||
return "url('$result')";
|
||||
return "url(\"$result\")";
|
||||
|
||||
}
|
||||
|
||||
|
@ -8,12 +8,12 @@ class HTMLPurifier_AttrDef_CSS_BackgroundTest extends HTMLPurifier_AttrDefHarnes
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$this->def = new HTMLPurifier_AttrDef_CSS_Background($config);
|
||||
|
||||
$valid = '#333 url(\'chess.png\') repeat fixed 50% top';
|
||||
$valid = '#333 url("chess.png") repeat fixed 50% top';
|
||||
$this->assertDef($valid);
|
||||
$this->assertDef('url("chess.png") #333 50% top repeat fixed', $valid);
|
||||
$this->assertDef('url(\'chess.png\') #333 50% top repeat fixed', $valid);
|
||||
$this->assertDef(
|
||||
'rgb(34, 56, 33) url(chess.png) repeat fixed top',
|
||||
'rgb(34,56,33) url(\'chess.png\') repeat fixed top'
|
||||
'rgb(34,56,33) url("chess.png") repeat fixed top'
|
||||
);
|
||||
|
||||
}
|
||||
|
@ -8,29 +8,29 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes
|
||||
$this->def = new HTMLPurifier_AttrDef_CSS_FontFamily();
|
||||
|
||||
$this->assertDef('Gill, Helvetica, sans-serif');
|
||||
$this->assertDef('\'Times New Roman\', serif');
|
||||
$this->assertDef('"Times New Roman"', "'Times New Roman'");
|
||||
$this->assertDef('"Times New Roman", serif');
|
||||
$this->assertDef('\'Times New Roman\'', '"Times New Roman"');
|
||||
$this->assertDef('01234');
|
||||
$this->assertDef(',', false);
|
||||
$this->assertDef('Times New Roman, serif', '\'Times New Roman\', serif');
|
||||
$this->assertDef($d = "'John\\'s Font'");
|
||||
$this->assertDef('Times New Roman, serif', '"Times New Roman", serif');
|
||||
$this->assertDef($d = '"John\'s Font"');
|
||||
$this->assertDef("John's Font", $d);
|
||||
$this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'");
|
||||
$this->assertDef($d = "\"\xE5\xAE\x8B\xE4\xBD\x93\"");
|
||||
$this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d);
|
||||
$this->assertDef("'\\','f'", "'\\\\', f");
|
||||
$this->assertDef("'\\01'", "''");
|
||||
$this->assertDef("'\\20'", "' '");
|
||||
$this->assertDef("\\0020", "'\\\\0020'");
|
||||
$this->assertDef("'\\','f'", "\"\\5C \", f");
|
||||
$this->assertDef("'\\01'", "\"\"");
|
||||
$this->assertDef("'\\20'", "\" \"");
|
||||
$this->assertDef("\\0020", "\" \"");
|
||||
$this->assertDef("'\\000045'", "E");
|
||||
$this->assertDef("','", false);
|
||||
$this->assertDef("',' foobar','", "' foobar'");
|
||||
$this->assertDef("'\\27'", "'\''");
|
||||
$this->assertDef('"\\22"', "'\"'");
|
||||
$this->assertDef('"\\""', "'\"'");
|
||||
$this->assertDef('"\'"', "'\\''");
|
||||
$this->assertDef("',' foobar','", "\" foobar\"");
|
||||
$this->assertDef("'\\27'", "\"'\"");
|
||||
$this->assertDef('"\\22"', "\"\\22 \"");
|
||||
$this->assertDef('"\\""', "\"\\22 \"");
|
||||
$this->assertDef('"\'"', "\"'\"");
|
||||
$this->assertDef("'\\000045a'", "Ea");
|
||||
$this->assertDef("'\\00045 a'", "Ea");
|
||||
$this->assertDef("'\\00045 a'", "'E a'");
|
||||
$this->assertDef("'\\00045 a'", "\"E a\"");
|
||||
$this->assertDef("'\\\nf'", "f");
|
||||
}
|
||||
|
||||
|
@ -11,10 +11,10 @@ class HTMLPurifier_AttrDef_CSS_FontTest extends HTMLPurifier_AttrDefHarness
|
||||
// hodgepodge of usage cases from W3C spec, but " -> '
|
||||
$this->assertDef('12px/14px sans-serif');
|
||||
$this->assertDef('80% sans-serif');
|
||||
$this->assertDef('x-large/110% \'New Century Schoolbook\', serif');
|
||||
$this->assertDef('x-large/110% "New Century Schoolbook", serif');
|
||||
$this->assertDef('bold italic large Palatino, serif');
|
||||
$this->assertDef('normal small-caps 120%/120% fantasy');
|
||||
$this->assertDef('300 italic 1.3em/1.7em \'FB Armada\', sans-serif');
|
||||
$this->assertDef('300 italic 1.3em/1.7em "FB Armada", sans-serif');
|
||||
$this->assertDef('600 9px Charcoal');
|
||||
$this->assertDef('600 9px/ 12px Charcoal', '600 9px/12px Charcoal');
|
||||
|
||||
|
@ -13,14 +13,14 @@ class HTMLPurifier_AttrDef_CSS_ListStyleTest extends HTMLPurifier_AttrDefHarness
|
||||
$this->assertDef('circle outside');
|
||||
$this->assertDef('inside');
|
||||
$this->assertDef('none');
|
||||
$this->assertDef('url(\'foo.gif\')');
|
||||
$this->assertDef('circle url(\'foo.gif\') inside');
|
||||
$this->assertDef('url("foo.gif")');
|
||||
$this->assertDef('circle url("foo.gif") inside');
|
||||
|
||||
// invalid values
|
||||
$this->assertDef('outside inside', 'outside');
|
||||
|
||||
// ordering
|
||||
$this->assertDef('url(foo.gif) none', 'none url(\'foo.gif\')');
|
||||
$this->assertDef('url(foo.gif) none', 'none url("foo.gif")');
|
||||
$this->assertDef('circle lower-alpha', 'circle');
|
||||
// the spec is ambiguous about what happens in these
|
||||
// cases, so we're going off the W3C CSS validator
|
||||
|
@ -12,20 +12,16 @@ class HTMLPurifier_AttrDef_CSS_URITest extends HTMLPurifier_AttrDefHarness
|
||||
// we could be nice but we won't be
|
||||
$this->assertDef('http://www.example.com/', false);
|
||||
|
||||
// no quotes are used, since that's the most widely supported
|
||||
// syntax
|
||||
$this->assertDef('url(', false);
|
||||
$this->assertDef('url(\'\')', true);
|
||||
$result = "url('http://www.example.com/')";
|
||||
$this->assertDef('url("")', true);
|
||||
$result = 'url("http://www.example.com/")';
|
||||
$this->assertDef('url(http://www.example.com/)', $result);
|
||||
$this->assertDef('url("http://www.example.com/")', $result);
|
||||
$this->assertDef("url('http://www.example.com/')", $result);
|
||||
$this->assertDef(
|
||||
' url( "http://www.example.com/" ) ', $result);
|
||||
|
||||
// escaping
|
||||
$this->assertDef("url(http://www.example.com/foo,bar\))",
|
||||
"url('http://www.example.com/foo\,bar\)')");
|
||||
'url("http://www.example.com/foo,bar)")');
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
|
||||
$this->assertDef('text-transform:capitalize;');
|
||||
$this->assertDef('background-color:rgb(0,0,255);');
|
||||
$this->assertDef('background-color:transparent;');
|
||||
$this->assertDef('background:#333 url(\'chess.png\') repeat fixed 50% top;');
|
||||
$this->assertDef('background:#333 url("chess.png") repeat fixed 50% top;');
|
||||
$this->assertDef('color:#F00;');
|
||||
$this->assertDef('border-top-color:#F00;');
|
||||
$this->assertDef('border-color:#F00 #FF0;');
|
||||
@ -62,7 +62,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
|
||||
$this->assertDef('width:-50px;', false);
|
||||
$this->assertDef('text-decoration:underline;');
|
||||
$this->assertDef('font-family:sans-serif;');
|
||||
$this->assertDef('font-family:Gill, \'Times New Roman\', sans-serif;');
|
||||
$this->assertDef('font-family:Gill, "Times New Roman", sans-serif;');
|
||||
$this->assertDef('font:12px serif;');
|
||||
$this->assertDef('border:1px solid #000;');
|
||||
$this->assertDef('border-bottom:2em double #FF00FA;');
|
||||
@ -73,9 +73,9 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
|
||||
$this->assertDef('vertical-align:12px;');
|
||||
$this->assertDef('vertical-align:50%;');
|
||||
$this->assertDef('table-layout:fixed;');
|
||||
$this->assertDef('list-style-image:url(\'nice.jpg\');');
|
||||
$this->assertDef('list-style:disc url(\'nice.jpg\') inside;');
|
||||
$this->assertDef('background-image:url(\'foo.jpg\');');
|
||||
$this->assertDef('list-style-image:url("nice.jpg");');
|
||||
$this->assertDef('list-style:disc url("nice.jpg") inside;');
|
||||
$this->assertDef('background-image:url("foo.jpg");');
|
||||
$this->assertDef('background-image:none;');
|
||||
$this->assertDef('background-repeat:repeat-y;');
|
||||
$this->assertDef('background-attachment:fixed;');
|
||||
|
@ -81,7 +81,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
|
||||
function test_cleanCSS_angledBrackets() {
|
||||
$this->assertCleanCSS(
|
||||
".class {\nfont-family:'</style>';\n}",
|
||||
".class {\nfont-family:'\\3C /style\\3E ';\n}"
|
||||
".class {\nfont-family:\"\\3C /style\\3E \";\n}"
|
||||
);
|
||||
}
|
||||
|
||||
@ -99,14 +99,14 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
|
||||
|
||||
function test_cleanCSS_escapeCodes() {
|
||||
$this->assertCleanCSS(
|
||||
".class {\nfont-family:'\\3C /style\\3E ';\n}"
|
||||
".class {\nfont-family:\"\\3C /style\\3E \";\n}"
|
||||
);
|
||||
}
|
||||
|
||||
function test_cleanCSS_noEscapeCodes() {
|
||||
$this->config->set('Filter.ExtractStyleBlocks.Escaping', false);
|
||||
$this->assertCleanCSS(
|
||||
".class {\nfont-family:'</style>';\n}"
|
||||
".class {\nfont-family:\"</style>\";\n}"
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -7,5 +7,5 @@ URI.MungeResources = true
|
||||
<img src="http://example.com" style="background-image:url(http://example.com);" alt="example.com" />
|
||||
--EXPECT--
|
||||
<a href="/redirect?s=http%3A%2F%2Fexample.com&t=c15354f3953dfec262c55b1403067e0d045a3059&r=&n=a&m=href&p=">Link</a>
|
||||
<img src="/redirect?s=http%3A%2F%2Fexample.com&t=c15354f3953dfec262c55b1403067e0d045a3059&r=1&n=img&m=src&p=" style="background-image:url('/redirect?s=http%3A%2F%2Fexample.com&t=c15354f3953dfec262c55b1403067e0d045a3059&r=1&n=img&m=style&p=background-image');" alt="example.com" />
|
||||
<img src="/redirect?s=http%3A%2F%2Fexample.com&t=c15354f3953dfec262c55b1403067e0d045a3059&r=1&n=img&m=src&p=" style="background-image:url("/redirect?s=http%3A%2F%2Fexample.com&t=c15354f3953dfec262c55b1403067e0d045a3059&r=1&n=img&m=style&p=background-image");" alt="example.com" />
|
||||
--# vim: et sw=4 sts=4
|
||||
|
@ -4,5 +4,5 @@ if (!function_exists('iconv')) return true;
|
||||
Core.Encoding = "Shift_JIS"
|
||||
Core.EscapeNonASCIICharacters = true
|
||||
--HTML--
|
||||
<b style="font-family:'¥';">111</b>
|
||||
<b style="font-family:"¥";">111</b>
|
||||
--# vim: et sw=4 sts=4
|
||||
|
@ -3,7 +3,7 @@ if (!function_exists('iconv')) return true;
|
||||
--INI--
|
||||
Core.Encoding = Shift_JIS
|
||||
--HTML--
|
||||
<b style="font-family:'¥';">111</b>
|
||||
<b style="font-family:"¥";">111</b>
|
||||
--EXPECT--
|
||||
<b style="font-family:'';">111</b>
|
||||
<b style="font-family:"";">111</b>
|
||||
--# vim: et sw=4 sts=4
|
||||
|
@ -1,5 +1,5 @@
|
||||
--HTML--
|
||||
<table background="logo.png"><tr><td>asdf</td></tr></table>
|
||||
--EXPECT--
|
||||
<table style="background-image:url('logo.png');"><tr><td>asdf</td></tr></table>
|
||||
<table style="background-image:url("logo.png");"><tr><td>asdf</td></tr></table>
|
||||
--# vim: et sw=4 sts=4
|
||||
|
Loading…
Reference in New Issue
Block a user