mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 15:28:40 +00:00
Implement data URI scheme.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
parent
9a9036c689
commit
97125ed18b
2
NEWS
2
NEWS
@ -12,6 +12,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
4.1.0, unknown release date
|
4.1.0, unknown release date
|
||||||
! Support proprietary height attribute on table element
|
! Support proprietary height attribute on table element
|
||||||
! Support YouTube slideshows that contain /cp/ in their URL.
|
! Support YouTube slideshows that contain /cp/ in their URL.
|
||||||
|
! Support for data: URI scheme; not enabled by default, add it using
|
||||||
|
%URI.AllowedSchemes
|
||||||
|
|
||||||
4.0.0, released 2009-07-07
|
4.0.0, released 2009-07-07
|
||||||
# APIs for ConfigSchema subsystem have substantially changed. See
|
# APIs for ConfigSchema subsystem have substantially changed. See
|
||||||
|
4
TODO
4
TODO
@ -13,12 +13,8 @@ afraid to cast your vote for the next feature to be implemented!
|
|||||||
|
|
||||||
Standing patches:
|
Standing patches:
|
||||||
|
|
||||||
- Incorporate data: support as implemented here:
|
|
||||||
http://htmlpurifier.org/phorum/read.php?3,3491,3548
|
|
||||||
- Incorporate download and resize support as implemented here:
|
- Incorporate download and resize support as implemented here:
|
||||||
http://htmlpurifier.org/phorum/read.php?3,2795,3628
|
http://htmlpurifier.org/phorum/read.php?3,2795,3628
|
||||||
- Incorporate remove <span> tags that don't do anything (no attributes):
|
|
||||||
http://htmlpurifier.org/phorum/read.php?5,2507
|
|
||||||
|
|
||||||
Things to do as soon as possible:
|
Things to do as soon as possible:
|
||||||
|
|
||||||
|
@ -199,6 +199,7 @@ require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
|
|||||||
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||||
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||||
require 'HTMLPurifier/URIFilter/Munge.php';
|
require 'HTMLPurifier/URIFilter/Munge.php';
|
||||||
|
require 'HTMLPurifier/URIScheme/data.php';
|
||||||
require 'HTMLPurifier/URIScheme/ftp.php';
|
require 'HTMLPurifier/URIScheme/ftp.php';
|
||||||
require 'HTMLPurifier/URIScheme/http.php';
|
require 'HTMLPurifier/URIScheme/http.php';
|
||||||
require 'HTMLPurifier/URIScheme/https.php';
|
require 'HTMLPurifier/URIScheme/https.php';
|
||||||
|
@ -193,6 +193,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
|
|||||||
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
|
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
|
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
|
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
|
||||||
|
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
|
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
|
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
|
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
|
||||||
|
@ -12,4 +12,6 @@ array (
|
|||||||
--DESCRIPTION--
|
--DESCRIPTION--
|
||||||
Whitelist that defines the schemes that a URI is allowed to have. This
|
Whitelist that defines the schemes that a URI is allowed to have. This
|
||||||
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
|
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
|
||||||
|
There is also support for the <code>data</code> URI scheme, but it is not
|
||||||
|
enabled by default.
|
||||||
--# vim: et sw=4 sts=4
|
--# vim: et sw=4 sts=4
|
||||||
|
93
library/HTMLPurifier/URIScheme/data.php
Normal file
93
library/HTMLPurifier/URIScheme/data.php
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Implements data: URI for base64 encoded images supported by GD.
 *
 * A URI is accepted only when the bytes it carries are recognised as one
 * of the image types listed in $allowed_types. Accepted URIs are rewritten
 * into the canonical form "<content-type>;base64,<payload>".
 */
class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {

    // NOTE(review): semantics of this flag are defined by the parent class,
    // which is not visible here -- confirm before changing.
    public $browsable = true;

    /**
     * Lookup table (MIME type => true) of content types that may be embedded.
     */
    public $allowed_types = array(
        // you better write validation code for other types if you
        // decide to allow them
        'image/jpeg' => true,
        'image/gif' => true,
        'image/png' => true,
    );

    /**
     * Validates a data: URI and normalises it in place.
     *
     * @param object $uri     URI object; its path is read and, on success,
     *                        path/userinfo/host/port/query/fragment are rewritten
     * @param object $config  Configuration object (unused here)
     * @param object $context Context object (unused here)
     * @return bool true if the payload is an allowed image, false otherwise
     */
    public function validate(&$uri, $config, $context) {
        // path looks like "<metadata>,<data>"; the metadata part is optional
        $parts = explode(',', $uri->path, 2);
        $is_base64 = false;
        $content_type = null;
        if (count($parts) == 2) {
            list($metadata, $data) = $parts;
            foreach (explode(';', $metadata) as $cur) {
                if ($cur === 'base64') {
                    // nothing after the base64 marker is interpreted
                    $is_base64 = true;
                    break;
                }
                if (substr($cur, 0, 8) == 'charset=') {
                    // doesn't match if there are arbitrary spaces; the
                    // charset is irrelevant for images, so it is dropped
                    continue;
                }
                // first non-charset token is the declared content type;
                // any further ones are garbage
                if ($content_type === null) {
                    $content_type = $cur;
                }
            }
        } else {
            $data = $parts[0];
        }
        if ($content_type !== null && empty($this->allowed_types[$content_type])) {
            return false;
        }
        $data = rawurldecode($data);
        $raw_data = $is_base64 ? base64_decode($data) : $data;
        if ($raw_data === false || strlen($raw_data) < 12) {
            // undecodable, or too short to carry an image signature;
            // exif_imagetype() raises a notice when it cannot read enough
            // bytes, and such a payload cannot be a valid image anyway
            return false;
        }
        $image_code = $this->detectImageType($raw_data);
        if ($image_code === false) {
            return false;
        }
        $real_content_type = image_type_to_mime_type($image_code);
        if ($real_content_type != $content_type) {
            // we're nice guys; if the content type is something else we
            // support, change it over
            if (empty($this->allowed_types[$real_content_type])) return false;
            $content_type = $real_content_type;
        }
        // ok, it's kosher, rewrite what we need
        $uri->userinfo = null;
        $uri->host = null;
        $uri->port = null;
        $uri->fragment = null;
        $uri->query = null;
        $uri->path = "$content_type;base64," . base64_encode($raw_data);
        return true;
    }

    /**
     * Sniffs the image type of a blob by writing it to a temporary file and
     * probing it with exif_imagetype() or, failing that, getimagesize().
     * The temporary file is always removed before returning.
     *
     * XXX probably want to refactor this into a general mechanism
     * for filtering arbitrary content types
     *
     * @param string $raw_data Decoded payload bytes
     * @return int|false IMAGETYPE_* code, or false if the data is not a
     *                   recognised image or could not be inspected
     */
    protected function detectImageType($raw_data) {
        if (function_exists('exif_imagetype')) {
            $use_exif = true;
        } elseif (function_exists('getimagesize')) {
            $use_exif = false;
        } else {
            trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
            // a custom error handler may return; never continue unvalidated
            return false;
        }
        // sys_get_temp_dir() is unavailable on very old PHP versions
        $dir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : '/tmp';
        $file = tempnam($dir, '');
        if ($file === false) {
            return false;
        }
        if (file_put_contents($file, $raw_data) === false) {
            unlink($file);
            return false;
        }
        if ($use_exif) {
            $image_code = exif_imagetype($file);
        } else {
            set_error_handler(array($this, 'muteErrorHandler'));
            $info = getimagesize($file);
            restore_error_handler();
            $image_code = ($info === false) ? false : $info[2];
        }
        unlink($file);
        return $image_code;
    }

    /**
     * No-op error handler used to silence warnings from getimagesize().
     */
    public function muteErrorHandler($errno, $errstr) {}

}
|
||||||
|
|
37
smoketests/dataScheme.php
Normal file
37
smoketests/dataScheme.php
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
<?php

// Smoketest for the data: URI scheme. Purifies an <img> tag whose src is an
// inline base64 PNG, with 'data' as the only value given for
// URI.AllowedSchemes, and prints the purified markup inside an XHTML page.

require_once 'common.php';

echo '<?xml version="1.0" encoding="UTF-8" ?>';
?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>HTML Purifier data Scheme Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<h1>HTML Purifier data Scheme Smoketest</h1>
<?php

// Input under test: the base64 payload is deliberately split across lines.
$markup = '<img src="data:image/png;base64,
iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAABGdBTUEAALGP
C/xhBQAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB9YGARc5KB0XV+IA
AAAddEVYdENvbW1lbnQAQ3JlYXRlZCB3aXRoIFRoZSBHSU1Q72QlbgAAAF1J
REFUGNO9zL0NglAAxPEfdLTs4BZM4DIO4C7OwQg2JoQ9LE1exdlYvBBeZ7jq
ch9//q1uH4TLzw4d6+ErXMMcXuHWxId3KOETnnXXV6MJpcq2MLaI97CER3N0
vr4MkhoXe0rZigAAAABJRU5ErkJggg==" alt="Red dot" />';

// The data scheme is not enabled by default, so it is whitelisted here.
$settings = array('URI.AllowedSchemes' => 'data');
$filter = new HTMLPurifier($settings);

?>
<div><?php echo $filter->purify($markup); ?></div>

</body>
</html>
<?php

// vim: et sw=4 sts=4
|
@ -6,8 +6,21 @@
|
|||||||
class HTMLPurifier_URISchemeTest extends HTMLPurifier_URIHarness
|
class HTMLPurifier_URISchemeTest extends HTMLPurifier_URIHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
|
private $pngBase64;
|
||||||
|
|
||||||
|
/**
 * Prepares the base64 PNG payload shared by the data: scheme tests.
 */
public function __construct() {
    $chunks = array(
        'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAABGdBTUEAALGP',
        'C/xhBQAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB9YGARc5KB0XV+IA',
        'AAAddEVYdENvbW1lbnQAQ3JlYXRlZCB3aXRoIFRoZSBHSU1Q72QlbgAAAF1J',
        'REFUGNO9zL0NglAAxPEfdLTs4BZM4DIO4C7OwQg2JoQ9LE1exdlYvBBeZ7jq',
        'ch9//q1uH4TLzw4d6+ErXMMcXuHWxId3KOETnnXXV6MJpcq2MLaI97CER3N0',
        'vr4MkhoXe0rZigAAAABJRU5ErkJggg==',
    );
    $this->pngBase64 = implode('', $chunks);
}
|
||||||
|
|
||||||
protected function assertValidation($uri, $expect_uri = true) {
|
protected function assertValidation($uri, $expect_uri = true) {
|
||||||
$this->prepareURI($uri, $expect_uri);
|
$this->prepareURI($uri, $expect_uri);
|
||||||
|
$this->config->set('URI.AllowedSchemes', array($uri->scheme));
|
||||||
// convenience hack: the scheme should be explicitly specified
|
// convenience hack: the scheme should be explicitly specified
|
||||||
$scheme = $uri->getSchemeObj($this->config, $this->context);
|
$scheme = $uri->getSchemeObj($this->config, $this->context);
|
||||||
$result = $scheme->validate($uri, $this->config, $this->context);
|
$result = $scheme->validate($uri, $this->config, $this->context);
|
||||||
@ -132,6 +145,26 @@ class HTMLPurifier_URISchemeTest extends HTMLPurifier_URIHarness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * A data: URI declaring image/png and carrying the PNG payload is checked
 * with the default expectation of assertValidation().
 */
function test_data_png() {
    $input = 'data:image/png;base64,' . $this->pngBase64;
    $this->assertValidation($input);
}
|
||||||
|
|
||||||
|
/**
 * A data: URI whose payload is not a real image is expected to be rejected
 * (expectation passed as false).
 */
function test_data_malformed() {
    $input = 'data:image/png;base64,vr4MkhoXJRU5ErkJggg==';
    $this->assertValidation($input, false);
}
|
||||||
|
|
||||||
|
/**
 * A data: URI that omits the content type is expected to come back with
 * image/png filled in.
 */
function test_data_implicit() {
    $input = 'data:base64,' . $this->pngBase64;
    $expected = 'data:image/png;base64,' . $this->pngBase64;
    $this->assertValidation($input, $expected);
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
// vim: et sw=4 sts=4
|
||||||
|
Loading…
Reference in New Issue
Block a user