2010-03-07 21:14:44 -05:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Implements data: URI for base64 encoded images supported by GD.
|
|
|
|
*/
|
2013-07-16 13:56:14 +02:00
|
|
|
class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme
|
|
|
|
{
|
|
|
|
/**
|
|
|
|
* @type bool
|
|
|
|
*/
|
2010-03-07 21:14:44 -05:00
|
|
|
public $browsable = true;
|
2013-07-16 13:56:14 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @type array
|
|
|
|
*/
|
2010-03-07 21:14:44 -05:00
|
|
|
public $allowed_types = array(
|
|
|
|
// you better write validation code for other types if you
|
|
|
|
// decide to allow them
|
|
|
|
'image/jpeg' => true,
|
|
|
|
'image/gif' => true,
|
|
|
|
'image/png' => true,
|
2013-07-16 13:56:14 +02:00
|
|
|
);
|
Dramatically rewrite null host URI handling.
Basically, browsers don't parse what should be valid URIs correctly, so
we have to go through some backbends to accommodate them. Specifically,
for browsable URIs, the following URIs have unintended behavior:
- ///example.com
- http:/example.com
- http:///example.com
Furthermore, if the path begins with //, modifying these URLs must
be done with care, as if you remove the host-name component, the
parse tree changes.
I've modified the engine to follow correct URI semantics as much
as possible while outputting browser compatible code, and invalidate
the URI in cases where we can't deal. There has been a refactoring
of URIScheme so that this important check is always performed,
introducing a new member variable allow_empty_host which is true
on data, file, mailto and news schemes.
This also fixes bypass bugs on URI.Munge.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-25 18:56:46 +00:00
|
|
|
// this is actually irrelevant since we only write out the path
|
|
|
|
// component
|
2013-07-16 13:56:14 +02:00
|
|
|
/**
|
|
|
|
* @type bool
|
|
|
|
*/
|
Dramatically rewrite null host URI handling.
Basically, browsers don't parse what should be valid URIs correctly, so
we have to go through some backbends to accommodate them. Specifically,
for browsable URIs, the following URIs have unintended behavior:
- ///example.com
- http:/example.com
- http:///example.com
Furthermore, if the path begins with //, modifying these URLs must
be done with care, as if you remove the host-name component, the
parse tree changes.
I've modified the engine to follow correct URI semantics as much
as possible while outputting browser compatible code, and invalidate
the URI in cases where we can't deal. There has been a refactoring
of URIScheme so that this important check is always performed,
introducing a new member variable allow_empty_host which is true
on data, file, mailto and news schemes.
This also fixes bypass bugs on URI.Munge.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-25 18:56:46 +00:00
|
|
|
public $may_omit_host = true;
|
2010-03-07 21:14:44 -05:00
|
|
|
|
2013-07-16 13:56:14 +02:00
|
|
|
/**
 * Validates a data: URI and rewrites it into a canonical base64 form.
 *
 * The path is split into optional metadata (content type, charset,
 * base64 flag) and payload. The payload is decoded, its real image type
 * is sniffed from the bytes, and the URI is accepted only when that type
 * is listed in $allowed_types. On success every component except the
 * path is cleared and the path becomes "<type>;base64,<payload>".
 *
 * @param HTMLPurifier_URI $uri
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool
 */
public function doValidate(&$uri, $config, $context)
{
    $result = explode(',', $uri->path, 2);
    $is_base64 = false;
    $content_type = null;
    if (count($result) === 2) {
        list($metadata, $data) = $result;
        // do some legwork on the metadata
        foreach (explode(';', $metadata) as $cur) {
            if ($cur === 'base64') {
                // everything after the base64 marker is ignored
                $is_base64 = true;
                break;
            }
            if (strncmp($cur, 'charset=', 8) === 0) {
                // doesn't match if there are arbitrary spaces, but
                // whatever dude. The charset is never used: we don't
                // allow plaintext stuff, so it is simply dropped.
                continue;
            }
            if ($content_type === null) {
                // first non-charset entry wins; later ones are garbage
                $content_type = $cur;
            }
        }
    } else {
        $data = $result[0];
    }
    if ($content_type !== null && empty($this->allowed_types[$content_type])) {
        return false;
    }
    $data = rawurldecode($data);
    $raw_data = $is_base64 ? base64_decode($data) : $data;
    if ($raw_data === false || strlen($raw_data) < 12) {
        // error; exif_imagetype throws exception with small files,
        // and this likely indicates a corrupt URI/failed parse anyway
        return false;
    }
    // XXX probably want to refactor this into a general mechanism
    // for filtering arbitrary content types
    $image_code = $this->detectImageCode($raw_data);
    if ($image_code === false) {
        // not recognisable as an image (or it could not be inspected)
        return false;
    }
    $real_content_type = image_type_to_mime_type($image_code);
    if ($real_content_type !== $content_type) {
        // we're nice guys; if the content type is something else we
        // support, change it over
        if (empty($this->allowed_types[$real_content_type])) {
            return false;
        }
        $content_type = $real_content_type;
    }
    // ok, it's kosher, rewrite what we need
    $uri->userinfo = null;
    $uri->host = null;
    $uri->port = null;
    $uri->fragment = null;
    $uri->query = null;
    $uri->path = "$content_type;base64," . base64_encode($raw_data);
    return true;
}

/**
 * Determines the image type of raw bytes by writing them to a temporary
 * file and inspecting it with exif_imagetype() or, failing that,
 * getimagesize(). The temporary file is always removed before returning.
 *
 * @param string $raw_data Decoded payload of the data: URI
 * @return int|bool Image type code on success, false if the type could
 *                  not be determined
 */
private function detectImageCode($raw_data)
{
    $use_exif = function_exists('exif_imagetype');
    if (!$use_exif && !function_exists('getimagesize')) {
        // Checked before any file is created so nothing is left behind.
        trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
        // only reached when a custom error handler lets execution continue
        return false;
    }

    $tmp_dir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "/tmp";
    $file = tempnam($tmp_dir, "");
    if ($file === false) {
        return false;
    }

    $image_code = false;
    if (file_put_contents($file, $raw_data) !== false) {
        // both probes may complain about malformed data; keep them quiet
        set_error_handler(array($this, 'muteErrorHandler'));
        if ($use_exif) {
            $image_code = exif_imagetype($file);
        } else {
            $info = getimagesize($file);
            if (is_array($info) && isset($info[2])) {
                $image_code = $info[2];
            }
        }
        restore_error_handler();
    }
    unlink($file);
    return $image_code;
}
|
|
|
|
|
2013-07-16 13:56:14 +02:00
|
|
|
/**
 * No-op error handler: accepts the reported error and discards it.
 *
 * Installed through set_error_handler() while image data is being
 * probed, so that complaints about malformed input are not surfaced.
 *
 * @param int $errno
 * @param string $errstr
 */
public function muteErrorHandler($errno, $errstr)
{
    // deliberately empty: the error is swallowed
}
|
2010-03-07 21:14:44 -05:00
|
|
|
}
|