mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-10 16:01:53 +00:00
797b899305
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1334 48356398-32a2-884e-a903-53898d9a118a
154 lines
5.5 KiB
PHP
154 lines
5.5 KiB
PHP
<?php
|
|
|
|
require_once 'HTMLPurifier/URIParser.php';
|
|
|
|
/**
|
|
* HTML Purifier's internal representation of a URI
|
|
*/
|
|
class HTMLPurifier_URI
|
|
{
|
|
|
|
var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
|
|
|
|
/**
|
|
* @note Automatically normalizes scheme and port
|
|
*/
|
|
function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
|
|
$this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
|
$this->userinfo = $userinfo;
|
|
$this->host = $host;
|
|
$this->port = is_null($port) ? $port : (int) $port;
|
|
$this->path = $path;
|
|
$this->query = $query;
|
|
$this->fragment = $fragment;
|
|
}
|
|
|
|
function getSchemeObj($config, &$context) {
|
|
$registry =& HTMLPurifier_URISchemeRegistry::instance();
|
|
if ($this->scheme !== null) {
|
|
$scheme_obj = $registry->getScheme($this->scheme, $config, $context);
|
|
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
|
} else {
|
|
// no scheme: retrieve the default one
|
|
$scheme_obj = $registry->getScheme($config->get('URI', 'DefaultScheme'), $config, $context);
|
|
if (!$scheme_obj) {
|
|
// something funky happened to the default scheme object
|
|
trigger_error(
|
|
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
|
E_USER_WARNING
|
|
);
|
|
return false;
|
|
}
|
|
}
|
|
return $scheme_obj;
|
|
}
|
|
|
|
/**
|
|
* Generic validation method applicable for all schemes
|
|
*/
|
|
function validate($config, &$context) {
|
|
|
|
// validate host
|
|
if (!is_null($this->host)) {
|
|
// remove URI if it's absolute and we disabled externals or
|
|
// if it's absolute and embedded and we disabled external resources
|
|
unset($our_host); // ensure this variable is not set
|
|
if (
|
|
$config->get('URI', 'DisableExternal') ||
|
|
(
|
|
$config->get('URI', 'DisableExternalResources') &&
|
|
$context->get('EmbeddedURI', true) // suppress errors
|
|
)
|
|
) {
|
|
$our_host = $config->get('URI', 'Host');
|
|
if ($our_host === null) return false;
|
|
}
|
|
$host_def = new HTMLPurifier_AttrDef_URI_Host();
|
|
$this->host = $host_def->validate($this->host, $config, $context);
|
|
if ($this->host === false) $this->host = null;
|
|
|
|
// check host against blacklist
|
|
if ($this->checkBlacklist($this->host, $config, $context)) return false;
|
|
|
|
// more lenient absolute checking
|
|
if (isset($our_host)) {
|
|
$host_parts = array_reverse(explode('.', $this->host));
|
|
// could be cached
|
|
$our_host_parts = array_reverse(explode('.', $our_host));
|
|
foreach ($our_host_parts as $i => $discard) {
|
|
if (!isset($host_parts[$i])) return false;
|
|
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// munge scheme off if necessary
|
|
if (!is_null($this->scheme) && is_null($this->host)) {
|
|
if ($config->get('URI', 'DefaultScheme') == $this->scheme) {
|
|
$this->scheme = null;
|
|
}
|
|
}
|
|
|
|
// validate port
|
|
if (!is_null($this->port)) {
|
|
if ($this->port < 1 || $this->port > 65535) $this->port = null;
|
|
}
|
|
|
|
// query and fragment are quite simple in terms of definition:
|
|
// *( pchar / "/" / "?" ), so define their validation routines
|
|
// when we start fixing percent encoding
|
|
|
|
// path gets to be validated against a hodge-podge of rules depending
|
|
// on the status of authority and scheme, but it's not that important,
|
|
// esp. since it won't be applicable to everyone
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
/**
|
|
* Checks a host against an array blacklist
|
|
* @param $host Host to check
|
|
* @param $config HTMLPurifier_Config instance
|
|
* @param $context HTMLPurifier_Context instance
|
|
* @return bool Is spam?
|
|
*/
|
|
function checkBlacklist($host, $config, &$context) {
|
|
$blacklist = $config->get('URI', 'HostBlacklist');
|
|
if (!empty($blacklist)) {
|
|
foreach($blacklist as $blacklisted_host_fragment) {
|
|
if (strpos($host, $blacklisted_host_fragment) !== false) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Convert URI back to string
|
|
*/
|
|
function toString() {
|
|
// reconstruct authority
|
|
$authority = null;
|
|
if (!is_null($this->host)) {
|
|
$authority = '';
|
|
if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
|
|
$authority .= $this->host;
|
|
if(!is_null($this->port)) $authority .= ':' . $this->port;
|
|
}
|
|
|
|
// reconstruct the result
|
|
$result = '';
|
|
if (!is_null($this->scheme)) $result .= $this->scheme . ':';
|
|
if (!is_null($authority)) $result .= '//' . $authority;
|
|
$result .= $this->path;
|
|
if (!is_null($this->query)) $result .= '?' . $this->query;
|
|
if (!is_null($this->fragment)) $result .= '#' . $this->fragment;
|
|
|
|
return $result;
|
|
}
|
|
|
|
}
|
|
|