mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 15:28:40 +00:00
[2.1.0] Create new URI object and migrate URI validation systems to use it. URIScheme interface changed.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1334 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
8c9dbe142d
commit
797b899305
3
NEWS
3
NEWS
@ -51,6 +51,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. URI scheme is munged off if there is no authority and the scheme is the
|
. URI scheme is munged off if there is no authority and the scheme is the
|
||||||
default one
|
default one
|
||||||
. All unit tests inherit from HTMLPurifier_Harness, not UnitTestCase
|
. All unit tests inherit from HTMLPurifier_Harness, not UnitTestCase
|
||||||
|
. Interface for URIScheme changed
|
||||||
|
. Generic URI object to hold components of URI added, most systems involved
|
||||||
|
in URI validation have been migrated to use it
|
||||||
|
|
||||||
2.0.1, released 2007-06-27
|
2.0.1, released 2007-06-27
|
||||||
! Tag auto-closing now based on a ChildDef heuristic rather than a
|
! Tag auto-closing now based on a ChildDef heuristic rather than a
|
||||||
|
@ -93,170 +93,59 @@ HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
|||||||
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
var $host, $parser;
|
var $parser, $percentEncoder;
|
||||||
var $embeds_resource;
|
var $embedsResource;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $embeds_resource Does the URI here result in an extra HTTP request?
|
* @param $embeds_resource Does the URI here result in an extra HTTP request?
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||||
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
|
||||||
$this->parser = new HTMLPurifier_URIParser();
|
$this->parser = new HTMLPurifier_URIParser();
|
||||||
$this->embeds_resource = (bool) $embeds_resource;
|
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
|
||||||
|
$this->embedsResource = (bool) $embeds_resource;
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($uri, $config, &$context) {
|
function validate($uri, $config, &$context) {
|
||||||
|
|
||||||
static $PercentEncoder = null;
|
|
||||||
if ($PercentEncoder === null) $PercentEncoder = new HTMLPurifier_PercentEncoder();
|
|
||||||
|
|
||||||
if ($config->get('URI', 'Disable')) return false;
|
if ($config->get('URI', 'Disable')) return false;
|
||||||
|
|
||||||
// initial operations
|
// initial operations
|
||||||
$uri = $this->parseCDATA($uri);
|
$uri = $this->parseCDATA($uri);
|
||||||
$uri = $PercentEncoder->normalize($uri);
|
$uri = $this->percentEncoder->normalize($uri);
|
||||||
|
|
||||||
// parse the URI
|
// parse the URI
|
||||||
$parsed_uri = $this->parser->parse($uri);
|
$uri = $this->parser->parse($uri);
|
||||||
if ($parsed_uri === false) return false;
|
if ($uri === false) return false;
|
||||||
list($scheme, $userinfo, $host, $port, $path, $query, $fragment) = $parsed_uri;
|
|
||||||
|
|
||||||
// retrieve the scheme object
|
// generic validation
|
||||||
$registry =& HTMLPurifier_URISchemeRegistry::instance();
|
$context->register('EmbeddedURI', $this->embedsResource); // flag
|
||||||
$default_scheme = $config->get('URI', 'DefaultScheme');
|
$result = $uri->validate($config, $context);
|
||||||
if ($scheme !== null) {
|
$context->destroy('EmbeddedURI');
|
||||||
// no need to validate the scheme's fmt since we do that when we
|
if (!$result) return false;
|
||||||
// retrieve the specific scheme object from the registry
|
|
||||||
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
|
||||||
$scheme_obj = $registry->getScheme($scheme, $config, $context);
|
|
||||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
|
||||||
} else {
|
|
||||||
// no scheme: retrieve the default one
|
|
||||||
$scheme_obj = $registry->getScheme($default_scheme, $config, $context);
|
|
||||||
if (!$scheme_obj) {
|
|
||||||
// something funky happened to the default scheme object
|
|
||||||
trigger_error(
|
|
||||||
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
|
||||||
E_USER_WARNING
|
|
||||||
);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
|
||||||
// the URI we're processing embeds_resource a resource in the
|
|
||||||
// page, but the URI it references cannot be physically retrieved
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// validate host
|
// scheme-specific validation
|
||||||
if ($host !== null) {
|
$scheme_obj = $uri->getSchemeObj($config, $context);
|
||||||
// remove URI if it's absolute and we disabled externals or
|
if (!$scheme_obj) return false;
|
||||||
// if it's absolute and embedded and we disabled external resources
|
if ($this->embedsResource && !$scheme_obj->browsable) return false;
|
||||||
unset($our_host);
|
$result = $scheme_obj->validate($uri, $config, $context);
|
||||||
if (
|
if (!$result) return false;
|
||||||
$config->get('URI', 'DisableExternal') ||
|
|
||||||
(
|
|
||||||
$config->get('URI', 'DisableExternalResources') &&
|
|
||||||
$this->embeds_resource
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
$our_host = $config->get('URI', 'Host');
|
|
||||||
if ($our_host === null) return false;
|
|
||||||
}
|
|
||||||
$host = $this->host->validate($host, $config, $context);
|
|
||||||
if ($host === false) $host = null;
|
|
||||||
|
|
||||||
// check host against blacklist
|
|
||||||
if ($this->checkBlacklist($host, $config, $context)) return false;
|
|
||||||
|
|
||||||
// more lenient absolute checking
|
|
||||||
if (isset($our_host)) {
|
|
||||||
$host_parts = array_reverse(explode('.', $host));
|
|
||||||
// could be cached
|
|
||||||
$our_host_parts = array_reverse(explode('.', $our_host));
|
|
||||||
foreach ($our_host_parts as $i => $discard) {
|
|
||||||
if (!isset($host_parts[$i])) return false;
|
|
||||||
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// validate port
|
// back to string
|
||||||
if ($port !== null) {
|
$result = $uri->toString();
|
||||||
if ($port < 1 || $port > 65535) $port = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// query and fragment are quite simple in terms of definition:
|
|
||||||
// *( pchar / "/" / "?" ), so define their validation routines
|
|
||||||
// when we start fixing percent encoding
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// path gets to be validated against a hodge-podge of rules depending
|
|
||||||
// on the status of authority and scheme, but it's not that important,
|
|
||||||
// esp. since it won't be applicable to everyone
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// okay, now we defer execution to the subobject for more processing
|
|
||||||
// note that $fragment is omitted
|
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
|
||||||
$scheme_obj->validateComponents(
|
|
||||||
$userinfo, $host, $port, $path, $query, $config, $context
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
// reconstruct authority
|
|
||||||
$authority = null;
|
|
||||||
if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
|
|
||||||
$authority = '';
|
|
||||||
if($userinfo !== null) $authority .= $userinfo . '@';
|
|
||||||
$authority .= $host;
|
|
||||||
if($port !== null) $authority .= ':' . $port;
|
|
||||||
} else {
|
|
||||||
if ($default_scheme == $scheme) $scheme = null; // munge scheme off when unnecessary
|
|
||||||
}
|
|
||||||
|
|
||||||
// reconstruct the result
|
|
||||||
$result = '';
|
|
||||||
if ($scheme !== null) $result .= "$scheme:";
|
|
||||||
if ($authority !== null) $result .= "//$authority";
|
|
||||||
$result .= $path;
|
|
||||||
if ($query !== null) $result .= "?$query";
|
|
||||||
if ($fragment !== null) $result .= "#$fragment";
|
|
||||||
|
|
||||||
// munge if necessary
|
// munge if necessary
|
||||||
$munge = $config->get('URI', 'Munge');
|
if (
|
||||||
if (!empty($scheme_obj->browsable) && $munge !== null) {
|
!is_null($uri->host) && // indicator for authority
|
||||||
if ($authority !== null) {
|
!empty($scheme_obj->browsable) &&
|
||||||
$result = str_replace('%s', rawurlencode($result), $munge);
|
!is_null($munge = $config->get('URI', 'Munge'))
|
||||||
}
|
) {
|
||||||
|
$result = str_replace('%s', rawurlencode($result), $munge);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $result;
|
return $result;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks a host against an array blacklist
|
|
||||||
* @param $host Host to check
|
|
||||||
* @param $config HTMLPurifier_Config instance
|
|
||||||
* @param $context HTMLPurifier_Context instance
|
|
||||||
* @return bool Is spam?
|
|
||||||
*/
|
|
||||||
function checkBlacklist($host, &$config, &$context) {
|
|
||||||
$blacklist = $config->get('URI', 'HostBlacklist');
|
|
||||||
if (!empty($blacklist)) {
|
|
||||||
foreach($blacklist as $blacklisted_host_fragment) {
|
|
||||||
if (strpos($host, $blacklisted_host_fragment) !== false) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
153
library/HTMLPurifier/URI.php
Normal file
153
library/HTMLPurifier/URI.php
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/URIParser.php';
|
||||||
|
|
||||||
|
/**
 * HTML Purifier's internal representation of a URI.
 *
 * Stores the seven URI components as plain members, performs the generic
 * (scheme-independent) validation of those components, and can serialize
 * itself back into a string.
 */
class HTMLPurifier_URI
{
    
    /**
     * URI components. Each one is null when the component is absent;
     * validate() may rewrite or null out $scheme, $host and $port.
     */
    var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
    
    /**
     * Stores the components, lowercasing the scheme and casting the port
     * to an integer. Null values are kept as null.
     * @note Automatically normalizes scheme and port
     * @param $scheme   Scheme name, or null
     * @param $userinfo User info of the authority, or null
     * @param $host     Host of the authority, or null
     * @param $port     Port of the authority (cast to int), or null
     * @param $path     Path
     * @param $query    Query, or null
     * @param $fragment Fragment, or null
     */
    function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        // strtolower() is skipped when the scheme is already all lowercase
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }
    
    /**
     * Retrieves the scheme object for this URI from the scheme registry,
     * falling back to the configured URI.DefaultScheme when the URI has
     * no scheme of its own.
     * @param $config  HTMLPurifier_Config instance
     * @param $context HTMLPurifier_Context instance
     * @return Scheme object as returned by the registry, or false if none
     *         could be retrieved. A missing default scheme additionally
     *         raises an E_USER_WARNING.
     */
    function getSchemeObj($config, &$context) {
        $registry =& HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $scheme_obj = $registry->getScheme($config->get('URI', 'DefaultScheme'), $config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }
    
    /**
     * Generic validation method applicable for all schemes.
     *
     * Validates the host (including the external-URI restrictions and the
     * host blacklist), drops the scheme when it equals the default scheme
     * and there is no host, and nulls out-of-range ports. Components are
     * modified in place.
     * @param $config  HTMLPurifier_Config instance
     * @param $context HTMLPurifier_Context instance; the 'EmbeddedURI'
     *                 flag is read from it
     * @return bool true if the URI may be kept, false if it must be removed
     */
    function validate($config, &$context) {
        
        // validate host
        if (!is_null($this->host)) {
            // remove URI if it's absolute and we disabled externals or
            // if it's absolute and embedded and we disabled external resources
            unset($our_host); // ensure this variable is not set
            if (
                $config->get('URI', 'DisableExternal') ||
                (
                    $config->get('URI', 'DisableExternalResources') &&
                    // second argument presumably silences the error raised
                    // when the flag was never registered -- TODO confirm
                    $context->get('EmbeddedURI', true)
                )
            ) {
                $our_host = $config->get('URI', 'Host');
                // externals are restricted but we do not know our own
                // host, so no absolute URI can be accepted
                if ($our_host === null) return false;
            }
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            // an invalid host is dropped rather than failing the whole URI
            if ($this->host === false) $this->host = null;
            
            // check host against blacklist
            if ($this->checkBlacklist($this->host, $config, $context)) return false;
            
            // more lenient absolute checking: the host is accepted when it
            // is our host or a subdomain of it, i.e. when every label of
            // our host matches the corresponding label counted from the right
            if (isset($our_host)) {
                $host_parts = array_reverse(explode('.', $this->host));
                // could be cached
                $our_host_parts = array_reverse(explode('.', $our_host));
                foreach ($our_host_parts as $i => $discard) {
                    if (!isset($host_parts[$i])) return false;
                    // Strict comparison is required: both operands are
                    // strings, and loose comparison treats numeric strings
                    // as numbers ('1e0' == '1', '00' == '0'), which would
                    // let a foreign host pass as our own.
                    if ($host_parts[$i] !== $our_host_parts[$i]) return false;
                }
            }
        }
        
        // munge scheme off if necessary
        if (!is_null($this->scheme) && is_null($this->host)) {
            if ($config->get('URI', 'DefaultScheme') == $this->scheme) {
                $this->scheme = null;
            }
        }
        
        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) $this->port = null;
        }
        
        // query and fragment are quite simple in terms of definition:
        // *( pchar / "/" / "?" ), so define their validation routines
        // when we start fixing percent encoding
        
        // path gets to be validated against a hodge-podge of rules depending
        // on the status of authority and scheme, but it's not that important,
        // esp. since it won't be applicable to everyone
        
        return true;
        
    }
    
    /**
     * Checks a host against an array blacklist. A host is blacklisted when
     * any entry of URI.HostBlacklist occurs anywhere inside it (substring
     * match).
     * @param $host Host to check
     * @param $config HTMLPurifier_Config instance
     * @param $context HTMLPurifier_Context instance
     * @return bool Is spam?
     */
    function checkBlacklist($host, $config, &$context) {
        $blacklist = $config->get('URI', 'HostBlacklist');
        if (!empty($blacklist)) {
            foreach($blacklist as $blacklisted_host_fragment) {
                if (strpos($host, $blacklisted_host_fragment) !== false) {
                    return true;
                }
            }
        }
        return false;
    }
    
    /**
     * Convert URI back to string.
     *
     * The authority is only emitted when a host is present; userinfo and
     * port without a host are not output.
     * @return string Serialized URI
     */
    function toString() {
        // reconstruct authority
        $authority = null;
        if (!is_null($this->host)) {
            $authority = '';
            if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
            $authority .= $this->host;
            if(!is_null($this->port)) $authority .= ':' . $this->port;
        }
        
        // reconstruct the result
        $result = '';
        if (!is_null($this->scheme)) $result .= $this->scheme . ':';
        if (!is_null($authority)) $result .= '//' . $authority;
        $result .= $this->path;
        if (!is_null($this->query)) $result .= '?' . $this->query;
        if (!is_null($this->fragment)) $result .= '#' . $this->fragment;
        
        return $result;
    }
    
}
|
||||||
|
|
@ -1,8 +1,11 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/URI.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses a URI into the components and fragment identifier as specified
|
* Parses a URI into the components and fragment identifier as specified
|
||||||
* by RFC 2396.
|
* by RFC 2396.
|
||||||
|
* @todo Replace regexps with a native PHP parser
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_URIParser
|
class HTMLPurifier_URIParser
|
||||||
{
|
{
|
||||||
@ -10,7 +13,7 @@ class HTMLPurifier_URIParser
|
|||||||
/**
|
/**
|
||||||
* Parses a URI
|
* Parses a URI
|
||||||
* @param $uri string URI to parse
|
* @param $uri string URI to parse
|
||||||
* @return array(userinfo, host, int port, path, query, fragment) components
|
* @return HTMLPurifier_URI representation of URI
|
||||||
*/
|
*/
|
||||||
function parse($uri) {
|
function parse($uri) {
|
||||||
$r_URI = '!'.
|
$r_URI = '!'.
|
||||||
@ -51,7 +54,8 @@ class HTMLPurifier_URIParser
|
|||||||
$port = $host = $userinfo = null;
|
$port = $host = $userinfo = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return array($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
return new HTMLPurifier_URI(
|
||||||
|
$scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -23,20 +23,14 @@ class HTMLPurifier_URIScheme
|
|||||||
* Validates the components of a URI
|
* Validates the components of a URI
|
||||||
* @note This implementation should be called by children if they define
|
* @note This implementation should be called by children if they define
|
||||||
* a default port, as it does port processing.
|
* a default port, as it does port processing.
|
||||||
* @note Fragment is omitted as that is scheme independent
|
* @param $uri Instance of HTMLPurifier_URI
|
||||||
* @param $userinfo User info found before at sign in authority
|
|
||||||
* @param $host Hostname in authority
|
|
||||||
* @param $port Port found after colon in authority
|
|
||||||
* @param $path Path of URI
|
|
||||||
* @param $query Query of URI, found after question mark
|
|
||||||
* @param $config HTMLPurifier_Config object
|
* @param $config HTMLPurifier_Config object
|
||||||
* @param $context HTMLPurifier_Context object
|
* @param $context HTMLPurifier_Context object
|
||||||
|
* @return Bool success or failure
|
||||||
*/
|
*/
|
||||||
function validateComponents(
|
function validate(&$uri, $config, &$context) {
|
||||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
if ($this->default_port == $uri->port) $uri->port = null;
|
||||||
) {
|
return true;
|
||||||
if ($this->default_port == $port) $port = null;
|
|
||||||
return array($userinfo, $host, $port, $path, $query);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -10,34 +10,33 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
|||||||
var $default_port = 21;
|
var $default_port = 21;
|
||||||
var $browsable = true; // usually
|
var $browsable = true; // usually
|
||||||
|
|
||||||
function validateComponents(
|
function validate(&$uri, $config, &$context) {
|
||||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
parent::validate($uri, $config, $context);
|
||||||
) {
|
$uri->query = null;
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
|
||||||
parent::validateComponents(
|
// typecode check
|
||||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
$semicolon_pos = strrpos($uri->path, ';'); // reverse
|
||||||
$semicolon_pos = strrpos($path, ';'); // reverse
|
|
||||||
if ($semicolon_pos !== false) {
|
if ($semicolon_pos !== false) {
|
||||||
// typecode check
|
$type = substr($uri->path, $semicolon_pos + 1); // no semicolon
|
||||||
$type = substr($path, $semicolon_pos + 1); // no semicolon
|
$uri->path = substr($uri->path, 0, $semicolon_pos);
|
||||||
$path = substr($path, 0, $semicolon_pos);
|
|
||||||
$type_ret = '';
|
$type_ret = '';
|
||||||
if (strpos($type, '=') !== false) {
|
if (strpos($type, '=') !== false) {
|
||||||
// figure out whether or not the declaration is correct
|
// figure out whether or not the declaration is correct
|
||||||
list($key, $typecode) = explode('=', $type, 2);
|
list($key, $typecode) = explode('=', $type, 2);
|
||||||
if ($key !== 'type') {
|
if ($key !== 'type') {
|
||||||
// invalid key, tack it back on encoded
|
// invalid key, tack it back on encoded
|
||||||
$path .= '%3B' . $type;
|
$uri->path .= '%3B' . $type;
|
||||||
} elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') {
|
} elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') {
|
||||||
$type_ret = ";type=$typecode";
|
$type_ret = ";type=$typecode";
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
$path .= '%3B' . $type;
|
$uri->path .= '%3B' . $type;
|
||||||
}
|
}
|
||||||
$path = str_replace(';', '%3B', $path);
|
$uri->path = str_replace(';', '%3B', $uri->path);
|
||||||
$path .= $type_ret;
|
$uri->path .= $type_ret;
|
||||||
}
|
}
|
||||||
return array($userinfo, $host, $port, $path, null);
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -10,13 +10,10 @@ class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
|
|||||||
var $default_port = 80;
|
var $default_port = 80;
|
||||||
var $browsable = true;
|
var $browsable = true;
|
||||||
|
|
||||||
function validateComponents(
|
function validate(&$uri, $config, &$context) {
|
||||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
parent::validate($uri, $config, $context);
|
||||||
) {
|
$uri->userinfo = null;
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
return true;
|
||||||
parent::validateComponents(
|
|
||||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
|
||||||
return array(null, $host, $port, $path, $query);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -15,14 +15,13 @@ class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
|
|||||||
|
|
||||||
var $browsable = false;
|
var $browsable = false;
|
||||||
|
|
||||||
function validateComponents(
|
function validate(&$uri, $config, &$context) {
|
||||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
parent::validate($uri, $config, $context);
|
||||||
) {
|
$uri->userinfo = null;
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
$uri->host = null;
|
||||||
parent::validateComponents(
|
$uri->port = null;
|
||||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
|
||||||
// we need to validate path against RFC 2368's addr-spec
|
// we need to validate path against RFC 2368's addr-spec
|
||||||
return array(null, null, null, $path, $query);
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -9,14 +9,14 @@ class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
|
|||||||
|
|
||||||
var $browsable = false;
|
var $browsable = false;
|
||||||
|
|
||||||
function validateComponents(
|
function validate(&$uri, $config, &$context) {
|
||||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
parent::validate($uri, $config, $context);
|
||||||
) {
|
$uri->userinfo = null;
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
$uri->host = null;
|
||||||
parent::validateComponents(
|
$uri->port = null;
|
||||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
$uri->query = null;
|
||||||
// typecode check needed on path
|
// typecode check needed on path
|
||||||
return array(null, null, null, $path, null);
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -10,13 +10,11 @@ class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
|
|||||||
var $default_port = 119;
|
var $default_port = 119;
|
||||||
var $browsable = false;
|
var $browsable = false;
|
||||||
|
|
||||||
function validateComponents(
|
function validate(&$uri, $config, &$context) {
|
||||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
parent::validate($uri, $config, $context);
|
||||||
) {
|
$uri->userinfo = null;
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
$uri->query = null;
|
||||||
parent::validateComponents(
|
return true;
|
||||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
|
||||||
return array(null, $host, $port, $path, null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -3,162 +3,17 @@
|
|||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||||
|
|
||||||
// WARNING: INCOMPLETE UNIT TESTS!
|
/**
|
||||||
// we also need to test all the configuration directives defined by this class
|
* @todo Aim for complete code coverage with mocks
|
||||||
|
*/
|
||||||
// http: is returned quite often when a URL is invalid. We have to change
|
|
||||||
// this behavior to just a plain old "FALSE"!
|
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
var $scheme, $components, $return_components;
|
|
||||||
|
|
||||||
var $oldRegistry;
|
|
||||||
|
|
||||||
function setUp() {
|
function setUp() {
|
||||||
// setup ensures that any twiddling around with the registry is reverted
|
$this->def = new HTMLPurifier_AttrDef_URI();
|
||||||
$this->oldRegistry = HTMLPurifier_URISchemeRegistry::instance();
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_URI(); // default
|
|
||||||
parent::setUp();
|
parent::setUp();
|
||||||
}
|
}
|
||||||
|
|
||||||
function tearDown() {
|
|
||||||
HTMLPurifier_URISchemeRegistry::instance($this->oldRegistry);
|
|
||||||
}
|
|
||||||
|
|
||||||
function &generateSchemeMock($scheme_names = array('http', 'mailto')) {
|
|
||||||
generate_mock_once('HTMLPurifier_URIScheme');
|
|
||||||
generate_mock_once('HTMLPurifier_URISchemeRegistry');
|
|
||||||
|
|
||||||
// load a scheme registry mock to the singleton
|
|
||||||
$registry =& HTMLPurifier_URISchemeRegistry::instance(
|
|
||||||
new HTMLPurifier_URISchemeRegistryMock()
|
|
||||||
);
|
|
||||||
|
|
||||||
// add a pseudo-scheme to the registry for $scheme_names
|
|
||||||
$scheme = new HTMLPurifier_URISchemeMock();
|
|
||||||
foreach ($scheme_names as $name) {
|
|
||||||
$registry->setReturnReference('getScheme', $scheme, array($name, '*', '*'));
|
|
||||||
}
|
|
||||||
// registry returns false if an invalid scheme is requested
|
|
||||||
$registry->setReturnValue('getScheme', false, array('*', '*', '*'));
|
|
||||||
|
|
||||||
return $scheme;
|
|
||||||
}
|
|
||||||
|
|
||||||
// PARSING RELATED TESTS
|
|
||||||
|
|
||||||
function assertParsing($uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) {
|
|
||||||
|
|
||||||
$this->prepareCommon($config, $context);
|
|
||||||
$scheme =& $this->generateSchemeMock();
|
|
||||||
|
|
||||||
// create components parameter list
|
|
||||||
// Config and Context are wildcards due to PHP4 reference funkiness
|
|
||||||
$components = array($userinfo, $host, $port, $path, $query, '*', '*');
|
|
||||||
$scheme->expectOnce('validateComponents', $components);
|
|
||||||
|
|
||||||
$def = new HTMLPurifier_AttrDef_URI();
|
|
||||||
$def->validate($uri, $config, $context);
|
|
||||||
|
|
||||||
$scheme->tally();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingImproperPercentEncoding() {
|
|
||||||
// even though we don't resolve percent entities, we have to fix
|
|
||||||
// improper percent-encodes. Taken one at a time:
|
|
||||||
// %56 - V, which is an unreserved character
|
|
||||||
// %fc - u with an umlaut, normalize to uppercase
|
|
||||||
// %GJ - invalid characters in entity, encode %
|
|
||||||
// %5 - prematurely terminated, encode %
|
|
||||||
// %FC - u with umlaut, correct
|
|
||||||
// note that Apache doesn't do such fixing, rather, it just claims
|
|
||||||
// that the browser sent a "Bad Request". See PercentEncoder.php
|
|
||||||
// for more details
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://www.example.com/%56%fc%GJ%5%FC',
|
|
||||||
null, 'www.example.com', null, '/V%FC%25GJ%255%FC', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingInvalidHostThatLooksLikeIPv6Address() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]',
|
|
||||||
null, null, null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingOverLargePort() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://example.com:65536',
|
|
||||||
null, 'example.com', null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// OUTPUT RELATED TESTS
|
|
||||||
// scheme is mocked to ensure only the URI is being tested
|
|
||||||
|
|
||||||
function assertOutput($input_uri, $expect_uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) {
|
|
||||||
|
|
||||||
// prepare mock machinery
|
|
||||||
$this->prepareCommon($config, $context);
|
|
||||||
$scheme =& $this->generateSchemeMock();
|
|
||||||
$components = array($userinfo, $host, $port, $path, $query);
|
|
||||||
$scheme->setReturnValue('validateComponents', $components);
|
|
||||||
|
|
||||||
$def = new HTMLPurifier_AttrDef_URI();
|
|
||||||
$result_uri = $def->validate($input_uri, $config, $context);
|
|
||||||
if ($expect_uri === true) $expect_uri = $input_uri;
|
|
||||||
$this->assertEqual($result_uri, $expect_uri);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
function testOutputRegular() {
|
|
||||||
$this->assertOutput(
|
|
||||||
'http://user@authority.part:8080/now/the/path?query#frag', true,
|
|
||||||
'user', 'authority.part', 8080, '/now/the/path', 'query'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testOutputEmpty() {
|
|
||||||
$this->assertOutput(
|
|
||||||
'', true,
|
|
||||||
null, null, null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testOutputNullPath() {
|
|
||||||
$this->assertOutput(
|
|
||||||
'', true,
|
|
||||||
null, null, null, null, null // usually shouldn't happen
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testOutputPathAbsolute() {
|
|
||||||
$this->assertOutput(
|
|
||||||
'http:/this/is/path', '/this/is/path',
|
|
||||||
null, null, null, '/this/is/path', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testOutputPathRootless() {
|
|
||||||
$this->assertOutput(
|
|
||||||
'http:this/is/path', 'this/is/path',
|
|
||||||
null, null, null, 'this/is/path', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testOutputPathEmpty() {
|
|
||||||
$this->assertOutput(
|
|
||||||
'http:', '',
|
|
||||||
null, null, null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// INTEGRATION TESTS
|
|
||||||
|
|
||||||
function testIntegration() {
|
function testIntegration() {
|
||||||
$this->assertDef('http://www.google.com/');
|
$this->assertDef('http://www.google.com/');
|
||||||
$this->assertDef('http:', '');
|
$this->assertDef('http:', '');
|
||||||
@ -170,84 +25,27 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
$this->assertDef('mailto:bob@example.com');
|
$this->assertDef('mailto:bob@example.com');
|
||||||
}
|
}
|
||||||
|
|
||||||
function testConfigDisableExternal() {
|
function testIntegrationWithPercentEncoder() {
|
||||||
|
$this->assertDef(
|
||||||
$this->def = new HTMLPurifier_AttrDef_URI();
|
'http://www.example.com/%56%fc%GJ%5%FC',
|
||||||
|
'http://www.example.com/V%FC%25GJ%255%FC'
|
||||||
$this->config->set('URI', 'DisableExternal', true);
|
);
|
||||||
$this->config->set('URI', 'Host', 'sub.example.com');
|
|
||||||
|
|
||||||
$this->assertDef('/foobar.txt');
|
|
||||||
$this->assertDef('http://google.com/', false);
|
|
||||||
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
|
||||||
$this->assertDef('http://example.com/teehee', false);
|
|
||||||
$this->assertDef('http://www.example.com/#man', false);
|
|
||||||
$this->assertDef('http://go.sub.example.com/perhaps?p=foo');
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function testEmbeds() {
|
function testEmbeds() {
|
||||||
|
|
||||||
// embedded URI
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_URI(true);
|
$this->def = new HTMLPurifier_AttrDef_URI(true);
|
||||||
|
|
||||||
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
||||||
$this->assertDef('mailto:foo@example.com', false);
|
$this->assertDef('mailto:foo@example.com', false);
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
function testConfigDisableExternalResources() {
|
|
||||||
|
|
||||||
$this->config->set('URI', 'DisableExternalResources', true);
|
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_URI();
|
|
||||||
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
|
||||||
$this->assertDef('/img.png');
|
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_URI(true);
|
|
||||||
$this->assertDef('http://sub.example.com/alas?foo=asd', false);
|
|
||||||
$this->assertDef('/img.png');
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function testConfigMunge() {
|
function testConfigMunge() {
|
||||||
|
|
||||||
$this->config->set('URI', 'Munge', 'http://www.google.com/url?q=%s');
|
$this->config->set('URI', 'Munge', 'http://www.google.com/url?q=%s');
|
||||||
|
|
||||||
$this->assertDef(
|
$this->assertDef(
|
||||||
'http://www.example.com/',
|
'http://www.example.com/',
|
||||||
'http://www.google.com/url?q=http%3A%2F%2Fwww.example.com%2F'
|
'http://www.google.com/url?q=http%3A%2F%2Fwww.example.com%2F'
|
||||||
);
|
);
|
||||||
|
|
||||||
$this->assertDef('index.html');
|
$this->assertDef('index.html');
|
||||||
$this->assertDef('javascript:foobar();', false);
|
$this->assertDef('javascript:foobar();', false);
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
function testBlacklist() {
|
|
||||||
|
|
||||||
$this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
|
|
||||||
|
|
||||||
$this->assertDef('foo.txt');
|
|
||||||
$this->assertDef('http://www.google.com/example.com/moo');
|
|
||||||
|
|
||||||
$this->assertDef('http://example.com/#23', false);
|
|
||||||
$this->assertDef('https://sub.domain.example.com/foobar', false);
|
|
||||||
$this->assertDef('http://example.com.example.net/?whoo=foo', false);
|
|
||||||
$this->assertDef('ftp://moo-moo.net/foo/foo/', false);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
function testWhitelist() {
|
|
||||||
/* unimplemented
|
|
||||||
$this->config->set('URI', 'HostPolicy', 'DenyAll');
|
|
||||||
$this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
|
|
||||||
|
|
||||||
$this->assertDef('http://example.com/fo/google.com', false);
|
|
||||||
$this->assertDef('server.txt');
|
|
||||||
$this->assertDef('ftp://www.google.com/?t=a');
|
|
||||||
$this->assertDef('http://google.com.tricky.spamsite.net', false);
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -10,10 +10,20 @@ class HTMLPurifier_Harness extends UnitTestCase
|
|||||||
parent::UnitTestCase();
|
parent::UnitTestCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var $config, $context;
|
||||||
|
|
||||||
|
function setUp() {
|
||||||
|
list($this->config, $this->context) = $this->createCommon();
|
||||||
|
}
|
||||||
|
|
||||||
function prepareCommon(&$config, &$context) {
|
function prepareCommon(&$config, &$context) {
|
||||||
$config = HTMLPurifier_Config::create($config);
|
$config = HTMLPurifier_Config::create($config);
|
||||||
if (!$context) $context = new HTMLPurifier_Context();
|
if (!$context) $context = new HTMLPurifier_Context();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function createCommon() {
|
||||||
|
return array(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/URIParser.php';
|
require_once 'HTMLPurifier/URIParser.php';
|
||||||
|
require_once 'HTMLPurifier/URI.php';
|
||||||
|
|
||||||
class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
|
class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
|
||||||
{
|
{
|
||||||
@ -11,7 +12,8 @@ class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
|
|||||||
$this->prepareCommon($config, $context);
|
$this->prepareCommon($config, $context);
|
||||||
$parser = new HTMLPurifier_URIParser();
|
$parser = new HTMLPurifier_URIParser();
|
||||||
$result = $parser->parse($uri, $config, $context);
|
$result = $parser->parse($uri, $config, $context);
|
||||||
$this->assertEqual($result, array($scheme, $userinfo, $host, $port, $path, $query, $fragment));
|
$expect = new HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
||||||
|
$this->assertEqual($result, $expect);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testRegular() {
|
function testRegular() {
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/URI.php';
|
||||||
|
require_once 'HTMLPurifier/URIParser.php';
|
||||||
|
|
||||||
require_once 'HTMLPurifier/URIScheme.php';
|
require_once 'HTMLPurifier/URIScheme.php';
|
||||||
|
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||||
|
|
||||||
require_once 'HTMLPurifier/URIScheme/http.php';
|
require_once 'HTMLPurifier/URIScheme/http.php';
|
||||||
require_once 'HTMLPurifier/URIScheme/ftp.php';
|
require_once 'HTMLPurifier/URIScheme/ftp.php';
|
||||||
@ -15,142 +19,140 @@ require_once 'HTMLPurifier/URIScheme/nntp.php';
|
|||||||
class HTMLPurifier_URISchemeTest extends HTMLPurifier_Harness
|
class HTMLPurifier_URISchemeTest extends HTMLPurifier_Harness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test_http() {
|
function assertValidation($uri, $expect_uri = true) {
|
||||||
$scheme = new HTMLPurifier_URIScheme_http();
|
$parser = new HTMLPurifier_URIParser();
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
// Passing true (the default) means "expect the URI to validate unchanged", so the expectation becomes the input itself.
if ($expect_uri === true) $expect_uri = $uri;
|
||||||
$context = new HTMLPurifier_Context();
|
$uri = $parser->parse($uri);
|
||||||
|
if ($expect_uri !== false) {
|
||||||
$this->assertIdentical(
|
$expect_uri = $parser->parse($expect_uri);
|
||||||
$scheme->validateComponents(
|
}
|
||||||
null, 'www.example.com', null, '/', 's=foobar', $config, $context),
|
// convenience hack: the scheme should be explicitly specified
|
||||||
array(null, 'www.example.com', null, '/', 's=foobar')
|
$scheme = $uri->getSchemeObj($this->config, $this->context);
|
||||||
);
|
$result = $scheme->validate($uri, $this->config, $this->context);
|
||||||
|
if ($expect_uri !== false) {
|
||||||
// absorb default port and userinfo
|
$this->assertTrue($result);
|
||||||
$this->assertIdentical(
|
$this->assertIdentical($uri, $expect_uri);
|
||||||
$scheme->validateComponents(
|
} else {
|
||||||
'user', 'www.example.com', 80, '/', 's=foobar', $config, $context),
|
$this->assertFalse($result);
|
||||||
array(null, 'www.example.com', null, '/', 's=foobar')
|
}
|
||||||
);
|
|
||||||
|
|
||||||
// do not absorb non-default port
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
null, 'www.example.com', 8080, '/', 's=foobar', $config, $context),
|
|
||||||
array(null, 'www.example.com', 8080, '/', 's=foobar')
|
|
||||||
);
|
|
||||||
|
|
||||||
// https is basically the same
|
|
||||||
|
|
||||||
$scheme = new HTMLPurifier_URIScheme_https();
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
'user', 'www.example.com', 443, '/', 's=foobar', $config, $context),
|
|
||||||
array(null, 'www.example.com', null, '/', 's=foobar')
|
|
||||||
);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_ftp() {
|
function test_http_regular() {
|
||||||
|
$this->assertValidation(
|
||||||
$scheme = new HTMLPurifier_URIScheme_ftp();
|
'http://example.com/?s=q#fragment'
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$context = new HTMLPurifier_Context();
|
|
||||||
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
'user', 'www.example.com', 21, '/', 's=foobar', $config, $context),
|
|
||||||
array('user', 'www.example.com', null, '/', null)
|
|
||||||
);
|
|
||||||
|
|
||||||
// valid typecode
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
null, 'www.example.com', null, '/file.txt;type=a', null, $config, $context),
|
|
||||||
array(null, 'www.example.com', null, '/file.txt;type=a', null)
|
|
||||||
);
|
|
||||||
|
|
||||||
// remove invalid typecode
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
null, 'www.example.com', null, '/file.txt;type=z', null, $config, $context),
|
|
||||||
array(null, 'www.example.com', null, '/file.txt', null)
|
|
||||||
);
|
|
||||||
|
|
||||||
// encode errant semicolons
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
null, 'www.example.com', null, '/too;many;semicolons=1', null, $config, $context),
|
|
||||||
array(null, 'www.example.com', null, '/too%3Bmany%3Bsemicolons=1', null)
|
|
||||||
);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
function test_news() {
|
|
||||||
|
|
||||||
$scheme = new HTMLPurifier_URIScheme_news();
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$context = new HTMLPurifier_Context();
|
|
||||||
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
null, null, null, 'gmane.science.linguistics', null, $config, $context),
|
|
||||||
array(null, null, null, 'gmane.science.linguistics', null)
|
|
||||||
);
|
|
||||||
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
null, null, null, '642@eagle.ATT.COM', null, $config, $context),
|
|
||||||
array(null, null, null, '642@eagle.ATT.COM', null)
|
|
||||||
);
|
|
||||||
|
|
||||||
// test invalid field removal
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
'user', 'www.google.com', 80, 'rec.music', 'path=foo', $config, $context),
|
|
||||||
array(null, null, null, 'rec.music', null)
|
|
||||||
);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
function test_nntp() {
|
|
||||||
|
|
||||||
$scheme = new HTMLPurifier_URIScheme_nntp();
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$context = new HTMLPurifier_Context();
|
|
||||||
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
null, 'news.example.com', null, '/alt.misc/12345', null, $config, $context),
|
|
||||||
array(null, 'news.example.com', null, '/alt.misc/12345', null)
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
'user', 'news.example.com', 119, '/alt.misc/12345', 'foo=asdf', $config, $context),
|
|
||||||
array(null, 'news.example.com', null, '/alt.misc/12345', null)
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_mailto() {
|
function test_http_removeDefaultPort() {
|
||||||
|
$this->assertValidation(
|
||||||
$scheme = new HTMLPurifier_URIScheme_mailto();
|
'http://example.com:80',
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
'http://example.com'
|
||||||
$context = new HTMLPurifier_Context();
|
|
||||||
|
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
|
||||||
null, null, null, 'bob@example.com', null, $config, $context),
|
|
||||||
array(null, null, null, 'bob@example.com', null)
|
|
||||||
);
|
);
|
||||||
|
}
|
||||||
$this->assertIdentical(
|
|
||||||
$scheme->validateComponents(
|
function test_http_removeUserInfo() {
|
||||||
'user', 'example.com', 80, 'bob@example.com', 'subject=Foo!', $config, $context),
|
$this->assertValidation(
|
||||||
array(null, null, null, 'bob@example.com', 'subject=Foo!')
|
'http://bob@example.com',
|
||||||
|
'http://example.com'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_http_preserveNonDefaultPort() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'http://example.com:8080'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_https_regular() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'https://user@example.com:443/?s=q#frag',
|
||||||
|
'https://example.com/?s=q#frag'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_ftp_regular() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'ftp://user@example.com/path'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_ftp_removeDefaultPort() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'ftp://example.com:21',
|
||||||
|
'ftp://example.com'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_ftp_removeQueryString() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'ftp://example.com?s=q',
|
||||||
|
'ftp://example.com'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_ftp_preserveValidTypecode() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'ftp://example.com/file.txt;type=a'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_ftp_removeInvalidTypecode() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'ftp://example.com/file.txt;type=z',
|
||||||
|
'ftp://example.com/file.txt'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_ftp_encodeExtraSemicolons() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'ftp://example.com/too;many;semicolons=1',
|
||||||
|
'ftp://example.com/too%3Bmany%3Bsemicolons=1'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_news_regular() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'news:gmane.science.linguistics'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_news_explicit() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'news:642@eagle.ATT.COM'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_news_removeNonPathComponents() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'news://user@example.com:80/rec.music?path=foo#frag',
|
||||||
|
'news:/rec.music#frag'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_nntp_regular() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'nntp://news.example.com/alt.misc/42#frag'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_nntp_removalOfRedundantOrUselessComponents() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'nntp://user@news.example.com:119/alt.misc/42?s=q#frag',
|
||||||
|
'nntp://news.example.com/alt.misc/42#frag'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_mailto_regular() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'mailto:bob@example.com'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_mailto_removalOfRedundantOrUselessComponents() {
|
||||||
|
$this->assertValidation(
|
||||||
|
'mailto://user@example.com:80/bob@example.com?subject=Foo#frag',
|
||||||
|
'mailto:/bob@example.com?subject=Foo#frag'
|
||||||
);
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
240
tests/HTMLPurifier/URITest.php
Normal file
240
tests/HTMLPurifier/URITest.php
Normal file
@ -0,0 +1,240 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/URI.php';
|
||||||
|
require_once 'HTMLPurifier/URIParser.php';
|
||||||
|
|
||||||
|
class HTMLPurifier_URITest extends HTMLPurifier_Harness
|
||||||
|
{
|
||||||
|
|
||||||
|
function createURI($uri) {
|
||||||
|
$parser = new HTMLPurifier_URIParser();
|
||||||
|
return $parser->parse($uri);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_construct() {
|
||||||
|
$uri1 = new HTMLPurifier_URI('HTTP', 'bob', 'example.com', '23', '/foo', 'bar=2', 'slash');
|
||||||
|
$uri2 = new HTMLPurifier_URI('http', 'bob', 'example.com', 23, '/foo', 'bar=2', 'slash');
|
||||||
|
$this->assertIdentical($uri1, $uri2);
|
||||||
|
}
|
||||||
|
|
||||||
|
var $oldRegistry;
|
||||||
|
|
||||||
|
function &setUpSchemeRegistryMock() {
|
||||||
|
$this->oldRegistry = HTMLPurifier_URISchemeRegistry::instance();
|
||||||
|
generate_mock_once('HTMLPurifier_URIScheme');
|
||||||
|
generate_mock_once('HTMLPurifier_URISchemeRegistry');
|
||||||
|
$registry =& HTMLPurifier_URISchemeRegistry::instance(
|
||||||
|
new HTMLPurifier_URISchemeRegistryMock()
|
||||||
|
);
|
||||||
|
return $registry;
|
||||||
|
}
|
||||||
|
|
||||||
|
function &setUpSchemeMock($name) {
|
||||||
|
$registry =& $this->setUpSchemeRegistryMock();
|
||||||
|
$scheme_mock = new HTMLPurifier_URISchemeMock();
|
||||||
|
$registry->setReturnValue('getScheme', $scheme_mock, array($name, '*', '*'));
|
||||||
|
return $scheme_mock;
|
||||||
|
}
|
||||||
|
|
||||||
|
function setUpNoValidSchemes() {
|
||||||
|
$registry =& $this->setUpSchemeRegistryMock();
|
||||||
|
$registry->setReturnValue('getScheme', false, array('*', '*', '*'));
|
||||||
|
}
|
||||||
|
|
||||||
|
function tearDownSchemeRegistryMock() {
|
||||||
|
HTMLPurifier_URISchemeRegistry::instance($this->oldRegistry);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_getSchemeObj() {
|
||||||
|
$scheme_mock =& $this->setUpSchemeMock('http');
|
||||||
|
|
||||||
|
$uri = $this->createURI('http:');
|
||||||
|
$scheme_obj = $uri->getSchemeObj($this->config, $this->context);
|
||||||
|
$this->assertIdentical($scheme_obj, $scheme_mock);
|
||||||
|
|
||||||
|
$this->tearDownSchemeRegistryMock();
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_getSchemeObj_invalidScheme() {
|
||||||
|
$this->setUpNoValidSchemes();
|
||||||
|
|
||||||
|
$uri = $this->createURI('http:');
|
||||||
|
$result = $uri->getSchemeObj($this->config, $this->context);
|
||||||
|
$this->assertIdentical($result, false);
|
||||||
|
|
||||||
|
$this->tearDownSchemeRegistryMock();
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_getSchemaObj_defaultScheme() {
|
||||||
|
$scheme = 'foobar';
|
||||||
|
|
||||||
|
$scheme_mock =& $this->setUpSchemeMock($scheme);
|
||||||
|
$this->config->set('URI', 'DefaultScheme', $scheme);
|
||||||
|
|
||||||
|
$uri = $this->createURI('hmm');
|
||||||
|
$scheme_obj = $uri->getSchemeObj($this->config, $this->context);
|
||||||
|
$this->assertIdentical($scheme_obj, $scheme_mock);
|
||||||
|
|
||||||
|
$this->tearDownSchemeRegistryMock();
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_getSchemaObj_invalidDefaultScheme() {
|
||||||
|
$this->setUpNoValidSchemes();
|
||||||
|
$this->config->set('URI', 'DefaultScheme', 'foobar');
|
||||||
|
|
||||||
|
$uri = $this->createURI('hmm');
|
||||||
|
|
||||||
|
$this->expectError('Default scheme object "foobar" was not readable');
|
||||||
|
$result = $uri->getSchemeObj($this->config, $this->context);
|
||||||
|
$this->assertIdentical($result, false);
|
||||||
|
|
||||||
|
$this->tearDownSchemeRegistryMock();
|
||||||
|
}
|
||||||
|
|
||||||
|
function assertToString($expect_uri, $scheme, $userinfo, $host, $port, $path, $query, $fragment) {
|
||||||
|
$uri = new HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
||||||
|
$string = $uri->toString();
|
||||||
|
$this->assertIdentical($string, $expect_uri);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_toString_full() {
|
||||||
|
$this->assertToString(
|
||||||
|
'http://bob@example.com:300/foo?bar=baz#fragment',
|
||||||
|
'http', 'bob', 'example.com', 300, '/foo', 'bar=baz', 'fragment'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_toString_scheme() {
|
||||||
|
$this->assertToString(
|
||||||
|
'http:',
|
||||||
|
'http', null, null, null, '', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_toString_authority() {
|
||||||
|
$this->assertToString(
|
||||||
|
'//bob@example.com:8080',
|
||||||
|
null, 'bob', 'example.com', 8080, '', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_toString_path() {
|
||||||
|
$this->assertToString(
|
||||||
|
'/path/to',
|
||||||
|
null, null, null, null, '/path/to', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_toString_query() {
|
||||||
|
$this->assertToString(
|
||||||
|
'?q=string',
|
||||||
|
null, null, null, null, '', 'q=string', null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_toString_fragment() {
|
||||||
|
$this->assertToString(
|
||||||
|
'#fragment',
|
||||||
|
null, null, null, null, '', null, 'fragment'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function assertValidation($uri, $expect_uri = true) {
|
||||||
|
if ($expect_uri === true) $expect_uri = $uri;
|
||||||
|
$uri = $this->createURI($uri);
|
||||||
|
$result = $uri->validate($this->config, $this->context);
|
||||||
|
if ($expect_uri === false) {
|
||||||
|
$this->assertFalse($result);
|
||||||
|
} else {
|
||||||
|
$this->assertTrue($result);
|
||||||
|
$this->assertIdentical($uri->toString(), $expect_uri);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_defaultSchemeRemovedInBlank() {
|
||||||
|
$this->assertValidation('http:', '');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_defaultSchemeRemovedInRelativeURI() {
|
||||||
|
$this->assertValidation('http:/foo/bar', '/foo/bar');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_defaultSchemeNotRemovedInAbsoluteURI() {
|
||||||
|
$this->assertValidation('http://example.com/foo/bar');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_altSchemeNotRemoved() {
|
||||||
|
$this->assertValidation('mailto:this-looks-like-a-path@example.com');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_overlongPort() {
|
||||||
|
$this->assertValidation('http://example.com:65536', 'http://example.com');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_zeroPort() {
|
||||||
|
$this->assertValidation('http://example.com:00', 'http://example.com');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_invalidHostThatLooksLikeIPv6() {
|
||||||
|
$this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', '');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_configDisableExternal() {
|
||||||
|
|
||||||
|
$this->def = new HTMLPurifier_AttrDef_URI();
|
||||||
|
|
||||||
|
$this->config->set('URI', 'DisableExternal', true);
|
||||||
|
$this->config->set('URI', 'Host', 'sub.example.com');
|
||||||
|
|
||||||
|
$this->assertValidation('/foobar.txt');
|
||||||
|
$this->assertValidation('http://google.com/', false);
|
||||||
|
$this->assertValidation('http://sub.example.com/alas?foo=asd');
|
||||||
|
$this->assertValidation('http://example.com/teehee', false);
|
||||||
|
$this->assertValidation('http://www.example.com/#man', false);
|
||||||
|
$this->assertValidation('http://go.sub.example.com/perhaps?p=foo');
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_configDisableExternalResources() {
|
||||||
|
|
||||||
|
$this->config->set('URI', 'DisableExternalResources', true);
|
||||||
|
|
||||||
|
$this->assertValidation('http://sub.example.com/alas?foo=asd');
|
||||||
|
$this->assertValidation('/img.png');
|
||||||
|
|
||||||
|
$embeds = true; // passed by reference
|
||||||
|
$this->context->register('EmbeddedURI', $embeds);
|
||||||
|
$this->assertValidation('http://sub.example.com/alas?foo=asd', false);
|
||||||
|
$this->assertValidation('/img.png');
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_configBlacklist() {
|
||||||
|
|
||||||
|
$this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
|
||||||
|
|
||||||
|
$this->assertValidation('foo.txt');
|
||||||
|
$this->assertValidation('http://www.google.com/example.com/moo');
|
||||||
|
|
||||||
|
$this->assertValidation('http://example.com/#23', false);
|
||||||
|
$this->assertValidation('https://sub.domain.example.com/foobar', false);
|
||||||
|
$this->assertValidation('http://example.com.example.net/?whoo=foo', false);
|
||||||
|
$this->assertValidation('ftp://moo-moo.net/foo/foo/', false);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
function test_validate_configWhitelist() {
|
||||||
|
|
||||||
|
$this->config->set('URI', 'HostPolicy', 'DenyAll');
|
||||||
|
$this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
|
||||||
|
|
||||||
|
$this->assertValidation('http://example.com/fo/google.com', false);
|
||||||
|
$this->assertValidation('server.txt');
|
||||||
|
$this->assertValidation('ftp://www.google.com/?t=a');
|
||||||
|
$this->assertValidation('http://google.com.tricky.spamsite.net', false);
|
||||||
|
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
}
|
@ -105,6 +105,7 @@ $test_files[] = 'HTMLPurifier/TokenTest.php';
|
|||||||
$test_files[] = 'HTMLPurifier/URIParserTest.php';
|
$test_files[] = 'HTMLPurifier/URIParserTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
|
$test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/URISchemeTest.php';
|
$test_files[] = 'HTMLPurifier/URISchemeTest.php';
|
||||||
|
$test_files[] = 'HTMLPurifier/URITest.php';
|
||||||
$test_files[] = 'HTMLPurifierTest.php';
|
$test_files[] = 'HTMLPurifierTest.php';
|
||||||
|
|
||||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||||
|
Loading…
Reference in New Issue
Block a user