0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-09 15:28:40 +00:00

[2.1.0] Create new URI object and migrate URI validation systems to use it. URIScheme interface changed.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1334 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-08-01 18:34:46 +00:00
parent 8c9dbe142d
commit 797b899305
16 changed files with 623 additions and 534 deletions

3
NEWS
View File

@ -51,6 +51,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. URI scheme is munged off if there is no authority and the scheme is the . URI scheme is munged off if there is no authority and the scheme is the
default one default one
. All unit tests inherit from HTMLPurifier_Harness, not UnitTestCase . All unit tests inherit from HTMLPurifier_Harness, not UnitTestCase
. Interface for URIScheme changed
. Generic URI object to hold components of URI added, most systems involved
in URI validation have been migrated to use it
2.0.1, released 2007-06-27 2.0.1, released 2007-06-27
! Tag auto-closing now based on a ChildDef heuristic rather than a ! Tag auto-closing now based on a ChildDef heuristic rather than a

View File

@ -93,170 +93,59 @@ HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{ {
var $host, $parser; var $parser, $percentEncoder;
var $embeds_resource; var $embedsResource;
/** /**
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request? * @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
*/ */
function HTMLPurifier_AttrDef_URI($embeds_resource = false) { function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
$this->host = new HTMLPurifier_AttrDef_URI_Host();
$this->parser = new HTMLPurifier_URIParser(); $this->parser = new HTMLPurifier_URIParser();
$this->embeds_resource = (bool) $embeds_resource; $this->percentEncoder = new HTMLPurifier_PercentEncoder();
$this->embedsResource = (bool) $embeds_resource;
} }
function validate($uri, $config, &$context) { function validate($uri, $config, &$context) {
static $PercentEncoder = null;
if ($PercentEncoder === null) $PercentEncoder = new HTMLPurifier_PercentEncoder();
if ($config->get('URI', 'Disable')) return false; if ($config->get('URI', 'Disable')) return false;
// initial operations // initial operations
$uri = $this->parseCDATA($uri); $uri = $this->parseCDATA($uri);
$uri = $PercentEncoder->normalize($uri); $uri = $this->percentEncoder->normalize($uri);
// parse the URI // parse the URI
$parsed_uri = $this->parser->parse($uri); $uri = $this->parser->parse($uri);
if ($parsed_uri === false) return false; if ($uri === false) return false;
list($scheme, $userinfo, $host, $port, $path, $query, $fragment) = $parsed_uri;
// retrieve the scheme object // generic validation
$registry =& HTMLPurifier_URISchemeRegistry::instance(); $context->register('EmbeddedURI', $this->embedsResource); // flag
$default_scheme = $config->get('URI', 'DefaultScheme'); $result = $uri->validate($config, $context);
if ($scheme !== null) { $context->destroy('EmbeddedURI');
// no need to validate the scheme's fmt since we do that when we if (!$result) return false;
// retrieve the specific scheme object from the registry
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
$scheme_obj = $registry->getScheme($scheme, $config, $context);
if (!$scheme_obj) return false; // invalid scheme, clean it out
} else {
// no scheme: retrieve the default one
$scheme_obj = $registry->getScheme($default_scheme, $config, $context);
if (!$scheme_obj) {
// something funky happened to the default scheme object
trigger_error(
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
E_USER_WARNING
);
return false;
}
}
if ($this->embeds_resource && !$scheme_obj->browsable) {
// the URI we're processing embeds_resource a resource in the
// page, but the URI it references cannot be physically retrieved
return false;
}
// validate host // scheme-specific validation
if ($host !== null) { $scheme_obj = $uri->getSchemeObj($config, $context);
// remove URI if it's absolute and we disabled externals or if (!$scheme_obj) return false;
// if it's absolute and embedded and we disabled external resources if ($this->embedsResource && !$scheme_obj->browsable) return false;
unset($our_host); $result = $scheme_obj->validate($uri, $config, $context);
if ( if (!$result) return false;
$config->get('URI', 'DisableExternal') ||
(
$config->get('URI', 'DisableExternalResources') &&
$this->embeds_resource
)
) {
$our_host = $config->get('URI', 'Host');
if ($our_host === null) return false;
}
$host = $this->host->validate($host, $config, $context);
if ($host === false) $host = null;
// check host against blacklist
if ($this->checkBlacklist($host, $config, $context)) return false;
// more lenient absolute checking
if (isset($our_host)) {
$host_parts = array_reverse(explode('.', $host));
// could be cached
$our_host_parts = array_reverse(explode('.', $our_host));
foreach ($our_host_parts as $i => $discard) {
if (!isset($host_parts[$i])) return false;
if ($host_parts[$i] != $our_host_parts[$i]) return false;
}
}
}
// validate port // back to string
if ($port !== null) { $result = $uri->toString();
if ($port < 1 || $port > 65535) $port = null;
}
// query and fragment are quite simple in terms of definition:
// *( pchar / "/" / "?" ), so define their validation routines
// when we start fixing percent encoding
// path gets to be validated against a hodge-podge of rules depending
// on the status of authority and scheme, but it's not that important,
// esp. since it won't be applicable to everyone
// okay, now we defer execution to the subobject for more processing
// note that $fragment is omitted
list($userinfo, $host, $port, $path, $query) =
$scheme_obj->validateComponents(
$userinfo, $host, $port, $path, $query, $config, $context
);
// reconstruct authority
$authority = null;
if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
$authority = '';
if($userinfo !== null) $authority .= $userinfo . '@';
$authority .= $host;
if($port !== null) $authority .= ':' . $port;
} else {
if ($default_scheme == $scheme) $scheme = null; // munge scheme off when unnecessary
}
// reconstruct the result
$result = '';
if ($scheme !== null) $result .= "$scheme:";
if ($authority !== null) $result .= "//$authority";
$result .= $path;
if ($query !== null) $result .= "?$query";
if ($fragment !== null) $result .= "#$fragment";
// munge if necessary // munge if necessary
$munge = $config->get('URI', 'Munge'); if (
if (!empty($scheme_obj->browsable) && $munge !== null) { !is_null($uri->host) && // indicator for authority
if ($authority !== null) { !empty($scheme_obj->browsable) &&
$result = str_replace('%s', rawurlencode($result), $munge); !is_null($munge = $config->get('URI', 'Munge'))
} ) {
$result = str_replace('%s', rawurlencode($result), $munge);
} }
return $result; return $result;
} }
/**
* Checks a host against an array blacklist
* @param $host Host to check
* @param $config HTMLPurifier_Config instance
* @param $context HTMLPurifier_Context instance
* @return bool Is spam?
*/
function checkBlacklist($host, &$config, &$context) {
$blacklist = $config->get('URI', 'HostBlacklist');
if (!empty($blacklist)) {
foreach($blacklist as $blacklisted_host_fragment) {
if (strpos($host, $blacklisted_host_fragment) !== false) {
return true;
}
}
}
return false;
}
} }

View File

@ -0,0 +1,153 @@
<?php
require_once 'HTMLPurifier/URIParser.php';
/**
 * HTML Purifier's internal representation of a URI.
 *
 * Holds the seven URI components as public members, performs the
 * scheme-independent part of validation (see validate()) and can
 * serialise itself back into a string (see toString()).
 */
class HTMLPurifier_URI
{

    var $scheme, $userinfo, $host, $port, $path, $query, $fragment;

    /**
     * @note Automatically normalizes scheme and port
     * @param $scheme   Scheme string or null; lowercased if it is not already
     * @param $userinfo User info string or null
     * @param $host     Host string or null
     * @param $port     Port or null; any non-null value is cast to integer
     * @param $path     Path string
     * @param $query    Query string or null
     * @param $fragment Fragment string or null
     */
    function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves the scheme object corresponding to this URI's scheme, or
     * to %URI.DefaultScheme when the URI carries no scheme of its own.
     * @param $config  HTMLPurifier_Config instance
     * @param $context HTMLPurifier_Context instance
     * @return Scheme object as returned by the registry, or false if none
     *         could be retrieved (a warning is raised when the *default*
     *         scheme is the one that is missing)
     */
    function getSchemeObj($config, &$context) {
        $registry =& HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $scheme_obj = $registry->getScheme($config->get('URI', 'DefaultScheme'), $config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes.
     *
     * Modifies this object in place: an invalid host is set to null, the
     * scheme is set to null when there is no host and the scheme equals
     * %URI.DefaultScheme, and a port outside 1-65535 is set to null.
     *
     * @param $config  HTMLPurifier_Config instance
     * @param $context HTMLPurifier_Context instance; the 'EmbeddedURI'
     *                 flag is read from it when %URI.DisableExternalResources
     *                 is on
     * @return bool True if the URI may be kept, false if it must be removed
     */
    function validate($config, &$context) {

        // validate host
        if (!is_null($this->host)) {

            // remove URI if it's absolute and we disabled externals or
            // if it's absolute and embedded and we disabled external resources
            unset($our_host); // ensure this variable is not set
            if (
                $config->get('URI', 'DisableExternal') ||
                (
                    $config->get('URI', 'DisableExternalResources') &&
                    $context->get('EmbeddedURI', true) // suppress errors
                )
            ) {
                $our_host = $config->get('URI', 'Host');
                if ($our_host === null) return false;
            }

            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) $this->host = null;

            // check host against blacklist
            if ($this->checkBlacklist($this->host, $config, $context)) return false;

            // more lenient absolute checking: the URI's host must be our
            // host or a subdomain of it, compared label by label from the
            // right-hand side
            if (isset($our_host)) {
                $host_parts = array_reverse(explode('.', $this->host));
                // could be cached
                $our_host_parts = array_reverse(explode('.', $our_host));
                foreach ($our_host_parts as $i => $discard) {
                    if (!isset($host_parts[$i])) return false;
                    // strict comparison: with != PHP compares numeric
                    // strings as numbers, so a label such as "1e1" would
                    // be accepted in place of "10"
                    if ($host_parts[$i] !== $our_host_parts[$i]) return false;
                }
            }
        }

        // munge scheme off if necessary
        if (!is_null($this->scheme) && is_null($this->host)) {
            if ($config->get('URI', 'DefaultScheme') === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) $this->port = null;
        }

        // query and fragment are quite simple in terms of definition:
        // *( pchar / "/" / "?" ), so define their validation routines
        // when we start fixing percent encoding

        // path gets to be validated against a hodge-podge of rules depending
        // on the status of authority and scheme, but it's not that important,
        // esp. since it won't be applicable to everyone

        return true;

    }

    /**
     * Checks a host against an array blacklist
     *
     * A host is considered blacklisted when any non-empty entry of
     * %URI.HostBlacklist occurs anywhere inside it (substring match).
     *
     * @param $host Host to check
     * @param $config HTMLPurifier_Config instance
     * @param $context HTMLPurifier_Context instance
     * @return bool Is spam?
     */
    function checkBlacklist($host, $config, &$context) {
        $blacklist = $config->get('URI', 'HostBlacklist');
        if (!empty($blacklist)) {
            foreach($blacklist as $blacklisted_host_fragment) {
                // an empty fragment carries no information; handing it to
                // strpos() raises a warning on older PHP versions and
                // matches every host on newer ones, so skip it
                if ((string) $blacklisted_host_fragment === '') continue;
                if (strpos($host, $blacklisted_host_fragment) !== false) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Convert URI back to string
     * @note An authority is only emitted when a host is present; userinfo
     *       and port are dropped otherwise
     * @return String URI assembled from the non-null components
     */
    function toString() {
        // reconstruct authority
        $authority = null;
        if (!is_null($this->host)) {
            $authority = '';
            if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
            $authority .= $this->host;
            if(!is_null($this->port))     $authority .= ':' . $this->port;
        }

        // reconstruct the result
        $result = '';
        if (!is_null($this->scheme))    $result .= $this->scheme . ':';
        if (!is_null($authority))       $result .= '//' . $authority;
        $result .= $this->path;
        if (!is_null($this->query))     $result .= '?' . $this->query;
        if (!is_null($this->fragment))  $result .= '#' . $this->fragment;

        return $result;
    }

}

View File

@ -1,8 +1,11 @@
<?php <?php
require_once 'HTMLPurifier/URI.php';
/** /**
* Parses a URI into the components and fragment identifier as specified * Parses a URI into the components and fragment identifier as specified
* by RFC 2396. * by RFC 2396.
* @todo Replace regexps with a native PHP parser
*/ */
class HTMLPurifier_URIParser class HTMLPurifier_URIParser
{ {
@ -10,7 +13,7 @@ class HTMLPurifier_URIParser
/** /**
* Parses a URI * Parses a URI
* @param $uri string URI to parse * @param $uri string URI to parse
* @return array(userinfo, host, int port, path, query, fragment) components * @return HTMLPurifier_URI representation of URI
*/ */
function parse($uri) { function parse($uri) {
$r_URI = '!'. $r_URI = '!'.
@ -51,7 +54,8 @@ class HTMLPurifier_URIParser
$port = $host = $userinfo = null; $port = $host = $userinfo = null;
} }
return array($scheme, $userinfo, $host, $port, $path, $query, $fragment); return new HTMLPurifier_URI(
$scheme, $userinfo, $host, $port, $path, $query, $fragment);
} }
} }

View File

@ -23,20 +23,14 @@ class HTMLPurifier_URIScheme
* Validates the components of a URI * Validates the components of a URI
* @note This implementation should be called by children if they define * @note This implementation should be called by children if they define
* a default port, as it does port processing. * a default port, as it does port processing.
* @note Fragment is omitted as that is scheme independent * @param $uri Instance of HTMLPurifier_URI
* @param $userinfo User info found before at sign in authority
* @param $host Hostname in authority
* @param $port Port found after colon in authority
* @param $path Path of URI
* @param $query Query of URI, found after question mark
* @param $config HTMLPurifier_Config object * @param $config HTMLPurifier_Config object
* @param $context HTMLPurifier_Context object * @param $context HTMLPurifier_Context object
* @return Bool success or failure
*/ */
function validateComponents( function validate(&$uri, $config, &$context) {
$userinfo, $host, $port, $path, $query, $config, &$context if ($this->default_port == $uri->port) $uri->port = null;
) { return true;
if ($this->default_port == $port) $port = null;
return array($userinfo, $host, $port, $path, $query);
} }
} }

View File

@ -10,34 +10,33 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
var $default_port = 21; var $default_port = 21;
var $browsable = true; // usually var $browsable = true; // usually
function validateComponents( function validate(&$uri, $config, &$context) {
$userinfo, $host, $port, $path, $query, $config, &$context parent::validate($uri, $config, $context);
) { $uri->query = null;
list($userinfo, $host, $port, $path, $query) =
parent::validateComponents( // typecode check
$userinfo, $host, $port, $path, $query, $config, $context ); $semicolon_pos = strrpos($uri->path, ';'); // reverse
$semicolon_pos = strrpos($path, ';'); // reverse
if ($semicolon_pos !== false) { if ($semicolon_pos !== false) {
// typecode check $type = substr($uri->path, $semicolon_pos + 1); // no semicolon
$type = substr($path, $semicolon_pos + 1); // no semicolon $uri->path = substr($uri->path, 0, $semicolon_pos);
$path = substr($path, 0, $semicolon_pos);
$type_ret = ''; $type_ret = '';
if (strpos($type, '=') !== false) { if (strpos($type, '=') !== false) {
// figure out whether or not the declaration is correct // figure out whether or not the declaration is correct
list($key, $typecode) = explode('=', $type, 2); list($key, $typecode) = explode('=', $type, 2);
if ($key !== 'type') { if ($key !== 'type') {
// invalid key, tack it back on encoded // invalid key, tack it back on encoded
$path .= '%3B' . $type; $uri->path .= '%3B' . $type;
} elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') { } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') {
$type_ret = ";type=$typecode"; $type_ret = ";type=$typecode";
} }
} else { } else {
$path .= '%3B' . $type; $uri->path .= '%3B' . $type;
} }
$path = str_replace(';', '%3B', $path); $uri->path = str_replace(';', '%3B', $uri->path);
$path .= $type_ret; $uri->path .= $type_ret;
} }
return array($userinfo, $host, $port, $path, null);
return true;
} }
} }

View File

@ -10,13 +10,10 @@ class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
var $default_port = 80; var $default_port = 80;
var $browsable = true; var $browsable = true;
function validateComponents( function validate(&$uri, $config, &$context) {
$userinfo, $host, $port, $path, $query, $config, &$context parent::validate($uri, $config, $context);
) { $uri->userinfo = null;
list($userinfo, $host, $port, $path, $query) = return true;
parent::validateComponents(
$userinfo, $host, $port, $path, $query, $config, $context );
return array(null, $host, $port, $path, $query);
} }
} }

View File

@ -15,14 +15,13 @@ class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
var $browsable = false; var $browsable = false;
function validateComponents( function validate(&$uri, $config, &$context) {
$userinfo, $host, $port, $path, $query, $config, &$context parent::validate($uri, $config, $context);
) { $uri->userinfo = null;
list($userinfo, $host, $port, $path, $query) = $uri->host = null;
parent::validateComponents( $uri->port = null;
$userinfo, $host, $port, $path, $query, $config, $context );
// we need to validate path against RFC 2368's addr-spec // we need to validate path against RFC 2368's addr-spec
return array(null, null, null, $path, $query); return true;
} }
} }

View File

@ -9,14 +9,14 @@ class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
var $browsable = false; var $browsable = false;
function validateComponents( function validate(&$uri, $config, &$context) {
$userinfo, $host, $port, $path, $query, $config, &$context parent::validate($uri, $config, $context);
) { $uri->userinfo = null;
list($userinfo, $host, $port, $path, $query) = $uri->host = null;
parent::validateComponents( $uri->port = null;
$userinfo, $host, $port, $path, $query, $config, $context ); $uri->query = null;
// typecode check needed on path // typecode check needed on path
return array(null, null, null, $path, null); return true;
} }
} }

View File

@ -10,13 +10,11 @@ class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
var $default_port = 119; var $default_port = 119;
var $browsable = false; var $browsable = false;
function validateComponents( function validate(&$uri, $config, &$context) {
$userinfo, $host, $port, $path, $query, $config, &$context parent::validate($uri, $config, $context);
) { $uri->userinfo = null;
list($userinfo, $host, $port, $path, $query) = $uri->query = null;
parent::validateComponents( return true;
$userinfo, $host, $port, $path, $query, $config, $context );
return array(null, $host, $port, $path, null);
} }
} }

View File

@ -3,162 +3,17 @@
require_once 'HTMLPurifier/AttrDefHarness.php'; require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/URI.php'; require_once 'HTMLPurifier/AttrDef/URI.php';
// WARNING: INCOMPLETE UNIT TESTS! /**
// we also need to test all the configuration directives defined by this class * @todo Aim for complete code coverage with mocks
*/
// http: is returned quite often when a URL is invalid. We have to change
// this behavior to just a plain old "FALSE"!
class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
{ {
var $scheme, $components, $return_components;
var $oldRegistry;
function setUp() { function setUp() {
// setup ensures that any twiddling around with the registry is reverted $this->def = new HTMLPurifier_AttrDef_URI();
$this->oldRegistry = HTMLPurifier_URISchemeRegistry::instance();
$this->def = new HTMLPurifier_AttrDef_URI(); // default
parent::setUp(); parent::setUp();
} }
function tearDown() {
HTMLPurifier_URISchemeRegistry::instance($this->oldRegistry);
}
function &generateSchemeMock($scheme_names = array('http', 'mailto')) {
generate_mock_once('HTMLPurifier_URIScheme');
generate_mock_once('HTMLPurifier_URISchemeRegistry');
// load a scheme registry mock to the singleton
$registry =& HTMLPurifier_URISchemeRegistry::instance(
new HTMLPurifier_URISchemeRegistryMock()
);
// add a pseudo-scheme to the registry for $scheme_names
$scheme = new HTMLPurifier_URISchemeMock();
foreach ($scheme_names as $name) {
$registry->setReturnReference('getScheme', $scheme, array($name, '*', '*'));
}
// registry returns false if an invalid scheme is requested
$registry->setReturnValue('getScheme', false, array('*', '*', '*'));
return $scheme;
}
// PARSING RELATED TESTS
function assertParsing($uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) {
$this->prepareCommon($config, $context);
$scheme =& $this->generateSchemeMock();
// create components parameter list
// Config and Context are wildcards due to PHP4 reference funkiness
$components = array($userinfo, $host, $port, $path, $query, '*', '*');
$scheme->expectOnce('validateComponents', $components);
$def = new HTMLPurifier_AttrDef_URI();
$def->validate($uri, $config, $context);
$scheme->tally();
}
function testParsingImproperPercentEncoding() {
// even though we don't resolve percent entities, we have to fix
// improper percent-encodes. Taken one at a time:
// %56 - V, which is an unreserved character
// %fc - u with an umlaut, normalize to uppercase
// %GJ - invalid characters in entity, encode %
// %5 - prematurely terminated, encode %
// %FC - u with umlaut, correct
// note that Apache doesn't do such fixing, rather, it just claims
// that the browser sent a "Bad Request". See PercentEncoder.php
// for more details
$this->assertParsing(
'http://www.example.com/%56%fc%GJ%5%FC',
null, 'www.example.com', null, '/V%FC%25GJ%255%FC', null
);
}
function testParsingInvalidHostThatLooksLikeIPv6Address() {
$this->assertParsing(
'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]',
null, null, null, '', null
);
}
function testParsingOverLargePort() {
$this->assertParsing(
'http://example.com:65536',
null, 'example.com', null, '', null
);
}
// OUTPUT RELATED TESTS
// scheme is mocked to ensure only the URI is being tested
function assertOutput($input_uri, $expect_uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) {
// prepare mock machinery
$this->prepareCommon($config, $context);
$scheme =& $this->generateSchemeMock();
$components = array($userinfo, $host, $port, $path, $query);
$scheme->setReturnValue('validateComponents', $components);
$def = new HTMLPurifier_AttrDef_URI();
$result_uri = $def->validate($input_uri, $config, $context);
if ($expect_uri === true) $expect_uri = $input_uri;
$this->assertEqual($result_uri, $expect_uri);
}
function testOutputRegular() {
$this->assertOutput(
'http://user@authority.part:8080/now/the/path?query#frag', true,
'user', 'authority.part', 8080, '/now/the/path', 'query'
);
}
function testOutputEmpty() {
$this->assertOutput(
'', true,
null, null, null, '', null
);
}
function testOutputNullPath() {
$this->assertOutput(
'', true,
null, null, null, null, null // usually shouldn't happen
);
}
function testOutputPathAbsolute() {
$this->assertOutput(
'http:/this/is/path', '/this/is/path',
null, null, null, '/this/is/path', null
);
}
function testOutputPathRootless() {
$this->assertOutput(
'http:this/is/path', 'this/is/path',
null, null, null, 'this/is/path', null
);
}
function testOutputPathEmpty() {
$this->assertOutput(
'http:', '',
null, null, null, '', null
);
}
// INTEGRATION TESTS
function testIntegration() { function testIntegration() {
$this->assertDef('http://www.google.com/'); $this->assertDef('http://www.google.com/');
$this->assertDef('http:', ''); $this->assertDef('http:', '');
@ -170,84 +25,27 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$this->assertDef('mailto:bob@example.com'); $this->assertDef('mailto:bob@example.com');
} }
function testConfigDisableExternal() { function testIntegrationWithPercentEncoder() {
$this->assertDef(
$this->def = new HTMLPurifier_AttrDef_URI(); 'http://www.example.com/%56%fc%GJ%5%FC',
'http://www.example.com/V%FC%25GJ%255%FC'
$this->config->set('URI', 'DisableExternal', true); );
$this->config->set('URI', 'Host', 'sub.example.com');
$this->assertDef('/foobar.txt');
$this->assertDef('http://google.com/', false);
$this->assertDef('http://sub.example.com/alas?foo=asd');
$this->assertDef('http://example.com/teehee', false);
$this->assertDef('http://www.example.com/#man', false);
$this->assertDef('http://go.sub.example.com/perhaps?p=foo');
} }
function testEmbeds() { function testEmbeds() {
// embedded URI
$this->def = new HTMLPurifier_AttrDef_URI(true); $this->def = new HTMLPurifier_AttrDef_URI(true);
$this->assertDef('http://sub.example.com/alas?foo=asd'); $this->assertDef('http://sub.example.com/alas?foo=asd');
$this->assertDef('mailto:foo@example.com', false); $this->assertDef('mailto:foo@example.com', false);
}
function testConfigDisableExternalResources() {
$this->config->set('URI', 'DisableExternalResources', true);
$this->def = new HTMLPurifier_AttrDef_URI();
$this->assertDef('http://sub.example.com/alas?foo=asd');
$this->assertDef('/img.png');
$this->def = new HTMLPurifier_AttrDef_URI(true);
$this->assertDef('http://sub.example.com/alas?foo=asd', false);
$this->assertDef('/img.png');
} }
function testConfigMunge() { function testConfigMunge() {
$this->config->set('URI', 'Munge', 'http://www.google.com/url?q=%s'); $this->config->set('URI', 'Munge', 'http://www.google.com/url?q=%s');
$this->assertDef( $this->assertDef(
'http://www.example.com/', 'http://www.example.com/',
'http://www.google.com/url?q=http%3A%2F%2Fwww.example.com%2F' 'http://www.google.com/url?q=http%3A%2F%2Fwww.example.com%2F'
); );
$this->assertDef('index.html'); $this->assertDef('index.html');
$this->assertDef('javascript:foobar();', false); $this->assertDef('javascript:foobar();', false);
}
function testBlacklist() {
$this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
$this->assertDef('foo.txt');
$this->assertDef('http://www.google.com/example.com/moo');
$this->assertDef('http://example.com/#23', false);
$this->assertDef('https://sub.domain.example.com/foobar', false);
$this->assertDef('http://example.com.example.net/?whoo=foo', false);
$this->assertDef('ftp://moo-moo.net/foo/foo/', false);
}
function testWhitelist() {
/* unimplemented
$this->config->set('URI', 'HostPolicy', 'DenyAll');
$this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
$this->assertDef('http://example.com/fo/google.com', false);
$this->assertDef('server.txt');
$this->assertDef('ftp://www.google.com/?t=a');
$this->assertDef('http://google.com.tricky.spamsite.net', false);
*/
} }
} }

View File

@ -10,10 +10,20 @@ class HTMLPurifier_Harness extends UnitTestCase
parent::UnitTestCase(); parent::UnitTestCase();
} }
var $config, $context;
function setUp() {
list($this->config, $this->context) = $this->createCommon();
}
function prepareCommon(&$config, &$context) { function prepareCommon(&$config, &$context) {
$config = HTMLPurifier_Config::create($config); $config = HTMLPurifier_Config::create($config);
if (!$context) $context = new HTMLPurifier_Context(); if (!$context) $context = new HTMLPurifier_Context();
} }
function createCommon() {
return array(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context);
}
} }

View File

@ -1,6 +1,7 @@
<?php <?php
require_once 'HTMLPurifier/URIParser.php'; require_once 'HTMLPurifier/URIParser.php';
require_once 'HTMLPurifier/URI.php';
class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
{ {
@ -11,7 +12,8 @@ class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
$this->prepareCommon($config, $context); $this->prepareCommon($config, $context);
$parser = new HTMLPurifier_URIParser(); $parser = new HTMLPurifier_URIParser();
$result = $parser->parse($uri, $config, $context); $result = $parser->parse($uri, $config, $context);
$this->assertEqual($result, array($scheme, $userinfo, $host, $port, $path, $query, $fragment)); $expect = new HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
$this->assertEqual($result, $expect);
} }
function testRegular() { function testRegular() {

View File

@ -1,6 +1,10 @@
<?php <?php
require_once 'HTMLPurifier/URI.php';
require_once 'HTMLPurifier/URIParser.php';
require_once 'HTMLPurifier/URIScheme.php'; require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/URIScheme/http.php'; require_once 'HTMLPurifier/URIScheme/http.php';
require_once 'HTMLPurifier/URIScheme/ftp.php'; require_once 'HTMLPurifier/URIScheme/ftp.php';
@ -15,142 +19,140 @@ require_once 'HTMLPurifier/URIScheme/nntp.php';
class HTMLPurifier_URISchemeTest extends HTMLPurifier_Harness class HTMLPurifier_URISchemeTest extends HTMLPurifier_Harness
{ {
function test_http() { function assertValidation($uri, $expect_uri = true) {
$scheme = new HTMLPurifier_URIScheme_http(); $parser = new HTMLPurifier_URIParser();
$config = HTMLPurifier_Config::createDefault(); if ($expect_uri === true) $uri = $expect_uri;
$context = new HTMLPurifier_Context(); $uri = $parser->parse($uri);
if ($expect_uri !== false) {
$this->assertIdentical( $expect_uri = $parser->parse($expect_uri);
$scheme->validateComponents( }
null, 'www.example.com', null, '/', 's=foobar', $config, $context), // convenience hack: the scheme should be explicitly specified
array(null, 'www.example.com', null, '/', 's=foobar') $scheme = $uri->getSchemeObj($this->config, $this->context);
); $result = $scheme->validate($uri, $this->config, $this->context);
if ($expect_uri !== false) {
// absorb default port and userinfo $this->assertTrue($result);
$this->assertIdentical( $this->assertIdentical($uri, $expect_uri);
$scheme->validateComponents( } else {
'user', 'www.example.com', 80, '/', 's=foobar', $config, $context), $this->assertFalse($result);
array(null, 'www.example.com', null, '/', 's=foobar') }
);
// do not absorb non-default port
$this->assertIdentical(
$scheme->validateComponents(
null, 'www.example.com', 8080, '/', 's=foobar', $config, $context),
array(null, 'www.example.com', 8080, '/', 's=foobar')
);
// https is basically the same
$scheme = new HTMLPurifier_URIScheme_https();
$this->assertIdentical(
$scheme->validateComponents(
'user', 'www.example.com', 443, '/', 's=foobar', $config, $context),
array(null, 'www.example.com', null, '/', 's=foobar')
);
} }
function test_ftp() { function test_http_regular() {
$this->assertValidation(
$scheme = new HTMLPurifier_URIScheme_ftp(); 'http://example.com/?s=q#fragment'
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
$this->assertIdentical(
$scheme->validateComponents(
'user', 'www.example.com', 21, '/', 's=foobar', $config, $context),
array('user', 'www.example.com', null, '/', null)
);
// valid typecode
$this->assertIdentical(
$scheme->validateComponents(
null, 'www.example.com', null, '/file.txt;type=a', null, $config, $context),
array(null, 'www.example.com', null, '/file.txt;type=a', null)
);
// remove invalid typecode
$this->assertIdentical(
$scheme->validateComponents(
null, 'www.example.com', null, '/file.txt;type=z', null, $config, $context),
array(null, 'www.example.com', null, '/file.txt', null)
);
// encode errant semicolons
$this->assertIdentical(
$scheme->validateComponents(
null, 'www.example.com', null, '/too;many;semicolons=1', null, $config, $context),
array(null, 'www.example.com', null, '/too%3Bmany%3Bsemicolons=1', null)
);
}
function test_news() {
    $scheme  = new HTMLPurifier_URIScheme_news();
    $config  = HTMLPurifier_Config::createDefault();
    $context = new HTMLPurifier_Context();

    // NOTE(review): the five leading arguments are presumably
    // (userinfo, host, port, path, query) -- confirm against
    // HTMLPurifier_URIScheme::validateComponents().

    // A newsgroup name in the path slot is expected back unchanged.
    $expected = array(null, null, null, 'gmane.science.linguistics', null);
    $actual = $scheme->validateComponents(
        null, null, null, 'gmane.science.linguistics', null, $config, $context);
    $this->assertIdentical($actual, $expected);

    // So is a message-id style path.
    $expected = array(null, null, null, '642@eagle.ATT.COM', null);
    $actual = $scheme->validateComponents(
        null, null, null, '642@eagle.ATT.COM', null, $config, $context);
    $this->assertIdentical($actual, $expected);

    // test invalid field removal: only the path slot is expected to survive
    $expected = array(null, null, null, 'rec.music', null);
    $actual = $scheme->validateComponents(
        'user', 'www.google.com', 80, 'rec.music', 'path=foo', $config, $context);
    $this->assertIdentical($actual, $expected);
}
function test_nntp() {
$scheme = new HTMLPurifier_URIScheme_nntp();
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
$this->assertIdentical(
$scheme->validateComponents(
null, 'news.example.com', null, '/alt.misc/12345', null, $config, $context),
array(null, 'news.example.com', null, '/alt.misc/12345', null)
);
$this->assertIdentical(
$scheme->validateComponents(
'user', 'news.example.com', 119, '/alt.misc/12345', 'foo=asdf', $config, $context),
array(null, 'news.example.com', null, '/alt.misc/12345', null)
); );
} }
function test_mailto() { function test_http_removeDefaultPort() {
$this->assertValidation(
$scheme = new HTMLPurifier_URIScheme_mailto(); 'http://example.com:80',
$config = HTMLPurifier_Config::createDefault(); 'http://example.com'
$context = new HTMLPurifier_Context();
$this->assertIdentical(
$scheme->validateComponents(
null, null, null, 'bob@example.com', null, $config, $context),
array(null, null, null, 'bob@example.com', null)
); );
}
$this->assertIdentical(
$scheme->validateComponents( function test_http_removeUserInfo() {
'user', 'example.com', 80, 'bob@example.com', 'subject=Foo!', $config, $context), $this->assertValidation(
array(null, null, null, 'bob@example.com', 'subject=Foo!') 'http://bob@example.com',
'http://example.com'
);
}
function test_http_preserveNonDefaultPort() {
    // Only the input is supplied, so assertValidation's default expectation
    // applies (helper is defined outside this view; presumably "unchanged").
    $uri = 'http://example.com:8080';
    $this->assertValidation($uri);
}
function test_https_regular() {
    // The expected result lacks both the userinfo and the explicit :443 port.
    $input    = 'https://user@example.com:443/?s=q#frag';
    $expected = 'https://example.com/?s=q#frag';
    $this->assertValidation($input, $expected);
}
function test_ftp_regular() {
    // ftp URI carrying userinfo; no explicit expectation is passed, so the
    // helper's default applies (presumably "unchanged" -- defined out of view).
    $uri = 'ftp://user@example.com/path';
    $this->assertValidation($uri);
}
function test_ftp_removeDefaultPort() {
    // The explicit :21 port is expected to be dropped.
    $input    = 'ftp://example.com:21';
    $expected = 'ftp://example.com';
    $this->assertValidation($input, $expected);
}
function test_ftp_removeQueryString() {
    // The query string is expected to be stripped from an ftp URI.
    $input    = 'ftp://example.com?s=q';
    $expected = 'ftp://example.com';
    $this->assertValidation($input, $expected);
}
function test_ftp_preserveValidTypecode() {
    // ";type=a" typecode; no explicit expectation is passed, so the helper's
    // default applies (presumably "unchanged" -- defined out of view).
    $uri = 'ftp://example.com/file.txt;type=a';
    $this->assertValidation($uri);
}
function test_ftp_removeInvalidTypecode() {
    // The ";type=z" suffix is expected to be removed from the path.
    $input    = 'ftp://example.com/file.txt;type=z';
    $expected = 'ftp://example.com/file.txt';
    $this->assertValidation($input, $expected);
}
function test_ftp_encodeExtraSemicolons() {
    // Semicolons in the path are expected to come back percent-encoded (%3B).
    $input    = 'ftp://example.com/too;many;semicolons=1';
    $expected = 'ftp://example.com/too%3Bmany%3Bsemicolons=1';
    $this->assertValidation($input, $expected);
}
function test_news_regular() {
    // Newsgroup-name form; no explicit expectation is passed, so the helper's
    // default applies (presumably "unchanged" -- defined out of view).
    $uri = 'news:gmane.science.linguistics';
    $this->assertValidation($uri);
}
function test_news_explicit() {
    // Message-id style form; no explicit expectation is passed, so the helper's
    // default applies (presumably "unchanged" -- defined out of view).
    $uri = 'news:642@eagle.ATT.COM';
    $this->assertValidation($uri);
}
function test_news_removeNonPathComponents() {
    // Authority (userinfo, host, port) and query are expected to be dropped;
    // the path and the fragment remain in the expected output.
    $input    = 'news://user@example.com:80/rec.music?path=foo#frag';
    $expected = 'news:/rec.music#frag';
    $this->assertValidation($input, $expected);
}
function test_nntp_regular() {
    // Host, path and fragment only; no explicit expectation is passed, so the
    // helper's default applies (presumably "unchanged" -- defined out of view).
    $uri = 'nntp://news.example.com/alt.misc/42#frag';
    $this->assertValidation($uri);
}
function test_nntp_removalOfRedundantOrUselessComponents() {
    // Userinfo, the explicit :119 port and the query string are expected to be
    // removed; host, path and fragment remain.
    $input    = 'nntp://user@news.example.com:119/alt.misc/42?s=q#frag';
    $expected = 'nntp://news.example.com/alt.misc/42#frag';
    $this->assertValidation($input, $expected);
}
function test_mailto_regular() {
    // Plain mailto address; no explicit expectation is passed, so the helper's
    // default applies (presumably "unchanged" -- defined out of view).
    $uri = 'mailto:bob@example.com';
    $this->assertValidation($uri);
}
function test_mailto_removalOfRedundantOrUselessComponents() {
$this->assertValidation(
'mailto://user@example.com:80/bob@example.com?subject=Foo#frag',
'mailto:/bob@example.com?subject=Foo#frag'
); );
} }
} }

View File

@ -0,0 +1,240 @@
<?php
require_once 'HTMLPurifier/URI.php';
require_once 'HTMLPurifier/URIParser.php';
/**
 * Unit tests for HTMLPurifier_URI: construction, scheme-object lookup,
 * serialization via toString() and validation under various URI
 * configuration directives.
 *
 * NOTE(review): $this->config and $this->context are not set in this class;
 * they are presumably provided by HTMLPurifier_Harness -- confirm.
 */
class HTMLPurifier_URITest extends HTMLPurifier_Harness
{
    
    /**
     * Parses a URI string with a fresh HTMLPurifier_URIParser.
     * @param $uri String URI to parse
     * @return Result of HTMLPurifier_URIParser::parse() for that string
     */
    function createURI($uri) {
        $parser = new HTMLPurifier_URIParser();
        return $parser->parse($uri);
    }
    
    /**
     * An upper-case scheme and a numeric-string port must produce an object
     * identical to one built from the lower-case scheme and integer port.
     */
    function test_construct() {
        $uri1 = new HTMLPurifier_URI('HTTP', 'bob', 'example.com', '23', '/foo', 'bar=2', 'slash');
        $uri2 = new HTMLPurifier_URI('http', 'bob', 'example.com', 23, '/foo', 'bar=2', 'slash');
        $this->assertIdentical($uri1, $uri2);
    }
    
    /**
     * Registry instance that was active before a mock was installed, kept so
     * tearDownSchemeRegistryMock() can put it back.
     */
    var $oldRegistry;
    
    /**
     * Remembers the current scheme registry and replaces it with a mock.
     * @return Reference to the mock registry now returned by instance()
     */
    function &setUpSchemeRegistryMock() {
        $this->oldRegistry = HTMLPurifier_URISchemeRegistry::instance();
        generate_mock_once('HTMLPurifier_URIScheme');
        generate_mock_once('HTMLPurifier_URISchemeRegistry');
        $registry =& HTMLPurifier_URISchemeRegistry::instance(
          new HTMLPurifier_URISchemeRegistryMock()
        );
        return $registry;
    }
    
    /**
     * Installs a mock registry whose getScheme() returns a mock scheme
     * object when asked for $name (the other two arguments are wildcards).
     * @param $name Scheme name the mock registry should answer for
     * @return Reference to the mock scheme object
     */
    function &setUpSchemeMock($name) {
        $registry =& $this->setUpSchemeRegistryMock();
        $scheme_mock = new HTMLPurifier_URISchemeMock();
        $registry->setReturnValue('getScheme', $scheme_mock, array($name, '*', '*'));
        return $scheme_mock;
    }
    
    /**
     * Installs a mock registry whose getScheme() returns false for every
     * argument combination.
     */
    function setUpNoValidSchemes() {
        $registry =& $this->setUpSchemeRegistryMock();
        $registry->setReturnValue('getScheme', false, array('*', '*', '*'));
    }
    
    /**
     * Re-installs the registry saved by setUpSchemeRegistryMock(). Tests that
     * install a mock call this explicitly as their last step.
     */
    function tearDownSchemeRegistryMock() {
        HTMLPurifier_URISchemeRegistry::instance($this->oldRegistry);
    }
    
    /** getSchemeObj() hands back the object the registry holds for 'http'. */
    function test_getSchemeObj() {
        $scheme_mock =& $this->setUpSchemeMock('http');
        
        $uri = $this->createURI('http:');
        $scheme_obj = $uri->getSchemeObj($this->config, $this->context);
        $this->assertIdentical($scheme_obj, $scheme_mock);
        
        $this->tearDownSchemeRegistryMock();
    }
    
    /** getSchemeObj() yields false when the registry knows no scheme. */
    function test_getSchemeObj_invalidScheme() {
        $this->setUpNoValidSchemes();
        
        $uri = $this->createURI('http:');
        $result = $uri->getSchemeObj($this->config, $this->context);
        $this->assertIdentical($result, false);
        
        $this->tearDownSchemeRegistryMock();
    }
    
    /**
     * A URI without a scheme resolves to the scheme object for the
     * URI.DefaultScheme directive.
     */
    function test_getSchemaObj_defaultScheme() {
        $scheme = 'foobar';
        
        $scheme_mock =& $this->setUpSchemeMock($scheme);
        $this->config->set('URI', 'DefaultScheme', $scheme);
        
        $uri = $this->createURI('hmm');
        $scheme_obj = $uri->getSchemeObj($this->config, $this->context);
        $this->assertIdentical($scheme_obj, $scheme_mock);
        
        $this->tearDownSchemeRegistryMock();
    }
    
    /**
     * When the configured default scheme cannot be resolved, an error is
     * raised and getSchemeObj() returns false.
     */
    function test_getSchemaObj_invalidDefaultScheme() {
        $this->setUpNoValidSchemes();
        $this->config->set('URI', 'DefaultScheme', 'foobar');
        
        $uri = $this->createURI('hmm');
        
        // the expectation must be queued before the call that triggers it
        $this->expectError('Default scheme object "foobar" was not readable');
        $result = $uri->getSchemeObj($this->config, $this->context);
        $this->assertIdentical($result, false);
        
        $this->tearDownSchemeRegistryMock();
    }
    
    /**
     * Builds a URI from its components and asserts its string form.
     * @param $expect_uri Expected result of toString()
     * @param $scheme, $userinfo, $host, $port, $path, $query, $fragment
     *        Components passed to the HTMLPurifier_URI constructor in order
     */
    function assertToString($expect_uri, $scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        $uri = new HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
        $string = $uri->toString();
        $this->assertIdentical($string, $expect_uri);
    }
    
    function test_toString_full() {
        $this->assertToString(
            'http://bob@example.com:300/foo?bar=baz#fragment',
            'http', 'bob', 'example.com', 300, '/foo', 'bar=baz', 'fragment'
        );
    }
    
    function test_toString_scheme() {
        $this->assertToString(
            'http:',
            'http', null, null, null, '', null, null
        );
    }
    
    function test_toString_authority() {
        $this->assertToString(
            '//bob@example.com:8080',
            null, 'bob', 'example.com', 8080, '', null, null
        );
    }
    
    function test_toString_path() {
        $this->assertToString(
            '/path/to',
            null, null, null, null, '/path/to', null, null
        );
    }
    
    function test_toString_query() {
        $this->assertToString(
            '?q=string',
            null, null, null, null, '', 'q=string', null
        );
    }
    
    function test_toString_fragment() {
        $this->assertToString(
            '#fragment',
            null, null, null, null, '', null, 'fragment'
        );
    }
    
    /**
     * Parses a URI, validates it and checks the outcome.
     * @param $uri String URI to parse and validate
     * @param $expect_uri true (default) to expect the input back unchanged,
     *        false to expect validate() to fail, or a string holding the
     *        expected toString() result after validation
     */
    function assertValidation($uri, $expect_uri = true) {
        if ($expect_uri === true) $expect_uri = $uri;
        // keep the parsed object separate from the input string
        $uri_obj = $this->createURI($uri);
        $result = $uri_obj->validate($this->config, $this->context);
        if ($expect_uri === false) {
            $this->assertFalse($result);
        } else {
            $this->assertTrue($result);
            $this->assertIdentical($uri_obj->toString(), $expect_uri);
        }
    }
    
    function test_validate_defaultSchemeRemovedInBlank() {
        $this->assertValidation('http:', '');
    }
    
    function test_validate_defaultSchemeRemovedInRelativeURI() {
        $this->assertValidation('http:/foo/bar', '/foo/bar');
    }
    
    function test_validate_defaultSchemeNotRemovedInAbsoluteURI() {
        $this->assertValidation('http://example.com/foo/bar');
    }
    
    function test_validate_altSchemeNotRemoved() {
        $this->assertValidation('mailto:this-looks-like-a-path@example.com');
    }
    
    function test_validate_overlongPort() {
        $this->assertValidation('http://example.com:65536', 'http://example.com');
    }
    
    function test_validate_zeroPort() {
        $this->assertValidation('http://example.com:00', 'http://example.com');
    }
    
    function test_validate_invalidHostThatLooksLikeIPv6() {
        $this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', '');
    }
    
    /**
     * With URI.DisableExternal on and URI.Host set to sub.example.com, only
     * relative URIs and URIs on that host or its subdomains validate.
     */
    function test_validate_configDisableExternal() {
        
        $this->config->set('URI', 'DisableExternal', true);
        $this->config->set('URI', 'Host', 'sub.example.com');
        
        $this->assertValidation('/foobar.txt');
        $this->assertValidation('http://google.com/', false);
        $this->assertValidation('http://sub.example.com/alas?foo=asd');
        $this->assertValidation('http://example.com/teehee', false);
        $this->assertValidation('http://www.example.com/#man', false);
        $this->assertValidation('http://go.sub.example.com/perhaps?p=foo');
        
    }
    
    /**
     * URI.DisableExternalResources rejects absolute URIs only once
     * 'EmbeddedURI' is registered as true in the context; relative URIs
     * validate either way.
     */
    function test_validate_configDisableExternalResources() {
        
        $this->config->set('URI', 'DisableExternalResources', true);
        
        $this->assertValidation('http://sub.example.com/alas?foo=asd');
        $this->assertValidation('/img.png');
        
        $embeds = true; // passed by reference
        $this->context->register('EmbeddedURI', $embeds);
        $this->assertValidation('http://sub.example.com/alas?foo=asd', false);
        $this->assertValidation('/img.png');
        
    }
    
    /**
     * URIs whose host contains a URI.HostBlacklist entry are rejected; the
     * same text appearing only in the path is accepted.
     */
    function test_validate_configBlacklist() {
        
        $this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
        
        $this->assertValidation('foo.txt');
        $this->assertValidation('http://www.google.com/example.com/moo');
        
        $this->assertValidation('http://example.com/#23', false);
        $this->assertValidation('https://sub.domain.example.com/foobar', false);
        $this->assertValidation('http://example.com.example.net/?whoo=foo', false);
        $this->assertValidation('ftp://moo-moo.net/foo/foo/', false);
        
    }
    
    // NOTE(review): disabled test kept as found; presumably the host
    // whitelist directives it exercises are not implemented yet -- confirm
    // before enabling or deleting.
    /*
    function test_validate_configWhitelist() {
        
        $this->config->set('URI', 'HostPolicy', 'DenyAll');
        $this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
        
        $this->assertValidation('http://example.com/fo/google.com', false);
        $this->assertValidation('server.txt');
        $this->assertValidation('ftp://www.google.com/?t=a');
        $this->assertValidation('http://google.com.tricky.spamsite.net', false);
        
    }
    */
    
}

View File

@ -105,6 +105,7 @@ $test_files[] = 'HTMLPurifier/TokenTest.php';
$test_files[] = 'HTMLPurifier/URIParserTest.php'; $test_files[] = 'HTMLPurifier/URIParserTest.php';
$test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php'; $test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
$test_files[] = 'HTMLPurifier/URISchemeTest.php'; $test_files[] = 'HTMLPurifier/URISchemeTest.php';
$test_files[] = 'HTMLPurifier/URITest.php';
$test_files[] = 'HTMLPurifierTest.php'; $test_files[] = 'HTMLPurifierTest.php';
if (version_compare(PHP_VERSION, '5', '>=')) { if (version_compare(PHP_VERSION, '5', '>=')) {