mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-08 23:11:52 +00:00
[2.1.0] Refactor AttrDef_URI: removed URIParser functionality
- Genericized flush-definition-cache script git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1333 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
2a002857ce
commit
8c9dbe142d
4
NEWS
4
NEWS
@ -10,6 +10,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
==========================
|
==========================
|
||||||
|
|
||||||
2.1.0, unknown release date
|
2.1.0, unknown release date
|
||||||
|
# flush-htmldefinition-cache.php superseded in favor of a generic
|
||||||
|
flush-definition-cache.php script
|
||||||
! Phorum mod implemented for HTML Purifier
|
! Phorum mod implemented for HTML Purifier
|
||||||
! With %Core.AggressivelyFixLt, <3 and similar emoticons no longer
|
! With %Core.AggressivelyFixLt, <3 and similar emoticons no longer
|
||||||
trigger HTML removal in PHP5 (DOMLex). This directive is not necessary
|
trigger HTML removal in PHP5 (DOMLex). This directive is not necessary
|
||||||
@ -43,7 +45,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
already exists. May clobber autoload, so I need to keep an eye on it
|
already exists. May clobber autoload, so I need to keep an eye on it
|
||||||
. ConfigSchema heavily optimized, will only collect information and validate
|
. ConfigSchema heavily optimized, will only collect information and validate
|
||||||
definitions when HTMLPURIFIER_SCHEMA_STRICT is true.
|
definitions when HTMLPURIFIER_SCHEMA_STRICT is true.
|
||||||
. AttrDef_URI unit tests refactored
|
. AttrDef_URI unit tests and implementation refactored
|
||||||
. benchmarks/ directory now protected from public view with .htaccess file;
|
. benchmarks/ directory now protected from public view with .htaccess file;
|
||||||
run the tests via command line
|
run the tests via command line
|
||||||
. URI scheme is munged off if there is no authority and the scheme is the
|
. URI scheme is munged off if there is no authority and the scheme is the
|
||||||
|
@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
|||||||
{
|
{
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_CSS_URI() {
|
function HTMLPurifier_AttrDef_CSS_URI() {
|
||||||
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
|
parent::HTMLPurifier_AttrDef_URI(true); // always embedded
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($uri_string, $config, &$context) {
|
function validate($uri_string, $config, &$context) {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
|
require_once 'HTMLPurifier/URIParser.php';
|
||||||
require_once 'HTMLPurifier/URIScheme.php';
|
require_once 'HTMLPurifier/URIScheme.php';
|
||||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||||
@ -92,7 +93,7 @@ HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
|||||||
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
var $host;
|
var $host, $parser;
|
||||||
var $embeds_resource;
|
var $embeds_resource;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -100,6 +101,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||||
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
||||||
|
$this->parser = new HTMLPurifier_URIParser();
|
||||||
$this->embeds_resource = (bool) $embeds_resource;
|
$this->embeds_resource = (bool) $embeds_resource;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,43 +110,18 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
static $PercentEncoder = null;
|
static $PercentEncoder = null;
|
||||||
if ($PercentEncoder === null) $PercentEncoder = new HTMLPurifier_PercentEncoder();
|
if ($PercentEncoder === null) $PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||||
|
|
||||||
// We'll write stack-based parsers later, for now, use regexps to
|
|
||||||
// get things working as fast as possible (irony)
|
|
||||||
|
|
||||||
if ($config->get('URI', 'Disable')) return false;
|
if ($config->get('URI', 'Disable')) return false;
|
||||||
|
|
||||||
// parse as CDATA
|
// initial operations
|
||||||
$uri = $this->parseCDATA($uri);
|
$uri = $this->parseCDATA($uri);
|
||||||
|
|
||||||
// fix up percent-encoding
|
|
||||||
$uri = $PercentEncoder->normalize($uri);
|
$uri = $PercentEncoder->normalize($uri);
|
||||||
|
|
||||||
// while it would be nice to use parse_url(), that's specifically
|
// parse the URI
|
||||||
// for HTTP and thus won't work for our generic URI parsing
|
$parsed_uri = $this->parser->parse($uri);
|
||||||
|
if ($parsed_uri === false) return false;
|
||||||
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
list($scheme, $userinfo, $host, $port, $path, $query, $fragment) = $parsed_uri;
|
||||||
$r_URI = '!'.
|
|
||||||
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
|
||||||
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
|
||||||
'([^?#<>\'"]*)'. // 5. Path
|
|
||||||
'(\?([^#<>\'"]*))?'. // 7. Query
|
|
||||||
'(#([^<>\'"]*))?'. // 8. Fragment
|
|
||||||
'!';
|
|
||||||
|
|
||||||
$matches = array();
|
|
||||||
$result = preg_match($r_URI, $uri, $matches);
|
|
||||||
|
|
||||||
if (!$result) return false; // *really* invalid URI
|
|
||||||
|
|
||||||
// seperate out parts
|
|
||||||
$scheme = !empty($matches[1]) ? $matches[2] : null;
|
|
||||||
$authority = !empty($matches[3]) ? $matches[4] : null;
|
|
||||||
$path = $matches[5]; // always present, can be empty
|
|
||||||
$query = !empty($matches[6]) ? $matches[7] : null;
|
|
||||||
$fragment = !empty($matches[8]) ? $matches[9] : null;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// retrieve the scheme object
|
||||||
$registry =& HTMLPurifier_URISchemeRegistry::instance();
|
$registry =& HTMLPurifier_URISchemeRegistry::instance();
|
||||||
$default_scheme = $config->get('URI', 'DefaultScheme');
|
$default_scheme = $config->get('URI', 'DefaultScheme');
|
||||||
if ($scheme !== null) {
|
if ($scheme !== null) {
|
||||||
@ -154,31 +131,25 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
$scheme_obj = $registry->getScheme($scheme, $config, $context);
|
$scheme_obj = $registry->getScheme($scheme, $config, $context);
|
||||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
||||||
} else {
|
} else {
|
||||||
$scheme_obj = $registry->getScheme(
|
// no scheme: retrieve the default one
|
||||||
$default_scheme, $config, $context
|
$scheme_obj = $registry->getScheme($default_scheme, $config, $context);
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// something funky weird happened in the registry, abort!
|
|
||||||
if (!$scheme_obj) {
|
if (!$scheme_obj) {
|
||||||
|
// something funky happened to the default scheme object
|
||||||
trigger_error(
|
trigger_error(
|
||||||
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
||||||
E_USER_WARNING
|
E_USER_WARNING
|
||||||
);
|
);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// the URI we're processing embeds_resource a resource in the page, but the URI
|
|
||||||
// it references cannot be located
|
|
||||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
||||||
|
// the URI we're processing embeds a resource in the
|
||||||
|
// page, but the URI it references cannot be physically retrieved
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// validate host
|
||||||
if ($authority !== null) {
|
if ($host !== null) {
|
||||||
|
|
||||||
// ridiculously inefficient
|
|
||||||
|
|
||||||
// remove URI if it's absolute and we disabled externals or
|
// remove URI if it's absolute and we disabled externals or
|
||||||
// if it's absolute and embedded and we disabled external resources
|
// if it's absolute and embedded and we disabled external resources
|
||||||
unset($our_host);
|
unset($our_host);
|
||||||
@ -192,29 +163,10 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
$our_host = $config->get('URI', 'Host');
|
$our_host = $config->get('URI', 'Host');
|
||||||
if ($our_host === null) return false;
|
if ($our_host === null) return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
$HEXDIG = '[A-Fa-f0-9]';
|
|
||||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
|
||||||
$sub_delims = '!$&\'()'; // needs []
|
|
||||||
$pct_encoded = "%$HEXDIG$HEXDIG";
|
|
||||||
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
|
|
||||||
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
|
||||||
$matches = array();
|
|
||||||
preg_match($r_authority, $authority, $matches);
|
|
||||||
// overloads regexp!
|
|
||||||
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
|
||||||
$host = !empty($matches[3]) ? $matches[3] : null;
|
|
||||||
$port = !empty($matches[4]) ? $matches[5] : null;
|
|
||||||
|
|
||||||
// validate port
|
|
||||||
if ($port !== null) {
|
|
||||||
$port = (int) $port;
|
|
||||||
if ($port < 1 || $port > 65535) $port = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
$host = $this->host->validate($host, $config, $context);
|
$host = $this->host->validate($host, $config, $context);
|
||||||
if ($host === false) $host = null;
|
if ($host === false) $host = null;
|
||||||
|
|
||||||
|
// check host against blacklist
|
||||||
if ($this->checkBlacklist($host, $config, $context)) return false;
|
if ($this->checkBlacklist($host, $config, $context)) return false;
|
||||||
|
|
||||||
// more lenient absolute checking
|
// more lenient absolute checking
|
||||||
@ -227,11 +179,11 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// userinfo and host are validated within the regexp
|
// validate port
|
||||||
|
if ($port !== null) {
|
||||||
} else {
|
if ($port < 1 || $port > 65535) $port = null;
|
||||||
$port = $host = $userinfo = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
58
library/HTMLPurifier/URIParser.php
Normal file
58
library/HTMLPurifier/URIParser.php
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses a URI into the components and fragment identifier as specified
|
||||||
|
* by RFC 2396.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_URIParser
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses a URI
|
||||||
|
* @param $uri string URI to parse
|
||||||
|
* @return array(scheme, userinfo, host, int port, path, query, fragment) components, or false if the URI could not be matched
|
||||||
|
*/
|
||||||
|
function parse($uri) {
|
||||||
|
$r_URI = '!'.
|
||||||
|
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
||||||
|
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
||||||
|
'([^?#<>\'"]*)'. // 5. Path
|
||||||
|
'(\?([^#<>\'"]*))?'. // 7. Query
|
||||||
|
'(#([^<>\'"]*))?'. // 8. Fragment
|
||||||
|
'!';
|
||||||
|
|
||||||
|
$matches = array();
|
||||||
|
$result = preg_match($r_URI, $uri, $matches);
|
||||||
|
|
||||||
|
if (!$result) return false; // *really* invalid URI
|
||||||
|
|
||||||
|
// separate out parts
|
||||||
|
$scheme = !empty($matches[1]) ? $matches[2] : null;
|
||||||
|
$authority = !empty($matches[3]) ? $matches[4] : null;
|
||||||
|
$path = $matches[5]; // always present, can be empty
|
||||||
|
$query = !empty($matches[6]) ? $matches[7] : null;
|
||||||
|
$fragment = !empty($matches[8]) ? $matches[9] : null;
|
||||||
|
|
||||||
|
// further parse authority
|
||||||
|
if ($authority !== null) {
|
||||||
|
// ridiculously inefficient: it's a stacked regex!
|
||||||
|
$HEXDIG = '[A-Fa-f0-9]';
|
||||||
|
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
||||||
|
$sub_delims = '!$&\'()'; // needs []
|
||||||
|
$pct_encoded = "%$HEXDIG$HEXDIG";
|
||||||
|
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
|
||||||
|
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
||||||
|
$matches = array();
|
||||||
|
preg_match($r_authority, $authority, $matches);
|
||||||
|
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
||||||
|
$host = !empty($matches[3]) ? $matches[3] : '';
|
||||||
|
$port = !empty($matches[4]) ? (int) $matches[5] : null;
|
||||||
|
} else {
|
||||||
|
$port = $host = $userinfo = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return array($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -10,14 +10,18 @@ if (php_sapi_name() != 'cli') {
|
|||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
echo 'Flushing cache... ';
|
echo "Flushing cache... \n";
|
||||||
|
|
||||||
require_once(dirname(__FILE__) . '/../library/HTMLPurifier.auto.php');
|
require_once(dirname(__FILE__) . '/../library/HTMLPurifier.auto.php');
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
$cache = new HTMLPurifier_DefinitionCache_Serializer('HTML');
|
$names = array('HTML', 'CSS', 'Test');
|
||||||
|
foreach ($names as $name) {
|
||||||
|
echo " - Flushing $name\n";
|
||||||
|
$cache = new HTMLPurifier_DefinitionCache_Serializer($name);
|
||||||
$cache->flush($config);
|
$cache->flush($config);
|
||||||
|
}
|
||||||
|
|
||||||
echo 'Cache flushed successfully.';
|
echo 'Cache flushed successfully.';
|
||||||
|
|
@ -66,48 +66,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function testParsingRegular() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://www.example.com/webhp?q=foo#result2',
|
|
||||||
null, 'www.example.com', null, '/webhp', 'q=foo'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingPortAndUsername() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://user@authority.part:80/now/the/path?query#fragment',
|
|
||||||
'user', 'authority.part', 80, '/now/the/path', 'query'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingPercentEncoding() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://en.wikipedia.org/wiki/Clich%C3%A9',
|
|
||||||
null, 'en.wikipedia.org', null, '/wiki/Clich%C3%A9', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingEmptyQuery() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://www.example.com/?#',
|
|
||||||
null, 'www.example.com', null, '/', ''
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingEmptyPath() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://www.example.com',
|
|
||||||
null, 'www.example.com', null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingOpaqueURI() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'mailto:bob@example.com',
|
|
||||||
null, null, null, 'bob@example.com', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingImproperPercentEncoding() {
|
function testParsingImproperPercentEncoding() {
|
||||||
// even though we don't resolve percent entities, we have to fix
|
// even though we don't resolve percent entities, we have to fix
|
||||||
// improper percent-encodes. Taken one at a time:
|
// improper percent-encodes. Taken one at a time:
|
||||||
@ -125,38 +83,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testParsingIPv4Address() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://192.0.34.166/',
|
|
||||||
null, '192.0.34.166', null, '/', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingFakeIPv4Address() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://333.123.32.123/',
|
|
||||||
null, '333.123.32.123', null, '/', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingIPv6Address() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://[2001:db8::7]/c=GB?objectClass?one',
|
|
||||||
null, '[2001:db8::7]', null, '/c=GB', 'objectClass?one'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// We will not implement punycode encoding, that's up to the browsers
|
|
||||||
// We also will not implement percent to IDNA encoding transformations:
|
|
||||||
// if you need to use an international domain in a link, make sure that
|
|
||||||
// you've got it in UTF-8 and send it in raw (no encoding).
|
|
||||||
function testParsingInternationalizedDomainName() {
|
|
||||||
$this->assertParsing(
|
|
||||||
"http://t\xC5\xABdali\xC5\x86.lv",
|
|
||||||
null, "t\xC5\xABdali\xC5\x86.lv", null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingInvalidHostThatLooksLikeIPv6Address() {
|
function testParsingInvalidHostThatLooksLikeIPv6Address() {
|
||||||
$this->assertParsing(
|
$this->assertParsing(
|
||||||
'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]',
|
'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]',
|
||||||
@ -164,13 +90,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testParsingInvalidPort() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://example.com:foobar',
|
|
||||||
null, 'example.com', null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingOverLargePort() {
|
function testParsingOverLargePort() {
|
||||||
$this->assertParsing(
|
$this->assertParsing(
|
||||||
'http://example.com:65536',
|
'http://example.com:65536',
|
||||||
@ -178,49 +97,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testParsingPathAbsolute() { // note this is different from path-rootless
|
|
||||||
$this->assertParsing(
|
|
||||||
'http:/this/is/path',
|
|
||||||
null, null, null, '/this/is/path', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingPathRootless() {
|
|
||||||
// this should not be used but is allowed
|
|
||||||
$this->assertParsing(
|
|
||||||
'http:this/is/path',
|
|
||||||
null, null, null, 'this/is/path', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingPathEmpty() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http:',
|
|
||||||
null, null, null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingRelativeURI() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'/a/b',
|
|
||||||
null, null, null, '/a/b', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingMalformedTag() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'http://www.google.com/\'>"',
|
|
||||||
null, 'www.google.com', null, '/', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function testParsingEmpty() {
|
|
||||||
$this->assertParsing(
|
|
||||||
'',
|
|
||||||
null, null, null, '', null
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// OUTPUT RELATED TESTS
|
// OUTPUT RELATED TESTS
|
||||||
// scheme is mocked to ensure only the URI is being tested
|
// scheme is mocked to ensure only the URI is being tested
|
||||||
|
|
||||||
|
138
tests/HTMLPurifier/URIParserTest.php
Normal file
138
tests/HTMLPurifier/URIParserTest.php
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/URIParser.php';
|
||||||
|
|
||||||
|
class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
|
||||||
|
{
|
||||||
|
|
||||||
|
function assertParsing(
|
||||||
|
$uri, $scheme, $userinfo, $host, $port, $path, $query, $fragment, $config = null, $context = null
|
||||||
|
) {
|
||||||
|
$this->prepareCommon($config, $context);
|
||||||
|
$parser = new HTMLPurifier_URIParser();
|
||||||
|
$result = $parser->parse($uri, $config, $context);
|
||||||
|
$this->assertEqual($result, array($scheme, $userinfo, $host, $port, $path, $query, $fragment));
|
||||||
|
}
|
||||||
|
|
||||||
|
function testRegular() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://www.example.com/webhp?q=foo#result2',
|
||||||
|
'http', null, 'www.example.com', null, '/webhp', 'q=foo', 'result2'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testPortAndUsername() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://user@authority.part:80/now/the/path?query#fragment',
|
||||||
|
'http', 'user', 'authority.part', 80, '/now/the/path', 'query', 'fragment'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testPercentEncoding() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://en.wikipedia.org/wiki/Clich%C3%A9',
|
||||||
|
'http', null, 'en.wikipedia.org', null, '/wiki/Clich%C3%A9', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testEmptyQuery() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://www.example.com/?#',
|
||||||
|
'http', null, 'www.example.com', null, '/', '', null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testEmptyPath() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://www.example.com',
|
||||||
|
'http', null, 'www.example.com', null, '', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testOpaqueURI() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'mailto:bob@example.com',
|
||||||
|
'mailto', null, null, null, 'bob@example.com', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testIPv4Address() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://192.0.34.166/',
|
||||||
|
'http', null, '192.0.34.166', null, '/', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testFakeIPv4Address() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://333.123.32.123/',
|
||||||
|
'http', null, '333.123.32.123', null, '/', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testIPv6Address() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://[2001:db8::7]/c=GB?objectClass?one',
|
||||||
|
'http', null, '[2001:db8::7]', null, '/c=GB', 'objectClass?one', null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testInternationalizedDomainName() {
|
||||||
|
$this->assertParsing(
|
||||||
|
"http://t\xC5\xABdali\xC5\x86.lv",
|
||||||
|
'http', null, "t\xC5\xABdali\xC5\x86.lv", null, '', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testInvalidPort() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://example.com:foobar',
|
||||||
|
'http', null, 'example.com', null, '', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testPathAbsolute() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http:/this/is/path',
|
||||||
|
'http', null, null, null, '/this/is/path', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testPathRootless() {
|
||||||
|
// this should not be used but is allowed
|
||||||
|
$this->assertParsing(
|
||||||
|
'http:this/is/path',
|
||||||
|
'http', null, null, null, 'this/is/path', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testPathEmpty() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http:',
|
||||||
|
'http', null, null, null, '', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testRelativeURI() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'/a/b',
|
||||||
|
null, null, null, null, '/a/b', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testMalformedTag() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'http://www.example.com/\'>"',
|
||||||
|
'http', null, 'www.example.com', null, '/', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testEmpty() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'',
|
||||||
|
null, null, null, null, '', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -102,6 +102,7 @@ $test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php';
|
|||||||
$test_files[] = 'HTMLPurifier/Strategy/ValidateAttributesTest.php';
|
$test_files[] = 'HTMLPurifier/Strategy/ValidateAttributesTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/TagTransformTest.php';
|
$test_files[] = 'HTMLPurifier/TagTransformTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/TokenTest.php';
|
$test_files[] = 'HTMLPurifier/TokenTest.php';
|
||||||
|
$test_files[] = 'HTMLPurifier/URIParserTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
|
$test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/URISchemeTest.php';
|
$test_files[] = 'HTMLPurifier/URISchemeTest.php';
|
||||||
$test_files[] = 'HTMLPurifierTest.php';
|
$test_files[] = 'HTMLPurifierTest.php';
|
||||||
|
Loading…
Reference in New Issue
Block a user