oldRegistry = HTMLPurifier_URISchemeRegistry::instance();
$this->def = new HTMLPurifier_AttrDef_URI(); // default
// Per-test cleanup hook.
// NOTE(review): the body is not visible in this excerpt; presumably it puts
// back the scheme registry captured before the test - confirm in the full file.
function tearDown() {
/**
 * Normalizes the config/context pair in place; both arguments are taken by
 * reference and overwritten.
 *
 * @param mixed $config  Handed to HTMLPurifier_Config::create() and replaced
 *                       with whatever that call returns.
 * @param mixed $context Replaced with a new HTMLPurifier_Context only when the
 *                       supplied value is falsy; otherwise left untouched.
 */
function prepareCommon(&$config, &$context) {
$config = HTMLPurifier_Config::create($config);
if (!$context) $context = new HTMLPurifier_Context();
/**
 * Installs a mock scheme registry and returns, by reference, the single scheme
 * mock that the registry serves for every name in $scheme_names.
 *
 * @param array $scheme_names Scheme names the mocked registry should resolve;
 *                            defaults to 'http' and 'mailto'.
 * @return HTMLPurifier_URISchemeMock The shared scheme mock, so that callers
 *                                    can attach expectations or return values.
 */
function &generateSchemeMock($scheme_names = array('http', 'mailto')) {
// load a scheme registry mock to the singleton
$registry =& HTMLPurifier_URISchemeRegistry::instance(
new HTMLPurifier_URISchemeRegistryMock()
// add a pseudo-scheme to the registry for $scheme_names
$scheme = new HTMLPurifier_URISchemeMock();
foreach ($scheme_names as $name) {
// every listed name maps to the same $scheme object; the second and third
// getScheme arguments are wildcards
$registry->setReturnReference('getScheme', $scheme, array($name, '*', '*'));
// registry returns false if an invalid scheme is requested
// NOTE(review): this catch-all is registered after the per-name entries;
// presumably the mock framework prefers the earlier, more specific match -
// confirm against the mock library's matching rules.
$registry->setReturnValue('getScheme', false, array('*', '*', '*'));
return $scheme;
/**
 * Checks how a URI string is split into components: validates $uri with a
 * fresh HTMLPurifier_AttrDef_URI and expects the mocked scheme's
 * validateComponents to be invoked once with the given pieces.
 *
 * @param string $uri      URI handed to validate().
 * @param mixed  $userinfo Expected userinfo component.
 * @param mixed  $host     Expected host component.
 * @param mixed  $port     Expected port component.
 * @param mixed  $path     Expected path component.
 * @param mixed  $query    Expected query component.
 * @param mixed  $config   Optional config; normalized by prepareCommon().
 * @param mixed  $context  Optional context; normalized by prepareCommon().
 */
function assertParsing($uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) {
$this->prepareCommon($config, $context);
$scheme =& $this->generateSchemeMock();
// create components parameter list
// Config and Context are wildcards due to PHP4 reference funkiness
$components = array($userinfo, $host, $port, $path, $query, '*', '*');
$scheme->expectOnce('validateComponents', $components);
// a new definition is built here rather than reusing $this->def; the return
// value of validate() is ignored because only the mock expectation matters
$def = new HTMLPurifier_AttrDef_URI();
$def->validate($uri, $config, $context);
// Ordinary URL: expects host 'www.example.com', path '/webhp', query 'q=foo'.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingRegular() {
null, 'www.example.com', null, '/webhp', 'q=foo'
// URL with userinfo and port: expects 'user', 'authority.part' and 80 (an
// integer, not a string) ahead of the path and query.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingPortAndUsername() {
'user', 'authority.part', 80, '/now/the/path', 'query'
// Percent-encoded path: the expected path keeps its %C3%A9 escape as written.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingPercentEncoding() {
null, 'en.wikipedia.org', null, '/wiki/Clich%C3%A9', null
// Empty query: the expected query is the empty string rather than null.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingEmptyQuery() {
null, 'www.example.com', null, '/', ''
// Empty path: the expected path is the empty string and the query is null.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingEmptyPath() {
null, 'www.example.com', null, '', null
// Opaque URI: no host is expected; 'bob@example.com' is expected as the path.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingOpaqueURI() {
null, null, null, 'bob@example.com', null
// Malformed percent-escapes must be repaired while parsing.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingImproperPercentEncoding() {
// Even though we don't resolve percent-encoded characters in general, we
// have to fix improper percent-encodings. Taken one at a time:
// %56 - V, an unreserved character, so it appears decoded as 'V'
// %fc - u with an umlaut; hex digits are normalized to uppercase (%FC)
// %GJ - invalid hex digits in the escape, so the % is encoded (%25GJ)
// %5 - prematurely terminated escape, so the % is encoded (%255)
// %FC - u with an umlaut, already correct
// Note that Apache doesn't do such fixing; rather, it just claims
// that the browser sent a "Bad Request". See PercentEncoder.php
// for more details.
null, 'www.example.com', null, '/V%FC%25GJ%255%FC', null
// IPv4 host case.
// NOTE(review): the expected host below is an empty string, which is odd for
// a test about IPv4 addresses; the address literal may have been lost from
// this copy - confirm against the upstream test before relying on it.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingIPv4Address() {
null, '', null, '/', null
// Dotted-quad lookalike with an out-of-range octet (333): the string is still
// expected to come through as the host.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingFakeIPv4Address() {
null, '333.123.32.123', null, '/', null
// Bracketed IPv6 host: the brackets are part of the expected host, and the
// expected query itself contains a '?'.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingIPv6Address() {
null, '[2001:db8::7]', null, '/c=GB', 'objectClass?one'
// We will not implement Punycode encoding; that is left to the browsers.
// We also will not implement percent-encoding to IDNA transformations:
// if you need to use an internationalized domain in a link, make sure that
// you have it in UTF-8 and send it raw (no encoding).
// The expected host below is given as raw UTF-8 bytes.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingInternationalizedDomainName() {
null, "t\xC5\xABdali\xC5\x86.lv", null, '', null
// Invalid IPv6-looking host: the expected host is null and the path is empty.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingInvalidHostThatLooksLikeIPv6Address() {
null, null, null, '', null
// Invalid port: the host is kept but the expected port is null.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingInvalidPort() {
null, 'example.com', null, '', null
// Over-large port: the host is kept but the expected port is null.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingOverLargePort() {
null, 'example.com', null, '', null
// Absolute path without authority: only the path, with its leading slash, is
// expected.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingPathAbsolute() { // note this is different from path-rootless
null, null, null, '/this/is/path', null
// Rootless path without authority: the expected path has no leading slash.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingPathRootless() {
// this form should not be used, but it is allowed
null, null, null, 'this/is/path', null
// Empty path without authority: the path is '' and everything else is null.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingPathEmpty() {
null, null, null, '', null
// Relative reference: only the path '/a/b' is expected.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingRelativeURI() {
null, null, null, '/a/b', null
// Malformed-tag input: host 'www.google.com' and path '/' are still expected.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingMalformedTag() {
null, 'www.google.com', null, '/', null
// Empty input: the path is '' and every other component is null.
// NOTE(review): the call line and input URI are missing from this excerpt;
// values presumably map to userinfo, host, port, path, query - confirm.
function testParsingEmpty() {
null, null, null, '', null
// scheme is mocked to ensure only the URI is being tested
/**
 * Checks the string that validate() returns when the mocked scheme hands back
 * a fixed set of components.
 *
 * @param string      $input_uri  URI handed to validate().
 * @param string|bool $expect_uri Expected result; pass true to expect the
 *                                input back unchanged.
 * @param mixed       $userinfo   Userinfo the mocked scheme returns.
 * @param mixed       $host       Host the mocked scheme returns.
 * @param mixed       $port       Port the mocked scheme returns.
 * @param mixed       $path       Path the mocked scheme returns.
 * @param mixed       $query      Query the mocked scheme returns.
 * @param mixed       $config     Optional config; normalized by prepareCommon().
 * @param mixed       $context    Optional context; normalized by prepareCommon().
 */
function assertOutput($input_uri, $expect_uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) {
// prepare mock machinery
$this->prepareCommon($config, $context);
$scheme =& $this->generateSchemeMock();
// validateComponents is stubbed to return these five components, whatever it
// is called with
$components = array($userinfo, $host, $port, $path, $query);
$scheme->setReturnValue('validateComponents', $components);
$def = new HTMLPurifier_AttrDef_URI();
$result_uri = $def->validate($input_uri, $config, $context);
// true is shorthand for "output must equal input"
if ($expect_uri === true) $expect_uri = $input_uri;
$this->assertEqual($result_uri, $expect_uri);
// Full URL with userinfo, port, query and fragment: the expected output is
// the input itself (second value is true).
// NOTE(review): the call line is missing from this excerpt; values presumably
// map to assertOutput's input, expected output, then the five components.
function testOutputRegular() {
'http://user@authority.part:8080/now/the/path?query#frag', true,
'user', 'authority.part', 8080, '/now/the/path', 'query'
// Empty input with an empty-string path: the expected output is the input
// itself (second value is true).
// NOTE(review): the call line is missing from this excerpt; values presumably
// map to assertOutput's input, expected output, then the five components.
function testOutputEmpty() {
'', true,
null, null, null, '', null
// Empty input where even the path is null: the expected output is still the
// input itself (second value is true).
// NOTE(review): the call line is missing from this excerpt; values presumably
// map to assertOutput's input, expected output, then the five components.
function testOutputNullPath() {
'', true,
null, null, null, null, null // usually shouldn't happen
// Scheme plus absolute path: the expected output is the bare path, without
// the 'http:' prefix.
// NOTE(review): the call line is missing from this excerpt; values presumably
// map to assertOutput's input, expected output, then the five components.
function testOutputPathAbsolute() {
'http:/this/is/path', '/this/is/path',
null, null, null, '/this/is/path', null
// Scheme plus rootless path: the expected output is the bare path, without
// the 'http:' prefix.
// NOTE(review): the call line is missing from this excerpt; values presumably
// map to assertOutput's input, expected output, then the five components.
function testOutputPathRootless() {
'http:this/is/path', 'this/is/path',
null, null, null, 'this/is/path', null
// Scheme only: the expected output is the empty string.
// NOTE(review): the call line is missing from this excerpt; values presumably
// map to assertOutput's input, expected output, then the five components.
function testOutputPathEmpty() {
'http:', '',
null, null, null, '', null
// Runs the definition without the scheme mock helpers of this file.
// NOTE(review): assertDef is defined outside this excerpt; presumably its
// second argument is the expected output and false means "rejected" - confirm.
function testIntegration() {
$this->assertDef('http:', '');
$this->assertDef('http:/foo', '/foo');
$this->assertDef('javascript:bad_stuff();', false);
// Enables URI.DisableExternal with URI.Host set to 'sub.example.com'; every
// assertion visible here passes false for a URL on a different host.
// NOTE(review): assertDef is defined outside this excerpt; presumably false
// means "rejected" - confirm. Other assertions may be missing from this copy.
function testConfigDisableExternal() {
$this->def = new HTMLPurifier_AttrDef_URI();
$this->config->set('URI', 'DisableExternal', true);
$this->config->set('URI', 'Host', 'sub.example.com');
$this->assertDef('http://google.com/', false);
// the parent domain and a sibling subdomain are not the configured host
$this->assertDef('http://example.com/teehee', false);
$this->assertDef('http://www.example.com/#man', false);
// Builds the definition with its constructor argument set to true and passes
// false as the expectation for a mailto: URI.
// NOTE(review): assertDef is defined outside this excerpt; presumably false
// means "rejected" - confirm.
function testEmbeds() {
// embedded URI
$this->def = new HTMLPurifier_AttrDef_URI(true);
$this->assertDef('mailto:foo@example.com', false);
// Enables URI.DisableExternalResources and checks the definition built with
// its constructor argument set to true.
// NOTE(review): assertDef is defined outside this excerpt; presumably false
// means "rejected" - confirm.
function testConfigDisableExternalResources() {
$this->config->set('URI', 'DisableExternalResources', true);
// NOTE(review): as shown, this assignment is overwritten on the very next
// line; assertions between the two were probably lost from this copy.
$this->def = new HTMLPurifier_AttrDef_URI();
$this->def = new HTMLPurifier_AttrDef_URI(true);
$this->assertDef('http://sub.example.com/alas?foo=asd', false);
// Sets URI.Munge to a redirector template containing a %s placeholder; the
// only assertion visible here passes false for a javascript: URI.
// NOTE(review): assertDef is defined outside this excerpt; presumably false
// means "rejected" - confirm. Other assertions may be missing from this copy.
function testConfigMunge() {
$this->config->set('URI', 'Munge', 'http://www.google.com/url?q=%s');
$this->assertDef('javascript:foobar();', false);
// Sets URI.HostBlacklist to 'example.com' and 'moo'; every URL below has a
// host that contains one of those fragments.
// NOTE(review): assertDef is defined outside this excerpt; presumably false
// means "rejected" - confirm.
function testBlacklist() {
$this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
$this->assertDef('http://example.com/#23', false);
$this->assertDef('https://sub.domain.example.com/foobar', false);
// the fragment appears in the middle of the host, not at its end
$this->assertDef('http://example.com.example.net/?whoo=foo', false);
// 'moo' appears inside a longer host label
$this->assertDef('ftp://moo-moo.net/foo/foo/', false);
function testWhitelist() {
/* unimplemented
$this->config->set('URI', 'HostPolicy', 'DenyAll');
$this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
$this->assertDef('http://example.com/fo/google.com', false);
$this->assertDef('http://google.com.tricky.spamsite.net', false);