0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-19 18:55:19 +00:00

Implement URIScheme and subclasses except for mailto. Remove fragment from components, as it is scheme-independent.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@218 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-08-12 18:58:54 +00:00
parent e56c3fcd20
commit d28bad648a
10 changed files with 250 additions and 31 deletions

View File

@ -49,7 +49,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$scheme_obj =& $registry->getScheme($scheme, $config);
if (!$scheme_obj) return ''; // invalid scheme, clean it out
} else {
$scheme_obj =& $registry->getScheme($config->get('URI', 'DefaultScheme'), $config);
$scheme_obj =& $registry->getScheme(
$config->get('URI', 'DefaultScheme'), $config
);
}
@ -99,11 +101,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
// userinfo and host are validated within the regexp
// regenerate authority
$authority =
($userinfo === null ? '' : ($userinfo . '@')) .
$host .
($port === null ? '' : (':' . $port));
} else {
$port = $host = $userinfo = null;
}
@ -120,10 +119,21 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
// okay, now we defer execution to the subobject for more processing
list($authority, $path, $query, $fragment) =
$scheme_obj->validateComponents($authority, $path, $query, $fragment);
// note that $fragment is omitted
list($userinfo, $host, $port, $path, $query) =
$scheme_obj->validateComponents(
$userinfo, $host, $port, $path, $query, $config
);
// reconstruct authority
$authority = null;
if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
$authority = '';
if($userinfo !== null) $authority .= $userinfo . '@';
$authority .= $host;
if($port !== null) $authority .= ':' . $port;
}
// reconstruct the result
$result = '';

View File

@ -3,8 +3,13 @@
class HTMLPurifier_URIScheme
{
function validateComponents($authority, $path, $query, $fragment) {
return array($authority, $path, $query, $fragment);
var $default_port = null;
function validateComponents(
$userinfo, $host, $port, $path, $query, $config
) {
if ($this->default_port == $port) $port = null;
return array($userinfo, $host, $port, $path, $query);
}
}

View File

@ -0,0 +1,21 @@
<?php
require_once 'HTMLPurifier/URIScheme.php';
/**
 * URI scheme handler for ftp.
 *
 * Keeps userinfo, host, port and path as returned by the generic scheme
 * validation, and always discards the query component.
 */
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {

    /** Default port for this scheme, read by the parent's validation. */
    var $default_port = 21;

    /**
     * Validates the components of an ftp URI.
     *
     * @param $userinfo Userinfo component, or null if absent
     * @param $host     Host component, or null if absent
     * @param $port     Port component, or null if absent
     * @param $path     Path component
     * @param $query    Query component; never returned by this scheme
     * @param $config   Configuration object, forwarded to the parent
     * @return array(userinfo, host, port, path, null)
     */
    function validateComponents(
        $userinfo, $host, $port, $path, $query, $config
    ) {
        $validated = parent::validateComponents(
            $userinfo, $host, $port, $path, $query, $config
        );
        list($userinfo, $host, $port, $path) = $validated;

        // typecode check needed on path

        // whatever the parent made of the query, this scheme drops it
        $query = null;
        return array($userinfo, $host, $port, $path, $query);
    }

}
?>

View File

@ -4,8 +4,15 @@ require_once 'HTMLPurifier/URIScheme.php';
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
function validateComponents($authority, $path, $query, $fragment) {
var $default_port = 80;
function validateComponents(
$userinfo, $host, $port, $path, $query, $config
) {
list($userinfo, $host, $port, $path, $query) =
parent::validateComponents(
$userinfo, $host, $port, $path, $query, $config );
return array(null, $host, $port, $path, $query);
}
}

View File

@ -0,0 +1,11 @@
<?php
require_once 'HTMLPurifier/URIScheme/http.php';
// URI scheme handler for https. It declares no methods of its own, so all
// component validation is inherited from the http scheme class; the only
// difference is the default port.
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
// default port for this scheme, overriding the value declared by the parent
var $default_port = 443;
}
?>

View File

@ -0,0 +1,19 @@
<?php
require_once 'HTMLPurifier/URIScheme.php';
/**
 * URI scheme handler for news.
 *
 * Only the path survives validation: userinfo, host, port and query are
 * always returned as null. No default port is declared for this scheme.
 */
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {

    /**
     * Validates the components of a news URI.
     *
     * @param $userinfo Userinfo component; never returned by this scheme
     * @param $host     Host component; never returned by this scheme
     * @param $port     Port component; never returned by this scheme
     * @param $path     Path component
     * @param $query    Query component; never returned by this scheme
     * @param $config   Configuration object, forwarded to the parent
     * @return array(null, null, null, path, null)
     */
    function validateComponents(
        $userinfo, $host, $port, $path, $query, $config
    ) {
        $validated = parent::validateComponents(
            $userinfo, $host, $port, $path, $query, $config
        );
        // the path (index 3) is the only component kept from the parent
        $path = $validated[3];

        // typecode check needed on path

        return array(null, null, null, $path, null);
    }

}
?>

View File

@ -0,0 +1,20 @@
<?php
require_once 'HTMLPurifier/URIScheme.php';
/**
 * URI scheme handler for nntp.
 *
 * Keeps host, port and path as returned by the generic scheme validation;
 * userinfo and query are always returned as null.
 */
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {

    /** Default port for this scheme, read by the parent's validation. */
    var $default_port = 119;

    /**
     * Validates the components of an nntp URI.
     *
     * @param $userinfo Userinfo component; never returned by this scheme
     * @param $host     Host component, or null if absent
     * @param $port     Port component, or null if absent
     * @param $path     Path component
     * @param $query    Query component; never returned by this scheme
     * @param $config   Configuration object, forwarded to the parent
     * @return array(null, host, port, path, null)
     */
    function validateComponents(
        $userinfo, $host, $port, $path, $query, $config
    ) {
        $validated = parent::validateComponents(
            $userinfo, $host, $port, $path, $query, $config
        );
        // pick out the three components this scheme retains
        $host = $validated[1];
        $port = $validated[2];
        $path = $validated[3];

        return array(null, $host, $port, $path, null);
    }

}
?>

View File

@ -33,21 +33,22 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
// test a regular instance, return identical URI
$uri[0] = 'http://www.example.com/webhp?q=foo#result2';
$components[0] = array(
'www.example.com', // authority
null, // userinfo
'www.example.com', // host
null, // port
'/webhp', // path
'q=foo', // query
'result2' // fragment
'q=foo' // query
);
// test an amended URI (the actual logic is irrelevant)
// test that user and port get parsed correctly (3.2.1 and 3.2.3)
$uri[1] = 'http://user@authority.part:80/now/the/path?query#fragment';
$components[1] = array(
'user@authority.part:80', // yes, user+port are part of authority
'/now/the/path', 'query', 'fragment'
'user', 'authority.part', 80,
'/now/the/path', 'query'
);
$return_components[1] = array( // removed port (it's standard)
'user@authority.part', '/now/the/path', 'query', 'fragment'
'user', 'authority.part', null, '/now/the/path', 'query'
);
$expect_uri[1] = 'http://user@authority.part/now/the/path?query#fragment';
@ -56,20 +57,20 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
// also test what happens when query/fragment are missing
$uri[2] = 'http://en.wikipedia.org/wiki/Clich%C3%A9';
$components[2] = array(
'en.wikipedia.org', '/wiki/Clich%C3%A9', null, null
null, 'en.wikipedia.org', null, '/wiki/Clich%C3%A9', null
);
// test distinction between empty query and undefined query (above)
$uri[3] = 'http://www.example.com/?#';
$components[3] = array( 'www.example.com', '/', '', '' );
$components[3] = array(null, 'www.example.com', null, '/', '');
// path is always defined, even if empty
$uri[4] = 'http://www.example.com';
$components[4] = array( 'www.example.com', '', null, null );
$components[4] = array(null, 'www.example.com', null, '', null);
// test parsing of an opaque URI
$uri[5] = 'mailto:bob@example.com';
$components[5] = array(null, 'bob@example.com', null, null);
$components[5] = array(null, null, null, 'bob@example.com', null);
// even though we don't resolve percent entities, we have to fix
// improper percent-encodes. Taken one at a time:
@ -86,12 +87,12 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
// test IPv4 address (behavior may vary with configuration)
$uri[7] = 'http://192.0.34.166/';
$components[7] = array('192.0.34.166', '/', null, null);
$components[7] = array(null, '192.0.34.166', null, '/', null);
// while it may look like an IPv4 address, it's really a reg-name.
// don't destroy it
$uri[8] = 'http://333.123.32.123/';
$components[8] = array('333.123.32.123', '/', null, null);
$components[8] = array(null, '333.123.32.123', null, '/', null);
// test IPv6 address, using amended form of RFC's example
//$uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one';
@ -105,7 +106,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
// break the RFC a little and allow international characters
// WARNING: UTF-8 encoded!
$uri[10] = 'http://tūdaliņ.lv';
$components[10] = array('tūdaliņ.lv', '', null, null);
$components[10] = array(null, 'tūdaliņ.lv', null, '', null);
// test invalid IPv6 address and invalid reg-name
//$uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]';
@ -113,12 +114,12 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
// test invalid port
$uri[12] = 'http://example.com:foobar';
$components[12] = array('example.com', '', null, null);
$components[12] = array(null, 'example.com', null, '', null);
$expect_uri[12] = 'http://example.com';
// test overlarge port (max is 65535, although this isn't official)
$uri[13] = 'http://example.com:65536';
$components[13] = array('example.com', '', null, null);
$components[13] = array(null, 'example.com', null, '', null);
$expect_uri[13] = 'http://example.com';
// some spec abnf tests
@ -127,19 +128,19 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
// "path-absolute", note this is different from path-rootless
$uri[14] = 'http:/this/is/path';
$components[14] = array(null, '/this/is/path', null, null);
$components[14] = array(null, null, null, '/this/is/path', null);
$expect_uri[14] = 'http:/this/is/path'; // do not munge scheme off
// scheme munging is not being tested yet, it's an extra feature
// "path-rootless" - this should not be used but is allowed
$uri[15] = 'http:this/is/path';
$components[15] = array(null, 'this/is/path', null, null);
$components[15] = array(null, null, null, 'this/is/path', null);
//$expect_uri[15] = 'this/is/path'; // munge scheme off
// "path-empty" - a rather interesting case, remove the scheme
$uri[16] = 'http:';
$components[16] = array(null, '', null, null);
$components[16] = array(null, null, null, '', null);
//$expect_uri[16] = ''; // munge scheme off
// test invalid scheme, components shouldn't be passed
@ -150,7 +151,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
// test basic case
$uri[18] = '/a/b';
$components[18] = array(null, '/a/b', null, null);
$components[18] = array(null, null, null, '/a/b', null);
foreach ($uri as $i => $value) {
@ -204,6 +205,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
if ($this->components === false) {
$this->scheme->expectNever('validateComponents');
} else {
$this->components[] = $this->config; // append the configuration
$this->scheme->setReturnValue(
'validateComponents', $this->return_components, $this->components);
$this->scheme->expectOnce('validateComponents', $this->components);

View File

@ -0,0 +1,123 @@
<?php
require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URIScheme/http.php';
require_once 'HTMLPurifier/URIScheme/ftp.php';
require_once 'HTMLPurifier/URIScheme/https.php';
//require_once 'HTMLPurifier/URIScheme/mailto.php';
require_once 'HTMLPurifier/URIScheme/news.php';
require_once 'HTMLPurifier/URIScheme/nntp.php';
/**
 * Exercises validateComponents() on the concrete URI scheme classes.
 *
 * Each check calls validateComponents() with
 * (userinfo, host, port, path, query, config) and compares the returned
 * five-element array against the expected components using
 * assertIdentical().
 */
class HTMLPurifier_URISchemeTest extends UnitTestCase
{

    function test_http() {
        $scheme = new HTMLPurifier_URIScheme_http();
        $config = HTMLPurifier_Config::createDefault();

        // components with nothing to strip come back unchanged
        $expect = array(null, 'www.example.com', null, '/', 's=foobar');
        $result = $scheme->validateComponents(
            null, 'www.example.com', null, '/', 's=foobar', $config
        );
        $this->assertIdentical($result, $expect);

        // absorb default port and userinfo
        $expect = array(null, 'www.example.com', null, '/', 's=foobar');
        $result = $scheme->validateComponents(
            'user', 'www.example.com', 80, '/', 's=foobar', $config
        );
        $this->assertIdentical($result, $expect);

        // do not absorb non-default port
        $expect = array(null, 'www.example.com', 8080, '/', 's=foobar');
        $result = $scheme->validateComponents(
            null, 'www.example.com', 8080, '/', 's=foobar', $config
        );
        $this->assertIdentical($result, $expect);

        // https is basically the same
        $scheme = new HTMLPurifier_URIScheme_https();
        $expect = array(null, 'www.example.com', null, '/', 's=foobar');
        $result = $scheme->validateComponents(
            'user', 'www.example.com', 443, '/', 's=foobar', $config
        );
        $this->assertIdentical($result, $expect);
    }

    function test_ftp() {
        $scheme = new HTMLPurifier_URIScheme_ftp();
        $config = HTMLPurifier_Config::createDefault();

        // userinfo is kept; default port and query are removed
        $expect = array('user', 'www.example.com', null, '/', null);
        $result = $scheme->validateComponents(
            'user', 'www.example.com', 21, '/', 's=foobar', $config
        );
        $this->assertIdentical($result, $expect);
    }

    function test_news() {
        $scheme = new HTMLPurifier_URIScheme_news();
        $config = HTMLPurifier_Config::createDefault();

        // path-only input passes through unchanged
        $expect = array(null, null, null, 'gmane.science.linguistics', null);
        $result = $scheme->validateComponents(
            null, null, null, 'gmane.science.linguistics', null, $config
        );
        $this->assertIdentical($result, $expect);

        // a path containing '@' also passes through unchanged
        $expect = array(null, null, null, '642@eagle.ATT.COM', null);
        $result = $scheme->validateComponents(
            null, null, null, '642@eagle.ATT.COM', null, $config
        );
        $this->assertIdentical($result, $expect);

        // test invalid field removal
        $expect = array(null, null, null, 'rec.music', null);
        $result = $scheme->validateComponents(
            'user', 'www.google.com', 80, 'rec.music', 'path=foo', $config
        );
        $this->assertIdentical($result, $expect);
    }

    function test_nntp() {
        $scheme = new HTMLPurifier_URIScheme_nntp();
        $config = HTMLPurifier_Config::createDefault();

        // host and path with nothing to strip come back unchanged
        $expect = array(null, 'news.example.com', null, '/alt.misc/12345', null);
        $result = $scheme->validateComponents(
            null, 'news.example.com', null, '/alt.misc/12345', null, $config
        );
        $this->assertIdentical($result, $expect);

        // userinfo, default port and query are all removed
        $expect = array(null, 'news.example.com', null, '/alt.misc/12345', null);
        $result = $scheme->validateComponents(
            'user', 'news.example.com', 119, '/alt.misc/12345', 'foo=asdf', $config
        );
        $this->assertIdentical($result, $expect);
    }

    // mailto is not implemented yet. NOTE(review): the method name lacks
    // the "test" prefix, presumably so the test runner skips it; confirm
    // before renaming it once the scheme exists.
    function non_test_mailto() {
        $scheme = new HTMLPurifier_URIScheme_mailto();
        $config = HTMLPurifier_Config::createDefault();

        $expect = array(null, null, null, 'bob@example.com', null);
        $result = $scheme->validateComponents(
            null, null, null, 'bob@example.com', null, $config
        );
        $this->assertIdentical($result, $expect);
    }

}
?>

View File

@ -68,6 +68,7 @@ $test_files[] = 'TagTransformTest.php';
$test_files[] = 'AttrTransform/LangTest.php';
$test_files[] = 'AttrTransform/TextAlignTest.php';
$test_files[] = 'URISchemeRegistryTest.php';
$test_files[] = 'URISchemeTest.php';
$test_file_lookup = array_flip($test_files);