mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 23:28:42 +00:00
Implement URIScheme and subclasses except for mailto. Remove fragment from components, as it is scheme-independent.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@218 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
e56c3fcd20
commit
d28bad648a
@ -49,7 +49,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
$scheme_obj =& $registry->getScheme($scheme, $config);
|
||||
if (!$scheme_obj) return ''; // invalid scheme, clean it out
|
||||
} else {
|
||||
$scheme_obj =& $registry->getScheme($config->get('URI', 'DefaultScheme'), $config);
|
||||
$scheme_obj =& $registry->getScheme(
|
||||
$config->get('URI', 'DefaultScheme'), $config
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -99,11 +101,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
// userinfo and host are validated within the regexp
|
||||
|
||||
// regenerate authority
|
||||
$authority =
|
||||
($userinfo === null ? '' : ($userinfo . '@')) .
|
||||
$host .
|
||||
($port === null ? '' : (':' . $port));
|
||||
} else {
|
||||
$port = $host = $userinfo = null;
|
||||
}
|
||||
|
||||
|
||||
@ -120,10 +119,21 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
|
||||
// okay, now we defer execution to the subobject for more processing
|
||||
list($authority, $path, $query, $fragment) =
|
||||
$scheme_obj->validateComponents($authority, $path, $query, $fragment);
|
||||
// note that $fragment is omitted
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
$scheme_obj->validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
);
|
||||
|
||||
|
||||
// reconstruct authority
|
||||
$authority = null;
|
||||
if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
|
||||
$authority = '';
|
||||
if($userinfo !== null) $authority .= $userinfo . '@';
|
||||
$authority .= $host;
|
||||
if($port !== null) $authority .= ':' . $port;
|
||||
}
|
||||
|
||||
// reconstruct the result
|
||||
$result = '';
|
||||
|
@ -3,8 +3,13 @@
|
||||
class HTMLPurifier_URIScheme
|
||||
{
|
||||
|
||||
function validateComponents($authority, $path, $query, $fragment) {
|
||||
return array($authority, $path, $query, $fragment);
|
||||
var $default_port = null;
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
if ($this->default_port == $port) $port = null;
|
||||
return array($userinfo, $host, $port, $path, $query);
|
||||
}
|
||||
|
||||
}
|
||||
|
21
library/HTMLPurifier/URIScheme/ftp.php
Normal file
21
library/HTMLPurifier/URIScheme/ftp.php
Normal file
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
|
||||
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $default_port = 21;
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
// typecode check needed on path
|
||||
return array($userinfo, $host, $port, $path, null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@ -4,8 +4,15 @@ require_once 'HTMLPurifier/URIScheme.php';
|
||||
|
||||
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
|
||||
|
||||
function validateComponents($authority, $path, $query, $fragment) {
|
||||
var $default_port = 80;
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
return array(null, $host, $port, $path, $query);
|
||||
}
|
||||
|
||||
}
|
||||
|
11
library/HTMLPurifier/URIScheme/https.php
Normal file
11
library/HTMLPurifier/URIScheme/https.php
Normal file
@ -0,0 +1,11 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/URIScheme/http.php';
|
||||
|
||||
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
|
||||
|
||||
var $default_port = 443;
|
||||
|
||||
}
|
||||
|
||||
?>
|
19
library/HTMLPurifier/URIScheme/news.php
Normal file
19
library/HTMLPurifier/URIScheme/news.php
Normal file
@ -0,0 +1,19 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
|
||||
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
// typecode check needed on path
|
||||
return array(null, null, null, $path, null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
20
library/HTMLPurifier/URIScheme/nntp.php
Normal file
20
library/HTMLPurifier/URIScheme/nntp.php
Normal file
@ -0,0 +1,20 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
|
||||
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $default_port = 119;
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
return array(null, $host, $port, $path, null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@ -33,21 +33,22 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
// test a regular instance, return identical URI
|
||||
$uri[0] = 'http://www.example.com/webhp?q=foo#result2';
|
||||
$components[0] = array(
|
||||
'www.example.com', // authority
|
||||
null, // userinfo
|
||||
'www.example.com', // host
|
||||
null, // port
|
||||
'/webhp', // path
|
||||
'q=foo', // query
|
||||
'result2' // fragment
|
||||
'q=foo' // query
|
||||
);
|
||||
|
||||
// test an amended URI (the actual logic is irrelevant)
|
||||
// test that user and port get parsed correctly (3.2.1 and 3.2.3)
|
||||
$uri[1] = 'http://user@authority.part:80/now/the/path?query#fragment';
|
||||
$components[1] = array(
|
||||
'user@authority.part:80', // yes, user+port are part of authority
|
||||
'/now/the/path', 'query', 'fragment'
|
||||
'user', 'authority.part', 80,
|
||||
'/now/the/path', 'query'
|
||||
);
|
||||
$return_components[1] = array( // removed port (it's standard)
|
||||
'user@authority.part', '/now/the/path', 'query', 'fragment'
|
||||
'user', 'authority.part', null, '/now/the/path', 'query'
|
||||
);
|
||||
$expect_uri[1] = 'http://user@authority.part/now/the/path?query#fragment';
|
||||
|
||||
@ -56,20 +57,20 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
// also test what happens when query/fragment are missing
|
||||
$uri[2] = 'http://en.wikipedia.org/wiki/Clich%C3%A9';
|
||||
$components[2] = array(
|
||||
'en.wikipedia.org', '/wiki/Clich%C3%A9', null, null
|
||||
null, 'en.wikipedia.org', null, '/wiki/Clich%C3%A9', null
|
||||
);
|
||||
|
||||
// test distinction between empty query and undefined query (above)
|
||||
$uri[3] = 'http://www.example.com/?#';
|
||||
$components[3] = array( 'www.example.com', '/', '', '' );
|
||||
$components[3] = array(null, 'www.example.com', null, '/', '');
|
||||
|
||||
// path is always defined, even if empty
|
||||
$uri[4] = 'http://www.example.com';
|
||||
$components[4] = array( 'www.example.com', '', null, null );
|
||||
$components[4] = array(null, 'www.example.com', null, '', null);
|
||||
|
||||
// test parsing of an opaque URI
|
||||
$uri[5] = 'mailto:bob@example.com';
|
||||
$components[5] = array(null, 'bob@example.com', null, null);
|
||||
$components[5] = array(null, null, null, 'bob@example.com', null);
|
||||
|
||||
// even though we don't resolve percent entities, we have to fix
|
||||
// improper percent-encodes. Taken one at a time:
|
||||
@ -86,12 +87,12 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
// test IPv4 address (behavior may vary with configuration)
|
||||
$uri[7] = 'http://192.0.34.166/';
|
||||
$components[7] = array('192.0.34.166', '/', null, null);
|
||||
$components[7] = array(null, '192.0.34.166', null, '/', null);
|
||||
|
||||
// while it may look like an IPv4 address, it's really a reg-name.
|
||||
// don't destroy it
|
||||
$uri[8] = 'http://333.123.32.123/';
|
||||
$components[8] = array('333.123.32.123', '/', null, null);
|
||||
$components[8] = array(null, '333.123.32.123', null, '/', null);
|
||||
|
||||
// test IPv6 address, using amended form of RFC's example
|
||||
//$uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one';
|
||||
@ -105,7 +106,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
// break the RFC a little and allow international characters
|
||||
// WARNING: UTF-8 encoded!
|
||||
$uri[10] = 'http://tūdaliņ.lv';
|
||||
$components[10] = array('tūdaliņ.lv', '', null, null);
|
||||
$components[10] = array(null, 'tūdaliņ.lv', null, '', null);
|
||||
|
||||
// test invalid IPv6 address and invalid reg-name
|
||||
//$uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]';
|
||||
@ -113,12 +114,12 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
// test invalid port
|
||||
$uri[12] = 'http://example.com:foobar';
|
||||
$components[12] = array('example.com', '', null, null);
|
||||
$components[12] = array(null, 'example.com', null, '', null);
|
||||
$expect_uri[12] = 'http://example.com';
|
||||
|
||||
// test overlarge port (max is 65535, although this isn't official)
|
||||
$uri[13] = 'http://example.com:65536';
|
||||
$components[13] = array('example.com', '', null, null);
|
||||
$components[13] = array(null, 'example.com', null, '', null);
|
||||
$expect_uri[13] = 'http://example.com';
|
||||
|
||||
// some spec abnf tests
|
||||
@ -127,19 +128,19 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
// "path-absolute", note this is different from path-rootless
|
||||
$uri[14] = 'http:/this/is/path';
|
||||
$components[14] = array(null, '/this/is/path', null, null);
|
||||
$components[14] = array(null, null, null, '/this/is/path', null);
|
||||
$expect_uri[14] = 'http:/this/is/path'; // do not munge scheme off
|
||||
|
||||
// scheme munging is not being tested yet, it's an extra feature
|
||||
|
||||
// "path-rootless" - this should not be used but is allowed
|
||||
$uri[15] = 'http:this/is/path';
|
||||
$components[15] = array(null, 'this/is/path', null, null);
|
||||
$components[15] = array(null, null, null, 'this/is/path', null);
|
||||
//$expect_uri[15] = 'this/is/path'; // munge scheme off
|
||||
|
||||
// "path-empty" - a rather interesting case, remove the scheme
|
||||
$uri[16] = 'http:';
|
||||
$components[16] = array(null, '', null, null);
|
||||
$components[16] = array(null, null, null, '', null);
|
||||
//$expect_uri[16] = ''; // munge scheme off
|
||||
|
||||
// test invalid scheme, components shouldn't be passed
|
||||
@ -150,7 +151,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
// test basic case
|
||||
$uri[18] = '/a/b';
|
||||
$components[18] = array(null, '/a/b', null, null);
|
||||
$components[18] = array(null, null, null, '/a/b', null);
|
||||
|
||||
foreach ($uri as $i => $value) {
|
||||
|
||||
@ -204,6 +205,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
if ($this->components === false) {
|
||||
$this->scheme->expectNever('validateComponents');
|
||||
} else {
|
||||
$this->components[] = $this->config; // append the configuration
|
||||
$this->scheme->setReturnValue(
|
||||
'validateComponents', $this->return_components, $this->components);
|
||||
$this->scheme->expectOnce('validateComponents', $this->components);
|
||||
|
123
tests/HTMLPurifier/URISchemeTest.php
Normal file
123
tests/HTMLPurifier/URISchemeTest.php
Normal file
@ -0,0 +1,123 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
|
||||
require_once 'HTMLPurifier/URIScheme/http.php';
|
||||
require_once 'HTMLPurifier/URIScheme/ftp.php';
|
||||
require_once 'HTMLPurifier/URIScheme/https.php';
|
||||
//require_once 'HTMLPurifier/URIScheme/mailto.php';
|
||||
require_once 'HTMLPurifier/URIScheme/news.php';
|
||||
require_once 'HTMLPurifier/URIScheme/nntp.php';
|
||||
|
||||
class HTMLPurifier_URISchemeTest extends UnitTestCase
|
||||
{
|
||||
|
||||
function test_http() {
|
||||
$scheme = new HTMLPurifier_URIScheme_http();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
null, 'www.example.com', null, '/', 's=foobar', $config),
|
||||
array(null, 'www.example.com', null, '/', 's=foobar')
|
||||
);
|
||||
|
||||
// absorb default port and userinfo
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
'user', 'www.example.com', 80, '/', 's=foobar', $config),
|
||||
array(null, 'www.example.com', null, '/', 's=foobar')
|
||||
);
|
||||
|
||||
// do not absorb non-default port
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
null, 'www.example.com', 8080, '/', 's=foobar', $config),
|
||||
array(null, 'www.example.com', 8080, '/', 's=foobar')
|
||||
);
|
||||
|
||||
// https is basically the same
|
||||
|
||||
$scheme = new HTMLPurifier_URIScheme_https();
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
'user', 'www.example.com', 443, '/', 's=foobar', $config),
|
||||
array(null, 'www.example.com', null, '/', 's=foobar')
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
function test_ftp() {
|
||||
|
||||
$scheme = new HTMLPurifier_URIScheme_ftp();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
'user', 'www.example.com', 21, '/', 's=foobar', $config),
|
||||
array('user', 'www.example.com', null, '/', null)
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
function test_news() {
|
||||
|
||||
$scheme = new HTMLPurifier_URIScheme_news();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
null, null, null, 'gmane.science.linguistics', null, $config),
|
||||
array(null, null, null, 'gmane.science.linguistics', null)
|
||||
);
|
||||
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
null, null, null, '642@eagle.ATT.COM', null, $config),
|
||||
array(null, null, null, '642@eagle.ATT.COM', null)
|
||||
);
|
||||
|
||||
// test invalid field removal
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
'user', 'www.google.com', 80, 'rec.music', 'path=foo', $config),
|
||||
array(null, null, null, 'rec.music', null)
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
function test_nntp() {
|
||||
|
||||
$scheme = new HTMLPurifier_URIScheme_nntp();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
null, 'news.example.com', null, '/alt.misc/12345', null, $config),
|
||||
array(null, 'news.example.com', null, '/alt.misc/12345', null)
|
||||
);
|
||||
|
||||
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
'user', 'news.example.com', 119, '/alt.misc/12345', 'foo=asdf', $config),
|
||||
array(null, 'news.example.com', null, '/alt.misc/12345', null)
|
||||
);
|
||||
}
|
||||
|
||||
// mailto isn't implemented yet
|
||||
function non_test_mailto() {
|
||||
|
||||
$scheme = new HTMLPurifier_URIScheme_mailto();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$this->assertIdentical(
|
||||
$scheme->validateComponents(
|
||||
null, null, null, 'bob@example.com', null, $config),
|
||||
array(null, null, null, 'bob@example.com', null)
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@ -68,6 +68,7 @@ $test_files[] = 'TagTransformTest.php';
|
||||
$test_files[] = 'AttrTransform/LangTest.php';
|
||||
$test_files[] = 'AttrTransform/TextAlignTest.php';
|
||||
$test_files[] = 'URISchemeRegistryTest.php';
|
||||
$test_files[] = 'URISchemeTest.php';
|
||||
|
||||
$test_file_lookup = array_flip($test_files);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user