From 22ef52a7f68e54fbd7e01c364a1a895b6f925559 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Thu, 2 Aug 2007 01:41:37 +0000 Subject: [PATCH] [2.1.0] Migrate host blacklist functionality to URIFilter. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1336 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/AttrDef/URI.php | 17 +++-- library/HTMLPurifier/URI.php | 41 +++--------- library/HTMLPurifier/URIDefinition.php | 11 ++++ .../HTMLPurifier/URIFilter/HostBlacklist.php | 28 +++++++++ tests/HTMLPurifier/AttrDef/URITest.php | 63 ++++++++----------- .../URIFilter/HostBlacklistTest.php | 30 +++++++++ tests/HTMLPurifier/URITest.php | 18 +----- tests/test_files.php | 1 + 8 files changed, 114 insertions(+), 95 deletions(-) create mode 100644 library/HTMLPurifier/URIFilter/HostBlacklist.php create mode 100644 tests/HTMLPurifier/URIFilter/HostBlacklistTest.php diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index a21d4f4b..3b97e007 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -47,14 +47,6 @@ HTMLPurifier_ConfigSchema::define( 'This directive has been available since 1.3.0.' ); -HTMLPurifier_ConfigSchema::define( - 'URI', 'HostBlacklist', array(), 'list', - 'List of strings that are forbidden in the host of any URI. Use it to '. - 'kill domain names of spam, etc. Note that it will catch anything in '. - 'the domain, so moo.com will catch moo.com.example.com. '. - 'This directive has been available since 1.3.0.' -); - HTMLPurifier_ConfigSchema::define( 'URI', 'Disable', false, 'bool', 'Disables all URIs in all forms. Not sure why you\'d want to do that '. @@ -124,10 +116,17 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $context->destroy('EmbeddedURI'); if (!$ok) return false; + // munge scheme off if necessary (this must be last) + if (!is_null($uri->scheme) && is_null($uri->host)) { + if ($config->get('URI', 'DefaultScheme') == $uri->scheme) { + $uri->scheme = null; + } + } + // back to string $result = $uri->toString(); - // munge if necessary + // munge entire URI if necessary if ( !is_null($uri->host) && // indicator for authority !empty($scheme_obj->browsable) && diff --git a/library/HTMLPurifier/URI.php b/library/HTMLPurifier/URI.php index da56872f..cec71436 100644 --- a/library/HTMLPurifier/URI.php +++ b/library/HTMLPurifier/URI.php @@ -24,6 +24,12 @@ class HTMLPurifier_URI $this->fragment = $fragment; } + /** + * Retrieves a scheme object corresponding to the URI's scheme/default + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return Scheme object appropriate for validating this URI + */ function getSchemeObj($config, &$context) { $registry =& HTMLPurifier_URISchemeRegistry::instance(); if ($this->scheme !== null) { @@ -46,26 +52,17 @@ class HTMLPurifier_URI /** * Generic validation method applicable for all schemes + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return True if validation/filtering succeeds, false if failure */ function validate($config, &$context) { // validate host if (!is_null($this->host)) { - $host_def = new HTMLPurifier_AttrDef_URI_Host(); $this->host = $host_def->validate($this->host, $config, $context); if ($this->host === false) $this->host = null; - - // check host against blacklist - if ($this->checkBlacklist($this->host, $config, $context)) return false; - - } - - // munge scheme off if necessary - if (!is_null($this->scheme) && is_null($this->host)) { - if ($config->get('URI', 'DefaultScheme') == $this->scheme) { - $this->scheme = null; - } } // validate port @@ -85,27 +82,9 @@ class HTMLPurifier_URI } - /** - * Checks a host against an array blacklist - * @param $host Host to check - * @param $config HTMLPurifier_Config instance - * @param $context HTMLPurifier_Context instance - * @return bool Is spam? - */ - function checkBlacklist($host, $config, &$context) { - $blacklist = $config->get('URI', 'HostBlacklist'); - if (!empty($blacklist)) { - foreach($blacklist as $blacklisted_host_fragment) { - if (strpos($host, $blacklisted_host_fragment) !== false) { - return true; - } - } - } - return false; - } - /** * Convert URI back to string + * @return String URI appropriate for output */ function toString() { // reconstruct authority diff --git a/library/HTMLPurifier/URIDefinition.php b/library/HTMLPurifier/URIDefinition.php index c3efa6ad..0623c983 100644 --- a/library/HTMLPurifier/URIDefinition.php +++ b/library/HTMLPurifier/URIDefinition.php @@ -5,6 +5,16 @@ require_once 'HTMLPurifier/URIFilter.php'; require_once 'HTMLPurifier/URIFilter/DisableExternal.php'; require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php'; +require_once 'HTMLPurifier/URIFilter/HostBlacklist.php'; + +HTMLPurifier_ConfigSchema::define( + 'URI', 'DefinitionID', null, 'string/null', ' +

+ Unique identifier for a custom-built URI definition. If you want + to add custom URIFilters, you must specify this value. + This directive has been available since 2.1.0. +

+'); HTMLPurifier_ConfigSchema::define( 'URI', 'DefinitionRev', 1, 'int', ' @@ -25,6 +35,7 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition function HTMLPurifier_URIDefinition() { $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); + $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist()); } function registerFilter($filter) { diff --git a/library/HTMLPurifier/URIFilter/HostBlacklist.php b/library/HTMLPurifier/URIFilter/HostBlacklist.php new file mode 100644 index 00000000..5f0d790e --- /dev/null +++ b/library/HTMLPurifier/URIFilter/HostBlacklist.php @@ -0,0 +1,28 @@ +moo.com will catch moo.com.example.com. '. + 'This directive has been available since 1.3.0.' +); + +class HTMLPurifier_URIFilter_HostBlacklist +{ + var $name = 'HostBlacklist'; + var $blacklist = array(); + function prepare($config) { + $this->blacklist = $config->get('URI', 'HostBlacklist'); + } + function filter(&$uri, $config, &$context) { + foreach($this->blacklist as $blacklisted_host_fragment) { + if (strpos($uri->host, $blacklisted_host_fragment) !== false) { + return false; + } + } + return true; + } +} diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index 43b041cd..58b77248 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -2,6 +2,7 @@ require_once 'HTMLPurifier/AttrDefHarness.php'; require_once 'HTMLPurifier/AttrDef/URI.php'; +require_once 'HTMLPurifier/URIParser.php'; /** * @todo Aim for complete code coverage with mocks @@ -48,48 +49,34 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness $this->assertDef('javascript:foobar();', false); } - function test_validate_configDisableExternal() { - - $this->def = new HTMLPurifier_AttrDef_URI(); - - $this->config->set('URI', 'DisableExternal', true); - $this->config->set('URI', 'Host', 'sub.example.com'); - - $this->assertDef('/foobar.txt'); - $this->assertDef('http://google.com/', false); - $this->assertDef('http://sub.example.com/alas?foo=asd'); - $this->assertDef('http://example.com/teehee', false); - $this->assertDef('http://www.example.com/#man', false); - $this->assertDef('http://go.sub.example.com/perhaps?p=foo'); - + function testDefaultSchemeRemovedInBlank() { + $this->assertDef('http:', ''); } - function test_validate_configDisableExternalResources() { - - $this->config->set('URI', 'DisableExternalResources', true); - - $this->assertDef('http://sub.example.com/alas?foo=asd'); - $this->assertDef('/img.png'); - - $this->def = new HTMLPurifier_AttrDef_URI(true); - - $this->assertDef('http://sub.example.com/alas?foo=asd', false); - $this->assertDef('/img.png'); - + function testDefaultSchemeRemovedInRelativeURI() { + $this->assertDef('http:/foo/bar', '/foo/bar'); } - function test_validate_configBlacklist() { - - $this->config->set('URI', 'HostBlacklist', array('example.com', 'moo')); - - $this->assertDef('foo.txt'); - $this->assertDef('http://www.google.com/example.com/moo'); - - $this->assertDef('http://example.com/#23', false); - $this->assertDef('https://sub.domain.example.com/foobar', false); - $this->assertDef('http://example.com.example.net/?whoo=foo', false); - $this->assertDef('ftp://moo-moo.net/foo/foo/', false); - + function testDefaultSchemeNotRemovedInAbsoluteURI() { + $this->assertDef('http://example.com/foo/bar'); + } + + function testAltSchemeNotRemoved() { + $this->assertDef('mailto:this-looks-like-a-path@example.com'); + } + + function testURIDefinitionValidation() { + $parser = new HTMLPurifier_URIParser(); + $uri = $parser->parse('http://example.com'); + $this->config->set('URI', 'DefinitionID', 'HTMLPurifier_AttrDef_URITest->testURIDefinitionValidation'); + $uri_def =& $this->config->getDefinition('URI'); + // overload with mock + generate_mock_once('HTMLPurifier_URIDefinition'); + $uri_def = new HTMLPurifier_URIDefinitionMock(); + $uri_def->expectOnce('filter', array($uri, '*', '*')); + $uri_def->setReturnValue('filter', true, array($uri, '*', '*')); + $uri_def->setup = true; + $this->assertDef('http://example.com'); } /* diff --git a/tests/HTMLPurifier/URIFilter/HostBlacklistTest.php b/tests/HTMLPurifier/URIFilter/HostBlacklistTest.php new file mode 100644 index 00000000..d9a3fdd2 --- /dev/null +++ b/tests/HTMLPurifier/URIFilter/HostBlacklistTest.php @@ -0,0 +1,30 @@ +filter = new HTMLPurifier_URIFilter_HostBlacklist(); + } + + function testRejectBlacklistedHost() { + $this->config->set('URI', 'HostBlacklist', 'example.com'); + $this->assertFiltering('http://example.com', false); + } + + function testRejectBlacklistedHostThoughNotTrue() { + // maybe this behavior should change + $this->config->set('URI', 'HostBlacklist', 'example.com'); + $this->assertFiltering('http://example.comcast.com', false); + } + + function testPreserveNonBlacklistedHost() { + $this->config->set('URI', 'HostBlacklist', 'example.com'); + $this->assertFiltering('http://google.com'); + } + +} diff --git a/tests/HTMLPurifier/URITest.php b/tests/HTMLPurifier/URITest.php index 85017be1..9da37a7a 100644 --- a/tests/HTMLPurifier/URITest.php +++ b/tests/HTMLPurifier/URITest.php @@ -151,22 +151,6 @@ class HTMLPurifier_URITest extends HTMLPurifier_URIHarness } } - function test_validate_defaultSchemeRemovedInBlank() { - $this->assertValidation('http:', ''); - } - - function test_validate_defaultSchemeRemovedInRelativeURI() { - $this->assertValidation('http:/foo/bar', '/foo/bar'); - } - - function test_validate_defaultSchemeNotRemovedInAbsoluteURI() { - $this->assertValidation('http://example.com/foo/bar'); - } - - function test_validate_altSchemeNotRemoved() { - $this->assertValidation('mailto:this-looks-like-a-path@example.com'); - } - function test_validate_overlongPort() { $this->assertValidation('http://example.com:65536', 'http://example.com'); } @@ -176,7 +160,7 @@ class HTMLPurifier_URITest extends HTMLPurifier_URIHarness } function test_validate_invalidHostThatLooksLikeIPv6() { - $this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', ''); + $this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', 'http:'); } } diff --git a/tests/test_files.php b/tests/test_files.php index cd61b5ce..44bc86f2 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -105,6 +105,7 @@ $test_files[] = 'HTMLPurifier/TokenTest.php'; $test_files[] = 'HTMLPurifier/URIDefinitionTest.php'; $test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php'; $test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php'; +$test_files[] = 'HTMLPurifier/URIFilter/HostBlacklistTest.php'; $test_files[] = 'HTMLPurifier/URIParserTest.php'; $test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php'; $test_files[] = 'HTMLPurifier/URISchemeTest.php';