0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-05 06:01:52 +00:00

[2.1.0] Migrate host blacklist functionality to URIFilter.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1336 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-08-02 01:41:37 +00:00
parent 4919187fc6
commit 22ef52a7f6
8 changed files with 114 additions and 95 deletions

View File

@ -47,14 +47,6 @@ HTMLPurifier_ConfigSchema::define(
'This directive has been available since 1.3.0.' 'This directive has been available since 1.3.0.'
); );
HTMLPurifier_ConfigSchema::define(
'URI', 'HostBlacklist', array(), 'list',
'List of strings that are forbidden in the host of any URI. Use it to '.
'kill domain names of spam, etc. Note that it will catch anything in '.
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'URI', 'Disable', false, 'bool', 'URI', 'Disable', false, 'bool',
'Disables all URIs in all forms. Not sure why you\'d want to do that '. 'Disables all URIs in all forms. Not sure why you\'d want to do that '.
@ -124,10 +116,17 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$context->destroy('EmbeddedURI'); $context->destroy('EmbeddedURI');
if (!$ok) return false; if (!$ok) return false;
// munge scheme off if necessary (this must be last)
if (!is_null($uri->scheme) && is_null($uri->host)) {
if ($config->get('URI', 'DefaultScheme') == $uri->scheme) {
$uri->scheme = null;
}
}
// back to string // back to string
$result = $uri->toString(); $result = $uri->toString();
// munge if necessary // munge entire URI if necessary
if ( if (
!is_null($uri->host) && // indicator for authority !is_null($uri->host) && // indicator for authority
!empty($scheme_obj->browsable) && !empty($scheme_obj->browsable) &&

View File

@ -24,6 +24,12 @@ class HTMLPurifier_URI
$this->fragment = $fragment; $this->fragment = $fragment;
} }
/**
* Retrieves a scheme object corresponding to the URI's scheme/default
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return Scheme object appropriate for validating this URI
*/
function getSchemeObj($config, &$context) { function getSchemeObj($config, &$context) {
$registry =& HTMLPurifier_URISchemeRegistry::instance(); $registry =& HTMLPurifier_URISchemeRegistry::instance();
if ($this->scheme !== null) { if ($this->scheme !== null) {
@ -46,26 +52,17 @@ class HTMLPurifier_URI
/** /**
* Generic validation method applicable for all schemes * Generic validation method applicable for all schemes
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return True if validation/filtering succeeds, false if failure
*/ */
function validate($config, &$context) { function validate($config, &$context) {
// validate host // validate host
if (!is_null($this->host)) { if (!is_null($this->host)) {
$host_def = new HTMLPurifier_AttrDef_URI_Host(); $host_def = new HTMLPurifier_AttrDef_URI_Host();
$this->host = $host_def->validate($this->host, $config, $context); $this->host = $host_def->validate($this->host, $config, $context);
if ($this->host === false) $this->host = null; if ($this->host === false) $this->host = null;
// check host against blacklist
if ($this->checkBlacklist($this->host, $config, $context)) return false;
}
// munge scheme off if necessary
if (!is_null($this->scheme) && is_null($this->host)) {
if ($config->get('URI', 'DefaultScheme') == $this->scheme) {
$this->scheme = null;
}
} }
// validate port // validate port
@ -85,27 +82,9 @@ class HTMLPurifier_URI
} }
/**
 * Tells whether a host contains any fragment listed in the
 * URI.HostBlacklist configuration directive.
 * @param $host Host string to inspect
 * @param $config HTMLPurifier_Config instance the blacklist is read from
 * @param $context HTMLPurifier_Context instance (not used by this check)
 * @return bool True if the host matches a blacklisted fragment
 */
function checkBlacklist($host, $config, &$context) {
    $fragments = $config->get('URI', 'HostBlacklist');
    // nothing configured, nothing to reject
    if (empty($fragments)) return false;
    foreach ($fragments as $fragment) {
        // substring match: the fragment may occur anywhere in the host
        if (strpos($host, $fragment) === false) continue;
        return true;
    }
    return false;
}
/** /**
* Convert URI back to string * Convert URI back to string
* @return String URI appropriate for output
*/ */
function toString() { function toString() {
// reconstruct authority // reconstruct authority

View File

@ -5,6 +5,16 @@ require_once 'HTMLPurifier/URIFilter.php';
require_once 'HTMLPurifier/URIFilter/DisableExternal.php'; require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php'; require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once 'HTMLPurifier/URIFilter/HostBlacklist.php';
HTMLPurifier_ConfigSchema::define(
'URI', 'DefinitionID', null, 'string/null', '
<p>
Unique identifier for a custom-built URI definition. If you want
to add custom URIFilters, you must specify this value.
This directive has been available since 2.1.0.
</p>
');
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'URI', 'DefinitionRev', 1, 'int', ' 'URI', 'DefinitionRev', 1, 'int', '
@ -25,6 +35,7 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
function HTMLPurifier_URIDefinition() { function HTMLPurifier_URIDefinition() {
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
} }
function registerFilter($filter) { function registerFilter($filter) {

View File

@ -0,0 +1,28 @@
<?php
// Load the URIFilter file before this filter is declared.
require_once 'HTMLPurifier/URIFilter.php';
// Register the URI.HostBlacklist directive: type 'list', empty array by
// default. Per the description below, entries are matched as substrings
// of the host, not as whole domain names.
HTMLPurifier_ConfigSchema::define(
'URI', 'HostBlacklist', array(), 'list',
'List of strings that are forbidden in the host of any URI. Use it to '.
'kill domain names of spam, etc. Note that it will catch anything in '.
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
'This directive has been available since 1.3.0.'
);
/**
 * URI filter that rejects any URI whose host contains one of the string
 * fragments configured in the URI.HostBlacklist directive.
 */
class HTMLPurifier_URIFilter_HostBlacklist
{
    /** Name of this filter */
    var $name = 'HostBlacklist';

    /** Forbidden host fragments; filled in by prepare() */
    var $blacklist = array();

    /**
     * Copies the blacklist out of the configuration.
     * @param $config Configuration object whose URI.HostBlacklist value is read
     */
    function prepare($config) {
        $this->blacklist = $config->get('URI', 'HostBlacklist');
    }

    /**
     * Checks the URI's host against every blacklisted fragment.
     * @param $uri Object with a host property; it is only read here
     * @param $config Configuration object (unused; prepare() already read it)
     * @param $context Context object (unused)
     * @return bool False if the host contains a blacklisted fragment,
     *         true otherwise
     */
    function filter(&$uri, $config, &$context) {
        // a URI without a host (e.g. a relative one) cannot be blacklisted
        if (is_null($uri->host) || $uri->host === '') return true;
        foreach ($this->blacklist as $blacklisted_host_fragment) {
            // An empty fragment is meaningless: strpos() raises a warning
            // for it on PHP < 8 and reports a match at offset 0 on PHP >= 8,
            // which would reject every single URI.
            if (is_null($blacklisted_host_fragment) || $blacklisted_host_fragment === '') continue;
            // NOTE(review): the comparison is case-sensitive; confirm that
            // hosts are normalized to lowercase before filters run.
            if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
                return false;
            }
        }
        return true;
    }
}

View File

@ -2,6 +2,7 @@
require_once 'HTMLPurifier/AttrDefHarness.php'; require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/URI.php'; require_once 'HTMLPurifier/AttrDef/URI.php';
require_once 'HTMLPurifier/URIParser.php';
/** /**
* @todo Aim for complete code coverage with mocks * @todo Aim for complete code coverage with mocks
@ -48,48 +49,34 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$this->assertDef('javascript:foobar();', false); $this->assertDef('javascript:foobar();', false);
} }
function test_validate_configDisableExternal() { function testDefaultSchemeRemovedInBlank() {
$this->assertDef('http:', '');
$this->def = new HTMLPurifier_AttrDef_URI();
$this->config->set('URI', 'DisableExternal', true);
$this->config->set('URI', 'Host', 'sub.example.com');
$this->assertDef('/foobar.txt');
$this->assertDef('http://google.com/', false);
$this->assertDef('http://sub.example.com/alas?foo=asd');
$this->assertDef('http://example.com/teehee', false);
$this->assertDef('http://www.example.com/#man', false);
$this->assertDef('http://go.sub.example.com/perhaps?p=foo');
} }
function test_validate_configDisableExternalResources() { function testDefaultSchemeRemovedInRelativeURI() {
$this->assertDef('http:/foo/bar', '/foo/bar');
$this->config->set('URI', 'DisableExternalResources', true);
$this->assertDef('http://sub.example.com/alas?foo=asd');
$this->assertDef('/img.png');
$this->def = new HTMLPurifier_AttrDef_URI(true);
$this->assertDef('http://sub.example.com/alas?foo=asd', false);
$this->assertDef('/img.png');
} }
function test_validate_configBlacklist() { function testDefaultSchemeNotRemovedInAbsoluteURI() {
$this->assertDef('http://example.com/foo/bar');
}
$this->config->set('URI', 'HostBlacklist', array('example.com', 'moo')); function testAltSchemeNotRemoved() {
$this->assertDef('mailto:this-looks-like-a-path@example.com');
$this->assertDef('foo.txt'); }
$this->assertDef('http://www.google.com/example.com/moo');
$this->assertDef('http://example.com/#23', false);
$this->assertDef('https://sub.domain.example.com/foobar', false);
$this->assertDef('http://example.com.example.net/?whoo=foo', false);
$this->assertDef('ftp://moo-moo.net/foo/foo/', false);
function testURIDefinitionValidation() {
$parser = new HTMLPurifier_URIParser();
$uri = $parser->parse('http://example.com');
$this->config->set('URI', 'DefinitionID', 'HTMLPurifier_AttrDef_URITest->testURIDefinitionValidation');
$uri_def =& $this->config->getDefinition('URI');
// overload with mock
generate_mock_once('HTMLPurifier_URIDefinition');
$uri_def = new HTMLPurifier_URIDefinitionMock();
$uri_def->expectOnce('filter', array($uri, '*', '*'));
$uri_def->setReturnValue('filter', true, array($uri, '*', '*'));
$uri_def->setup = true;
$this->assertDef('http://example.com');
} }
/* /*

View File

@ -0,0 +1,30 @@
<?php
require_once 'HTMLPurifier/URIFilter/HostBlacklist.php';
require_once 'HTMLPurifier/URIFilterHarness.php';
/**
 * Tests for the HostBlacklist URI filter, run through the shared
 * URIFilter test harness.
 */
class HTMLPurifier_URIFilter_HostBlacklistTest extends HTMLPurifier_URIFilterHarness
{

    /** Installs a fresh HostBlacklist filter before every test. */
    function setUp() {
        parent::setUp();
        $blacklist_filter = new HTMLPurifier_URIFilter_HostBlacklist();
        $this->filter = $blacklist_filter;
    }

    /** A host equal to a blacklisted entry is rejected. */
    function testRejectBlacklistedHost() {
        $forbidden = 'example.com';
        $this->config->set('URI', 'HostBlacklist', $forbidden);
        $expected = false;
        $this->assertFiltering('http://example.com', $expected);
    }

    /**
     * A host that merely contains the blacklisted text is rejected too,
     * even though it is a different domain.
     */
    function testRejectBlacklistedHostThoughNotTrue() {
        // maybe this behavior should change
        $forbidden = 'example.com';
        $this->config->set('URI', 'HostBlacklist', $forbidden);
        $expected = false;
        $this->assertFiltering('http://example.comcast.com', $expected);
    }

    /** A host that does not contain the blacklisted text passes through. */
    function testPreserveNonBlacklistedHost() {
        $forbidden = 'example.com';
        $this->config->set('URI', 'HostBlacklist', $forbidden);
        $this->assertFiltering('http://google.com');
    }

}

View File

@ -151,22 +151,6 @@ class HTMLPurifier_URITest extends HTMLPurifier_URIHarness
} }
} }
function test_validate_defaultSchemeRemovedInBlank() {
$this->assertValidation('http:', '');
}
function test_validate_defaultSchemeRemovedInRelativeURI() {
$this->assertValidation('http:/foo/bar', '/foo/bar');
}
function test_validate_defaultSchemeNotRemovedInAbsoluteURI() {
$this->assertValidation('http://example.com/foo/bar');
}
function test_validate_altSchemeNotRemoved() {
$this->assertValidation('mailto:this-looks-like-a-path@example.com');
}
function test_validate_overlongPort() { function test_validate_overlongPort() {
$this->assertValidation('http://example.com:65536', 'http://example.com'); $this->assertValidation('http://example.com:65536', 'http://example.com');
} }
@ -176,7 +160,7 @@ class HTMLPurifier_URITest extends HTMLPurifier_URIHarness
} }
function test_validate_invalidHostThatLooksLikeIPv6() { function test_validate_invalidHostThatLooksLikeIPv6() {
$this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', ''); $this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', 'http:');
} }
} }

View File

@ -105,6 +105,7 @@ $test_files[] = 'HTMLPurifier/TokenTest.php';
$test_files[] = 'HTMLPurifier/URIDefinitionTest.php'; $test_files[] = 'HTMLPurifier/URIDefinitionTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php'; $test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php'; $test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/HostBlacklistTest.php';
$test_files[] = 'HTMLPurifier/URIParserTest.php'; $test_files[] = 'HTMLPurifier/URIParserTest.php';
$test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php'; $test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
$test_files[] = 'HTMLPurifier/URISchemeTest.php'; $test_files[] = 'HTMLPurifier/URISchemeTest.php';