0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-19 10:45:18 +00:00

[1.3.0] New directive %URI.HostBlacklist for blocking links to bad hosts. xssAttacks.php smoketest updated accordingly.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@586 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-11-26 23:14:12 +00:00
parent 45a70e8ae4
commit 4bdc0446de
5 changed files with 66 additions and 5 deletions

2
NEWS
View File

@ -28,6 +28,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
service to avoid PageRank leaks or warn users that they are exiting your site.
! Added spiffy new smoketest printDefinition.php, which lets you twiddle with
the configuration settings and see how the internal rules are affected.
! New directive %URI.HostBlacklist for blocking links to bad hosts.
xssAttacks.php smoketest updated accordingly.
- Added missing type to ChildDef_Chameleon
- Remove Tidy option from demo if there is not Tidy available
. ChildDef_Required guards against empty tags

View File

@ -26,7 +26,6 @@ time. Note the naming convention: %Namespace.Directive
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
%URI.HostBlacklist - strings that if found in the host of a URI are disallowed
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
%URI.HostWhitelist - domain names that are excluded from the host blacklist
%URI.HostPolicy - determines whether or not its reject all and then whitelist

View File

@ -69,6 +69,14 @@ HTMLPurifier_ConfigSchema::define(
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'HostBlacklist', array(), 'list',
'List of strings that are forbidden in the host of any URI. Use it to '.
'kill domain names of spam, etc. Note that it will catch anything in '.
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
'This directive has been available since 1.3.0.'
);
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
@ -185,6 +193,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$host = $this->host->validate($host, $config, $context);
if ($host === false) $host = null;
if ($this->checkBlacklist($host, $config, $context)) return false;
// more lenient absolute checking
if (isset($our_host)) {
$host_parts = array_reverse(explode('.', $host));
@ -252,6 +262,25 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
}
/**
* Checks a host against an array blacklist
* @param $host Host to check
* @param $config HTMLPurifier_Config instance
* @param $context HTMLPurifier_Context instance
* @return bool Is spam?
*/
function checkBlacklist($host, &$config, &$context) {
$blacklist = $config->get('URI', 'HostBlacklist');
if (!empty($blacklist)) {
foreach($blacklist as $blacklisted_host_fragment) {
if (strpos($host, $blacklisted_host_fragment) !== false) {
return true;
}
}
}
return false;
}
}
?>

View File

@ -35,9 +35,9 @@ function formatCode($string) {
<p>XSS attacks are from
<a href="http://ha.ckers.org/xss.html">http://ha.ckers.org/xss.html</a>.</p>
<p><strong>Caveats:</strong>
The last segment of tests regarding blacklisted websites is not
applicable at the moment, but when we add that functionality they'll be
relevant. Most XSS broadcasts its presence by spawning an alert dialogue.
<tt>Google.com</tt> has been programatically disallowed, but as you can
see, there are ways of getting around that, so coverage in this area
is not complete. Most XSS broadcasts its presence by spawning an alert dialogue.
The displayed code is not strictly correct, as linebreaks have been forced for
readability. Linewraps have been marked with <tt>»</tt>. Some tests are
omitted for your convenience. Not all control characters are displayed.</p>
@ -48,7 +48,12 @@ omitted for your convenience. Not all control characters are displayed.</p>
if (version_compare(PHP_VERSION, '5', '<')) exit('<p>Requires PHP 5.</p>');
$xml = simplexml_load_file('xssAttacks.xml');
$purifier = new HTMLPurifier();
// programatically disallow google.com for URI evasion tests
// not complete
$config = HTMLPurifier_Config::createDefault();
$config->set('URI', 'HostBlacklist', array('google.com'));
$purifier = new HTMLPurifier($config);
?>
<table cellspacing="0" cellpadding="2">

View File

@ -300,6 +300,32 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
}
function testBlacklist() {
$this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
$this->assertDef('foo.txt');
$this->assertDef('http://www.google.com/example.com/moo');
$this->assertDef('http://example.com/#23', false);
$this->assertDef('https://sub.domain.example.com/foobar', false);
$this->assertDef('http://example.com.example.net/?whoo=foo', false);
$this->assertDef('ftp://moo-moo.net/foo/foo/', false);
}
function testWhitelist() {
/*
$this->config->set('URI', 'HostPolicy', 'DenyAll');
$this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
$this->assertDef('http://example.com/fo/google.com', false);
$this->assertDef('server.txt');
$this->assertDef('ftp://www.google.com/?t=a');
$this->assertDef('http://google.com.tricky.spamsite.net', false);
*/
}
}
?>