From 4bdc0446dedc807d61c8630418201090d4c0336d Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Sun, 26 Nov 2006 23:14:12 +0000
Subject: [PATCH] [1.3.0] New directive %URI.HostBlacklist for blocking links
to bad hosts. xssAttacks.php smoketest updated accordingly.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@586 48356398-32a2-884e-a903-53898d9a118a
---
NEWS | 2 ++
docs/proposal-new-directives.txt | 1 -
library/HTMLPurifier/AttrDef/URI.php | 29 ++++++++++++++++++++++++++
smoketests/xssAttacks.php | 13 ++++++++----
tests/HTMLPurifier/AttrDef/URITest.php | 26 +++++++++++++++++++++++
5 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/NEWS b/NEWS
index b9c97098..e61c972a 100644
--- a/NEWS
+++ b/NEWS
@@ -28,6 +28,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
service to avoid PageRank leaks or warn users that they are exiting your site.
! Added spiffy new smoketest printDefinition.php, which lets you twiddle with
the configuration settings and see how the internal rules are affected.
+! New directive %URI.HostBlacklist for blocking links to bad hosts.
+ xssAttacks.php smoketest updated accordingly.
- Added missing type to ChildDef_Chameleon
- Remove Tidy option from demo if there is not Tidy available
. ChildDef_Required guards against empty tags
diff --git a/docs/proposal-new-directives.txt b/docs/proposal-new-directives.txt
index 4f1c76a1..75c963e6 100644
--- a/docs/proposal-new-directives.txt
+++ b/docs/proposal-new-directives.txt
@@ -26,7 +26,6 @@ time. Note the naming convention: %Namespace.Directive
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
-%URI.HostBlacklist - strings that if found in the host of a URI are disallowed
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
%URI.HostWhitelist - domain names that are excluded from the host blacklist
%URI.HostPolicy - determines whether or not its reject all and then whitelist
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php
index ad3bb573..d5a36434 100644
--- a/library/HTMLPurifier/AttrDef/URI.php
+++ b/library/HTMLPurifier/AttrDef/URI.php
@@ -69,6 +69,14 @@ HTMLPurifier_ConfigSchema::define(
'This directive has been available since 1.3.0.'
);
+HTMLPurifier_ConfigSchema::define( // registers %URI.HostBlacklist; read by checkBlacklist() as a substring match on the host
+ 'URI', 'HostBlacklist', array(), 'list',
+ 'List of strings that are forbidden in the host of any URI. Use it to '.
+ 'kill domain names of spam, etc. Note that it will catch anything in '.
+ 'the domain, so moo.com will catch moo.com.example.com. '.
+ 'This directive has been available since 1.3.0.'
+);
+
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
@@ -185,6 +193,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$host = $this->host->validate($host, $config, $context);
if ($host === false) $host = null;
+ if ($this->checkBlacklist($host, $config, $context)) return false;
+
// more lenient absolute checking
if (isset($our_host)) {
$host_parts = array_reverse(explode('.', $host));
@@ -252,6 +262,25 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
}
+    /**
+     * Checks a host against %URI.HostBlacklist (case-insensitive substring match)
+     * @param $host Host to check; null or empty hosts never match
+     * @param $config HTMLPurifier_Config instance
+     * @param $context HTMLPurifier_Context instance (not used by this check)
+     * @return bool true if the host contains a blacklisted fragment
+     */
+    function checkBlacklist($host, &$config, &$context) {
+        $blacklist = $config->get('URI', 'HostBlacklist');
+        if (empty($blacklist) || !is_string($host) || $host === '') return false;
+        $host = strtolower($host); // host names are case-insensitive (RFC 3986)
+        foreach ($blacklist as $fragment) {
+            $fragment = strtolower((string) $fragment);
+            // skip '': an empty needle warns before PHP 8 and matches every host from 8.0
+            if ($fragment !== '' && strpos($host, $fragment) !== false) return true;
+        }
+        return false;
+    }
+
}
?>
diff --git a/smoketests/xssAttacks.php b/smoketests/xssAttacks.php
index b0fec354..f5bb78bb 100644
--- a/smoketests/xssAttacks.php
+++ b/smoketests/xssAttacks.php
@@ -35,9 +35,9 @@ function formatCode($string) {
XSS attacks are from
http://ha.ckers.org/xss.html.
Caveats:
-The last segment of tests regarding blacklisted websites is not
-applicable at the moment, but when we add that functionality they'll be
-relevant. Most XSS broadcasts its presence by spawning an alert dialogue.
+Google.com has been programmatically disallowed, but as you can
+see, there are ways of getting around that, so coverage in this area
+is not complete. Most XSS broadcasts its presence by spawning an alert dialogue.
The displayed code is not strictly correct, as linebreaks have been forced for
readability. Linewraps have been marked with ». Some tests are
omitted for your convenience. Not all control characters are displayed.
@@ -48,7 +48,12 @@ omitted for your convenience. Not all control characters are displayed.
if (version_compare(PHP_VERSION, '5', '<')) exit('Requires PHP 5.
');
$xml = simplexml_load_file('xssAttacks.xml');
-$purifier = new HTMLPurifier();
+
+// programmatically disallow google.com for URI evasion tests
+// (blacklist coverage is not complete)
+$config = HTMLPurifier_Config::createDefault();
+$config->set('URI', 'HostBlacklist', array('google.com'));
+$purifier = new HTMLPurifier($config);
?>
diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php
index f321ee04..a80c436f 100644
--- a/tests/HTMLPurifier/AttrDef/URITest.php
+++ b/tests/HTMLPurifier/AttrDef/URITest.php
@@ -300,6 +300,32 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
}
+ function testBlacklist() {
+ // Blacklist two fragments; the checks below exercise host-substring matching.
+ $this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
+ // Expected to pass: no host at all, or fragments that appear only in the path.
+ $this->assertDef('foo.txt');
+ $this->assertDef('http://www.google.com/example.com/moo');
+ // Expected to be rejected (second argument false): fragment occurs inside the host.
+ $this->assertDef('http://example.com/#23', false);
+ $this->assertDef('https://sub.domain.example.com/foobar', false);
+ $this->assertDef('http://example.com.example.net/?whoo=foo', false);
+ $this->assertDef('ftp://moo-moo.net/foo/foo/', false);
+
+ }
+
+ function testWhitelist() {
+ /* Disabled placeholder: %URI.HostPolicy and %URI.HostWhitelist are not defined by this patch (still listed in docs/proposal-new-directives.txt).
+ $this->config->set('URI', 'HostPolicy', 'DenyAll');
+ $this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
+
+ $this->assertDef('http://example.com/fo/google.com', false);
+ $this->assertDef('server.txt');
+ $this->assertDef('ftp://www.google.com/?t=a');
+ $this->assertDef('http://google.com.tricky.spamsite.net', false);
+ */
+ }
+
}
?>
\ No newline at end of file