diff --git a/NEWS b/NEWS index b9c97098..e61c972a 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier service to avoid PageRank leaks or warn users that they are exiting your site. ! Added spiffy new smoketest printDefinition.php, which lets you twiddle with the configuration settings and see how the internal rules are affected. +! New directive %URI.HostBlacklist for blocking links to bad hosts. + xssAttacks.php smoketest updated accordingly. - Added missing type to ChildDef_Chameleon - Remove Tidy option from demo if there is not Tidy available . ChildDef_Required guards against empty tags diff --git a/docs/proposal-new-directives.txt b/docs/proposal-new-directives.txt index 4f1c76a1..75c963e6 100644 --- a/docs/proposal-new-directives.txt +++ b/docs/proposal-new-directives.txt @@ -26,7 +26,6 @@ time. Note the naming convention: %Namespace.Directive %URI.RelativeToAbsolute - transforms all relative URIs to absolute form -%URI.HostBlacklist - strings that if found in the host of a URI are disallowed %URI.HostBlacklistRegex - regexes that if matching the host are disallowed %URI.HostWhitelist - domain names that are excluded from the host blacklist %URI.HostPolicy - determines whether or not its reject all and then whitelist diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index ad3bb573..d5a36434 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -69,6 +69,14 @@ HTMLPurifier_ConfigSchema::define( 'This directive has been available since 1.3.0.' ); +HTMLPurifier_ConfigSchema::define( + 'URI', 'HostBlacklist', array(), 'list', + 'List of strings that are forbidden in the host of any URI. Use it to '. + 'kill domain names of spam, etc. Note that it will catch anything in '. + 'the domain, so moo.com will catch moo.com.example.com. '. + 'This directive has been available since 1.3.0.' +); + /** * Validates a URI as defined by RFC 3986. 
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme @@ -185,6 +193,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $host = $this->host->validate($host, $config, $context); if ($host === false) $host = null; + if ($this->checkBlacklist($host, $config, $context)) return false; + // more lenient absolute checking if (isset($our_host)) { $host_parts = array_reverse(explode('.', $host)); @@ -252,6 +262,25 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef } + /** + * Checks a host against an array blacklist + * @param $host Host to check + * @param $config HTMLPurifier_Config instance + * @param $context HTMLPurifier_Context instance + * @return bool Is spam? + */ + function checkBlacklist($host, &$config, &$context) { + $blacklist = $config->get('URI', 'HostBlacklist'); + if (!empty($blacklist)) { + foreach($blacklist as $blacklisted_host_fragment) { + if (strpos($host, $blacklisted_host_fragment) !== false) { + return true; + } + } + } + return false; + } + } ?> diff --git a/smoketests/xssAttacks.php b/smoketests/xssAttacks.php index b0fec354..f5bb78bb 100644 --- a/smoketests/xssAttacks.php +++ b/smoketests/xssAttacks.php @@ -35,9 +35,9 @@ function formatCode($string) {
XSS attacks are from http://ha.ckers.org/xss.html.
Caveats: -The last segment of tests regarding blacklisted websites is not -applicable at the moment, but when we add that functionality they'll be -relevant. Most XSS broadcasts its presence by spawning an alert dialogue. +Google.com has been programmatically disallowed, but as you can +see, there are ways of getting around that, so coverage in this area +is not complete. Most XSS broadcasts its presence by spawning an alert dialogue. The displayed code is not strictly correct, as linebreaks have been forced for readability. Linewraps have been marked with ». Some tests are omitted for your convenience. Not all control characters are displayed.
@@ -48,7 +48,12 @@ omitted for your convenience. Not all control characters are displayed. if (version_compare(PHP_VERSION, '5', '<')) exit('Requires PHP 5.
'); $xml = simplexml_load_file('xssAttacks.xml'); -$purifier = new HTMLPurifier(); + +// programatically disallow google.com for URI evasion tests +// not complete +$config = HTMLPurifier_Config::createDefault(); +$config->set('URI', 'HostBlacklist', array('google.com')); +$purifier = new HTMLPurifier($config); ?>