From f38fe431ed070fb4c03f0cc9c5a6764bc5278e6f Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 12 Nov 2006 03:35:41 +0000 Subject: [PATCH] [1.2.0] - Added %URI.DisableExternal, which prevents links to external websites. You can also use %URI.Host to permit absolute linking to subdomains - Fixed a few bugs involving null configuration values git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@522 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 2 ++ docs/config-ideas.txt | 4 --- library/HTMLPurifier/AttrDef/URI.php | 40 ++++++++++++++++++++++++++ library/HTMLPurifier/Config.php | 4 +-- tests/HTMLPurifier/AttrDef/URITest.php | 19 +++++++++++- tests/HTMLPurifier/ConfigTest.php | 19 ++++++++++++ 6 files changed, 81 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index f5943431..c6cf6f74 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! Added percent encoding normalization ! XSS attacks smoketest given facelift ! Configuration documentation now has table of contents +! Added %URI.DisableExternal, which prevents links to external websites. You + can also use %URI.Host to permit absolute linking to subdomains - Documentation updated + TODO added request Phalanger + TODO added request Native compression diff --git a/docs/config-ideas.txt b/docs/config-ideas.txt index 852b3aca..412df8ce 100644 --- a/docs/config-ideas.txt +++ b/docs/config-ideas.txt @@ -39,8 +39,4 @@ time. 
Note the naming convention: %Namespace.Directive %URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the spread of ill-gotten pagerank -%URI.Host - host of website, for external link checks - %URI.RelativeToAbsolute - transforms all relative URIs to absolute form - -%URI.DisableExternal - disable external links diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 770b1282..1547a0e6 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -12,6 +12,28 @@ HTMLPurifier_ConfigSchema::define( 'select the proper object validator when no scheme information is present.' ); +HTMLPurifier_ConfigSchema::define( + 'URI', 'Host', null, 'string/null', + 'Defines the domain name of the server, so we can determine whether '. + 'an absolute URI is from your website or not. Not strictly necessary, '. + 'as users should be using relative URIs to reference resources on your '. + 'website. It will, however, let you use absolute URIs to link to '. + 'subdomains of the domain you post here: i.e. example.com will allow '. + 'sub.example.com. However, higher up domains will still be excluded: '. + 'if you set %URI.Host to sub.example.com, example.com will be blocked. '. + 'This directive has been available since 1.2.0.' +); + +HTMLPurifier_ConfigSchema::Define( + 'URI', 'DisableExternal', false, 'bool', + 'Disables links to external websites. This is a highly effective '. + 'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no '. + 'links or images outside of your domain will be allowed. Non-linkified '. + 'URIs will still be preserved. If you want to be able to link to '. + 'subdomains or use absolute URIs, specify %URI.Host for your website. '. + 'This directive has been available since 1.2.0.' +); + /** * Validates a URI as defined by RFC 3986. 
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme @@ -81,6 +103,13 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef if ($authority !== null) { + // remove URI if it's absolute and we disallow externals + unset($our_host); + if ($config->get('URI', 'DisableExternal')) { + $our_host = $config->get('URI', 'Host'); + if ($our_host === null) return false; + } + $HEXDIG = '[A-Fa-f0-9]'; $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with [] $sub_delims = '!$&\'()'; // needs [] @@ -103,6 +132,17 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $host = $this->host->validate($host, $config, $context); if ($host === false) $host = null; + // more lenient absolute checking + if (isset($our_host)) { + $host_parts = array_reverse(explode('.', $host)); + // could be cached + $our_host_parts = array_reverse(explode('.', $our_host)); + foreach ($our_host_parts as $i => $discard) { + if (!isset($host_parts[$i])) return false; + if ($host_parts[$i] != $our_host_parts[$i]) return false; + } + } + // userinfo and host are validated within the regexp } else { diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index b2ac0e70..aa63ea41 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -60,7 +60,7 @@ class HTMLPurifier_Config * @param $key String key */ function get($namespace, $key) { - if (!isset($this->conf[$namespace][$key])) { + if (!isset($this->def->info[$namespace][$key])) { trigger_error('Cannot retrieve value of undefined directive', E_USER_WARNING); return; @@ -75,7 +75,7 @@ class HTMLPurifier_Config * @param $value Mixed value */ function set($namespace, $key, $value) { - if (!isset($this->conf[$namespace][$key])) { + if (!isset($this->def->info[$namespace][$key])) { trigger_error('Cannot set undefined directive to value', E_USER_WARNING); return; diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index 
50a1f5e7..daf03152 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -233,7 +233,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness function testIntegration() { $this->def = new HTMLPurifier_AttrDef_URI(); - $this->config = $this->context = null; $this->assertDef('http://www.google.com/'); $this->assertDef('javascript:bad_stuff();', false); @@ -244,6 +243,24 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness } + function testDisableExternal() { + + $this->def = new HTMLPurifier_AttrDef_URI(); + $this->config->set('URI', 'DisableExternal', true); + + $this->assertDef('/foobar.txt'); + $this->assertDef('http://google.com/', false); + $this->assertDef('http://sub.example.com/alas?foo=asd', false); + + $this->config->set('URI', 'Host', 'sub.example.com'); + + $this->assertDef('http://sub.example.com/alas?foo=asd'); + $this->assertDef('http://example.com/teehee', false); + $this->assertDef('http://www.example.com/#man', false); + $this->assertDef('http://go.sub.example.com/perhaps?p=foo'); + + } + } ?> \ No newline at end of file diff --git a/tests/HTMLPurifier/ConfigTest.php b/tests/HTMLPurifier/ConfigTest.php index bec55540..77fa12fe 100644 --- a/tests/HTMLPurifier/ConfigTest.php +++ b/tests/HTMLPurifier/ConfigTest.php @@ -37,6 +37,10 @@ class HTMLPurifier_ConfigTest extends UnitTestCase 'Core', 'Encoding', 'utf-8', 'istring', 'Case insensitivity!' ); + HTMLPurifier_ConfigSchema::define( + 'Extension', 'CanBeNull', null, 'string/null', 'Null or string!' 
+ ); + HTMLPurifier_ConfigSchema::defineAllowedValues( 'Extension', 'Pert', array('foo', 'moo') ); @@ -92,6 +96,21 @@ class HTMLPurifier_ConfigTest extends UnitTestCase $this->assertNoErrors(); $this->assertIdentical($config->get('Core', 'Encoding'), 'iso-8859-1'); + // set null to directive that allows null + $config->set('Extension', 'CanBeNull', null); + $this->assertNoErrors(); + $this->assertIdentical($config->get('Extension', 'CanBeNull'), null); + + $config->set('Extension', 'CanBeNull', 'foobar'); + $this->assertNoErrors(); + $this->assertIdentical($config->get('Extension', 'CanBeNull'), 'foobar'); + + // set null to directive that doesn't allow null + $config->set('Extension', 'Pert', null); + $this->assertError('Value is of invalid type'); + $this->assertNoErrors(); + $this->swallowErrors(); + } function test_getDefinition() {