From 7bccc2497773a60e2c4160d2a29278e1c64e4719 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Thu, 2 Aug 2007 21:47:24 +0000
Subject: [PATCH] [2.1.0] Implement MakeAbsolute URI filter - Move some
directives with complex dependencies to URIDefinition - Fix a missing extends
- Add hierarchical information to URI schemes - Fix bug in URIHarness.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1346 48356398-32a2-884e-a903-53898d9a118a
---
NEWS | 2 +
docs/proposal-new-directives.txt | 2 -
library/HTMLPurifier/AttrDef/URI.php | 91 +++++++------
library/HTMLPurifier/URI.php | 12 +-
library/HTMLPurifier/URIDefinition.php | 77 +++++++++++
.../HTMLPurifier/URIFilter/HostBlacklist.php | 2 +-
.../HTMLPurifier/URIFilter/MakeAbsolute.php | 115 +++++++++++++++++
library/HTMLPurifier/URIScheme.php | 6 +
library/HTMLPurifier/URIScheme/ftp.php | 1 +
library/HTMLPurifier/URIScheme/http.php | 1 +
tests/HTMLPurifier/URIDefinitionTest.php | 25 ++++
.../DisableExternalResourcesTest.php | 1 +
.../URIFilter/MakeAbsoluteTest.php | 122 ++++++++++++++++++
tests/HTMLPurifier/URIHarness.php | 2 +-
tests/test_files.php | 1 +
15 files changed, 411 insertions(+), 49 deletions(-)
create mode 100644 library/HTMLPurifier/URIFilter/MakeAbsolute.php
create mode 100644 tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
diff --git a/NEWS b/NEWS
index a4c3d59c..a6509bdd 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Standalone file now available, which greatly reduces the amount of
includes (although there are still a few files that reside in the
standalone folder)
+! Relative URIs can now be transformed into their absolute equivalents
+ using %URI.Base and %URI.MakeAbsolute
- AutoFormatters emit friendly error messages if tags or attributes they
need are not allowed
- ConfigForm's compactification of directive names is now configurable
diff --git a/docs/proposal-new-directives.txt b/docs/proposal-new-directives.txt
index 2c08ddbb..b3351b4c 100644
--- a/docs/proposal-new-directives.txt
+++ b/docs/proposal-new-directives.txt
@@ -22,8 +22,6 @@ time. Note the naming convention: %Namespace.Directive
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
spread of ill-gotten pagerank
-%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
-
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
%URI.HostWhitelist - domain names that are excluded from the host blacklist
%URI.HostPolicy - determines whether or not its reject all and then whitelist
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php
index 3b97e007..dcf9849c 100644
--- a/library/HTMLPurifier/AttrDef/URI.php
+++ b/library/HTMLPurifier/AttrDef/URI.php
@@ -7,54 +7,59 @@ require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
require_once 'HTMLPurifier/PercentEncoder.php';
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'DefaultScheme', 'http', 'string',
- 'Defines through what scheme the output will be served, in order to '.
- 'select the proper object validator when no scheme information is present.'
-);
+// special case filtering directives
HTMLPurifier_ConfigSchema::define(
- 'URI', 'Host', null, 'string/null',
- 'Defines the domain name of the server, so we can determine whether or '.
- 'an absolute URI is from your website or not. Not strictly necessary, '.
- 'as users should be using relative URIs to reference resources on your '.
- 'website. It will, however, let you use absolute URIs to link to '.
- 'subdomains of the domain you post here: i.e. example.com will allow '.
- 'sub.example.com. However, higher up domains will still be excluded: '.
- 'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
- 'This directive has been available since 1.2.0.'
-);
+ 'URI', 'Munge', null, 'string/null', '
+
+ Munges all browsable (usually http, https and ftp)
+ absolute URI\'s into another URI, usually a URI redirection service.
+ This directive accepts a URI, formatted with a <code>%s</code> where
+ the url-encoded original URI should be inserted (sample:
+ <code>http://www.google.com/url?q=%s</code>).
+
+
+ Uses for this directive:
+
+
+ -
+ Prevent PageRank leaks, while being fairly transparent
+ to users (you may also want to add some client side JavaScript to
+ override the text in the statusbar). Notice:
+ Many security experts believe that this form of protection does not deter spam-bots.
+
+ -
+ Redirect users to a splash page telling them they are leaving your
+ website. While this is poor usability practice, it is often mandated
+ in corporate environments.
+
+
+
+ This directive has been available since 1.3.0.
+
+');
+
+// disabling directives
HTMLPurifier_ConfigSchema::define(
- 'URI', 'DisableResources', false, 'bool',
- 'Disables embedding resources, essentially meaning no pictures. You can '.
- 'still link to them though. See %URI.DisableExternalResources for why '.
- 'this might be a good idea. This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'Munge', null, 'string/null',
- 'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
- 'redirection service. Pass this directive a URI, with %s inserted where '.
- 'the url-encoded original URI should be inserted (sample: '.
- '<code>http://www.google.com/url?q=%s</code>). '.
- 'This prevents PageRank leaks, while being as transparent as possible '.
- 'to users (you may also want to add some client side JavaScript to '.
- 'override the text in the statusbar). Warning: many security experts '.
- 'believe that this form of protection does not deter spam-bots. '.
- 'You can also use this directive to redirect users to a splash page '.
- 'telling them they are leaving your website. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'URI', 'Disable', false, 'bool',
- 'Disables all URIs in all forms. Not sure why you\'d want to do that '.
- '(after all, the Internet\'s founded on the notion of a hyperlink). '.
- 'This directive has been available since 1.3.0.'
-);
+ 'URI', 'Disable', false, 'bool', '
+
+ Disables all URIs in all forms. Not sure why you\'d want to do that
+ (after all, the Internet\'s founded on the notion of a hyperlink).
+ This directive has been available since 1.3.0.
+
+');
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'DisableResources', false, 'bool', '
+
+ Disables embedding resources, essentially meaning no pictures. You can
+ still link to them though. See %URI.DisableExternalResources for why
+ this might be a good idea. This directive has been available since 1.3.0.
+
+');
+
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
@@ -118,7 +123,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
// munge scheme off if necessary (this must be last)
if (!is_null($uri->scheme) && is_null($uri->host)) {
- if ($config->get('URI', 'DefaultScheme') == $uri->scheme) {
+ if ($uri_def->defaultScheme == $uri->scheme) {
$uri->scheme = null;
}
}
diff --git a/library/HTMLPurifier/URI.php b/library/HTMLPurifier/URI.php
index cec71436..ed7ffdd6 100644
--- a/library/HTMLPurifier/URI.php
+++ b/library/HTMLPurifier/URI.php
@@ -37,11 +37,12 @@ class HTMLPurifier_URI
if (!$scheme_obj) return false; // invalid scheme, clean it out
} else {
// no scheme: retrieve the default one
- $scheme_obj = $registry->getScheme($config->get('URI', 'DefaultScheme'), $config, $context);
+ $def = $config->getDefinition('URI');
+ $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
if (!$scheme_obj) {
// something funky happened to the default scheme object
trigger_error(
- 'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
+ 'Default scheme object "' . $def->defaultScheme . '" was not readable',
E_USER_WARNING
);
return false;
@@ -107,5 +108,12 @@ class HTMLPurifier_URI
return $result;
}
+    /**
+     * Builds a duplicate of this URI object by round-tripping it through
+     * PHP's serializer, so the result shares no state with the original.
+     * @return The object rebuilt by unserialize()
+     */
+    function copy() {
+        $frozen = serialize($this);
+        return unserialize($frozen);
+    }
+
}
diff --git a/library/HTMLPurifier/URIDefinition.php b/library/HTMLPurifier/URIDefinition.php
index 0623c983..131c95de 100644
--- a/library/HTMLPurifier/URIDefinition.php
+++ b/library/HTMLPurifier/URIDefinition.php
@@ -2,10 +2,12 @@
require_once 'HTMLPurifier/Definition.php';
require_once 'HTMLPurifier/URIFilter.php';
+require_once 'HTMLPurifier/URIParser.php';
require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once 'HTMLPurifier/URIFilter/HostBlacklist.php';
+require_once 'HTMLPurifier/URIFilter/MakeAbsolute.php';
HTMLPurifier_ConfigSchema::define(
'URI', 'DefinitionID', null, 'string/null', '
@@ -25,6 +27,48 @@ HTMLPurifier_ConfigSchema::define(
');
+// informative URI directives
+
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'DefaultScheme', 'http', 'string', '
+
+ Defines through what scheme the output will be served, in order to
+ select the proper object validator when no scheme information is present.
+
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'Host', null, 'string/null', '
+
+ Defines the domain name of the server, so we can determine whether or
+ an absolute URI is from your website or not. Not strictly necessary,
+ as users should be using relative URIs to reference resources on your
+ website. It will, however, let you use absolute URIs to link to
+ subdomains of the domain you post here: i.e. example.com will allow
+ sub.example.com. However, higher up domains will still be excluded:
+ if you set %URI.Host to sub.example.com, example.com will be blocked.
+ Note: This directive overrides %URI.Base because
+ a given page may be on a sub-domain, but you wish HTML Purifier to be
+ more relaxed and allow some of the parent domains too.
+ This directive has been available since 1.2.0.
+
+');
+
+HTMLPurifier_ConfigSchema::define(
+ 'URI', 'Base', null, 'string/null', '
+
+ The base URI is the URI of the document this purified HTML will be
+ inserted into. This information is important if HTML Purifier needs
+ to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute
+ is on. You may use a non-absolute URI for this value, but behavior
+ may vary (%URI.MakeAbsolute deals nicely with both absolute and
+ relative paths, but forwards-compatibility is not guaranteed).
+ Warning: If set, the scheme on this URI
+ overrides the one specified by %URI.DefaultScheme. This directive has
+ been available since 2.1.0.
+
+');
+
class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
{
@@ -32,10 +76,26 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
var $filters = array();
var $registeredFilters = array();
+ /**
+ * HTMLPurifier_URI object of the base specified at %URI.Base
+ */
+ var $base;
+
+ /**
+ * String host to consider "home" base
+ */
+ var $host;
+
+ /**
+ * Name of default scheme based on %URI.DefaultScheme and %URI.Base
+ */
+ var $defaultScheme;
+
function HTMLPurifier_URIDefinition() {
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
+ $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
}
function registerFilter($filter) {
@@ -43,6 +103,11 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
}
function doSetup($config) {
+ $this->setupFilters($config);
+ $this->setupMemberVariables($config);
+ }
+
+ function setupFilters($config) {
foreach ($this->registeredFilters as $name => $filter) {
$conf = $config->get('URI', $name);
if ($conf !== false && $conf !== null) {
@@ -53,6 +118,18 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
unset($this->registeredFilters);
}
+    /**
+     * Populates $host, $base and $defaultScheme from the configuration.
+     *
+     * Precedence, as implemented below:
+     *  - host: %URI.Host when set, otherwise the host of the parsed %URI.Base
+     *  - defaultScheme: the scheme of the parsed %URI.Base when it has one,
+     *    otherwise %URI.DefaultScheme
+     *
+     * @param $config Configuration object the URI directives are read from
+     */
+    function setupMemberVariables($config) {
+        $this->host = $config->get('URI', 'Host');
+        $configured_base = $config->get('URI', 'Base');
+        if ($configured_base !== null) {
+            $uri_parser = new HTMLPurifier_URIParser();
+            $this->base = $uri_parser->parse($configured_base);
+            // the base URI's scheme wins over %URI.DefaultScheme
+            $this->defaultScheme = $this->base->scheme;
+            // an explicit %URI.Host wins over the base URI's host
+            if ($this->host === null) {
+                $this->host = $this->base->host;
+            }
+        }
+        if ($this->defaultScheme === null) {
+            // no base, or a base without a scheme: fall back to the directive
+            $this->defaultScheme = $config->get('URI', 'DefaultScheme');
+        }
+    }
+
+
function filter(&$uri, $config, &$context) {
foreach ($this->filters as $name => $x) {
$result = $this->filters[$name]->filter($uri, $config, $context);
diff --git a/library/HTMLPurifier/URIFilter/HostBlacklist.php b/library/HTMLPurifier/URIFilter/HostBlacklist.php
index 5f0d790e..d3429d5c 100644
--- a/library/HTMLPurifier/URIFilter/HostBlacklist.php
+++ b/library/HTMLPurifier/URIFilter/HostBlacklist.php
@@ -10,7 +10,7 @@ HTMLPurifier_ConfigSchema::define(
'This directive has been available since 1.3.0.'
);
-class HTMLPurifier_URIFilter_HostBlacklist
+class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
var $name = 'HostBlacklist';
var $blacklist = array();
diff --git a/library/HTMLPurifier/URIFilter/MakeAbsolute.php b/library/HTMLPurifier/URIFilter/MakeAbsolute.php
new file mode 100644
index 00000000..9935dc6e
--- /dev/null
+++ b/library/HTMLPurifier/URIFilter/MakeAbsolute.php
@@ -0,0 +1,115 @@
+
+ Converts all URIs into absolute forms. This is useful when the HTML
+ being filtered assumes a specific base path, but will actually be
+ viewed in a different context (and setting an alternate base URI is
+ not possible). %URI.Base must be set for this directive to work.
+ This directive has been available since 2.1.0.
+
+');
+
+/**
+ * URI filter that rewrites relative URIs into absolute ones by resolving
+ * them against the base URI held by the URI definition.
+ */
+class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
+{
+    var $name = 'MakeAbsolute';
+
+    /**
+     * Base URI object taken from the URI definition in prepare(); when it
+     * is null, filter() leaves every URI untouched.
+     */
+    var $base;
+
+    /**
+     * Path segments of the base URI with the last segment discarded and
+     * dot segments collapsed; relative paths are appended to this stack.
+     */
+    var $basePathStack = array();
+
+    /**
+     * Caches the base URI and pre-computes its path stack.
+     * @param $config Configuration object used to fetch the URI definition
+     */
+    function prepare($config) {
+        $def = $config->getDefinition('URI');
+        $this->base = $def->base;
+        if (is_null($this->base)) {
+            // Raised as a warning: the message states the directive is
+            // merely ignored, whereas E_USER_ERROR halts the script under
+            // PHP's default error handling.
+            trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_WARNING);
+            return;
+        }
+        // NOTE(review): where objects are assigned by handle this also
+        // clears the fragment on the definition's own base object - confirm
+        // that nothing else relies on it.
+        $this->base->fragment = null; // fragment is invalid for base URI
+        $stack = explode('/', $this->base->path);
+        array_pop($stack); // discard last segment
+        $stack = $this->_collapseStack($stack); // do pre-parsing
+        $this->basePathStack = $stack;
+    }
+
+    /**
+     * Resolves $uri against the base URI, modifying it in place.
+     *
+     * URIs that already carry a host, or an explicit non-hierarchical
+     * scheme, are left alone. Network paths are not supported.
+     *
+     * @param $uri URI object to rewrite (passed by reference)
+     * @param $config Configuration object, forwarded to getSchemeObj()
+     * @param $context Context object, forwarded to getSchemeObj()
+     * @return Always true; this filter never rejects a URI
+     */
+    function filter(&$uri, $config, &$context) {
+        if (is_null($this->base)) return true; // abort early
+        if (
+            $uri->path === '' && is_null($uri->scheme) &&
+            is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)
+        ) {
+            // reference to current document
+            $uri = $this->base->copy();
+            return true;
+        }
+        if (!is_null($uri->scheme)) {
+            // absolute URI already: don't change
+            if (!is_null($uri->host)) return true;
+            $scheme_obj = $uri->getSchemeObj($config, $context);
+            // getSchemeObj() yields false for a scheme it cannot resolve;
+            // leave such a URI untouched instead of reading a property
+            // off a non-object
+            if (!$scheme_obj || !$scheme_obj->hierarchical) {
+                // non-hierarchical URI with explicit scheme, don't change
+                return true;
+            }
+            // special case: had a scheme but always is hierarchical and had no authority
+        }
+        if (!is_null($uri->host)) {
+            // network path, don't bother
+            return true;
+        }
+        if ($uri->path === '') {
+            $uri->path = $this->base->path;
+        } elseif ($uri->path[0] !== '/') {
+            // relative path, needs more complicated processing
+            $stack = explode('/', $uri->path);
+            $new_stack = array_merge($this->basePathStack, $stack);
+            $new_stack = $this->_collapseStack($new_stack);
+            $uri->path = implode('/', $new_stack);
+        }
+        // re-combine
+        $uri->scheme = $this->base->scheme;
+        if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo;
+        if (is_null($uri->host)) $uri->host = $this->base->host;
+        if (is_null($uri->port)) $uri->port = $this->base->port;
+        return true;
+    }
+
+    /**
+     * Resolve dots and double-dots in a path stack
+     * @param $stack List of path segments, as produced by explode('/', $path)
+     * @return List of segments with '.' and resolvable '..' removed; a
+     *         trailing '' is appended when the path ended in a dot segment,
+     *         so that imploding yields a trailing slash
+     * @private
+     */
+    function _collapseStack($stack) {
+        $result = array();
+        // must be defined even when $stack is empty (e.g. a base URI with
+        // an empty or single-segment path); it is read after the loop
+        $is_folder = false;
+        for ($i = 0; isset($stack[$i]); $i++) {
+            $is_folder = false;
+            // absorb an internally duplicated slash
+            if ($stack[$i] === '' && $i && isset($stack[$i+1])) continue;
+            if ($stack[$i] === '..') {
+                if (!empty($result)) {
+                    $segment = array_pop($result);
+                    if ($segment === '' && empty($result)) {
+                        // error case: attempted to back out too far:
+                        // restore the leading slash
+                        $result[] = '';
+                    } elseif ($segment === '..') {
+                        // cannot remove .. with ..: keep the one that was
+                        // popped as well as the current one
+                        $result[] = '..';
+                        $result[] = '..';
+                    }
+                } else {
+                    // relative path, preserve the double-dots
+                    $result[] = '..';
+                }
+                $is_folder = true;
+                continue;
+            }
+            if ($stack[$i] === '.') {
+                // silently absorb
+                $is_folder = true;
+                continue;
+            }
+            $result[] = $stack[$i];
+        }
+        if ($is_folder) $result[] = '';
+        return $result;
+    }
+}
+
diff --git a/library/HTMLPurifier/URIScheme.php b/library/HTMLPurifier/URIScheme.php
index a34cdd4a..41c02f70 100644
--- a/library/HTMLPurifier/URIScheme.php
+++ b/library/HTMLPurifier/URIScheme.php
@@ -19,6 +19,12 @@ class HTMLPurifier_URIScheme
*/
var $browsable = false;
+ /**
+ * Whether or not the URI always uses <hier_part>, resolves edge cases
+ * with making relative URIs absolute
+ */
+ var $hierarchical = false;
+
/**
* Validates the components of a URI
* @note This implementation should be called by children if they define
diff --git a/library/HTMLPurifier/URIScheme/ftp.php b/library/HTMLPurifier/URIScheme/ftp.php
index 950fe032..5555ef33 100644
--- a/library/HTMLPurifier/URIScheme/ftp.php
+++ b/library/HTMLPurifier/URIScheme/ftp.php
@@ -9,6 +9,7 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
var $default_port = 21;
var $browsable = true; // usually
+ var $hierarchical = true;
function validate(&$uri, $config, &$context) {
parent::validate($uri, $config, $context);
diff --git a/library/HTMLPurifier/URIScheme/http.php b/library/HTMLPurifier/URIScheme/http.php
index 262e2bd9..7abc6680 100644
--- a/library/HTMLPurifier/URIScheme/http.php
+++ b/library/HTMLPurifier/URIScheme/http.php
@@ -9,6 +9,7 @@ class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
var $default_port = 80;
var $browsable = true;
+ var $hierarchical = true;
function validate(&$uri, $config, &$context) {
parent::validate($uri, $config, $context);
diff --git a/tests/HTMLPurifier/URIDefinitionTest.php b/tests/HTMLPurifier/URIDefinitionTest.php
index cc782840..149f89b2 100644
--- a/tests/HTMLPurifier/URIDefinitionTest.php
+++ b/tests/HTMLPurifier/URIDefinitionTest.php
@@ -31,4 +31,29 @@ class HTMLPurifier_URIDefinitionTest extends HTMLPurifier_URIHarness
$this->assertFalse($def->filter($uri, $this->config, $this->context));
}
+    /**
+     * With Host, Base and DefaultScheme all configured, the explicit host
+     * is kept and the base URI's scheme overrides the default scheme.
+     */
+    function test_setupMemberVariables_collisionPrecedenceIsHostBaseScheme() {
+        $host = 'example.com';
+        $base = 'http://sub.example.com/foo/bar.html';
+        $this->config->set('URI', 'Host', $host);
+        $this->config->set('URI', 'Base', $base);
+        $this->config->set('URI', 'DefaultScheme', 'ftp');
+        $definition = new HTMLPurifier_URIDefinition();
+        $definition->setupMemberVariables($this->config);
+        $this->assertIdentical($definition->host, $host);
+        $this->assertIdentical($definition->base, $this->createURI($base));
+        $this->assertIdentical($definition->defaultScheme, 'http'); // not ftp!
+    }
+
+    /**
+     * With only DefaultScheme configured, that value becomes defaultScheme.
+     */
+    function test_setupMemberVariables_onlyScheme() {
+        $scheme = 'ftp';
+        $this->config->set('URI', 'DefaultScheme', $scheme);
+        $definition = new HTMLPurifier_URIDefinition();
+        $definition->setupMemberVariables($this->config);
+        $this->assertIdentical($definition->defaultScheme, 'ftp');
+    }
+
+    /**
+     * With only Base configured, the host is taken from the base URI.
+     */
+    function test_setupMemberVariables_onlyBase() {
+        $base = 'http://sub.example.com/foo/bar.html';
+        $this->config->set('URI', 'Base', $base);
+        $definition = new HTMLPurifier_URIDefinition();
+        $definition->setupMemberVariables($this->config);
+        $this->assertIdentical($definition->host, 'sub.example.com');
+    }
+
}
diff --git a/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php b/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php
index 4362cbd8..545e421b 100644
--- a/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php
+++ b/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php
@@ -9,6 +9,7 @@ class HTMLPurifier_URIFilter_DisableExternalResourcesTest extends
function setUp() {
parent::setUp();
+ $this->filter = new HTMLPurifier_URIFilter_DisableExternalResources();
$var = true;
$this->context->register('EmbeddedURI', $var);
}
diff --git a/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
new file mode 100644
index 00000000..d509a6a1
--- /dev/null
+++ b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
@@ -0,0 +1,122 @@
+filter = new HTMLPurifier_URIFilter_MakeAbsolute();
+ $this->setBase();
+ }
+
+    /**
+     * Sets the URI.Base directive on the test configuration.
+     * @param $base Base URI string; defaults to an absolute http URI that
+     *              carries both a query and a fragment
+     */
+    function setBase($base = 'http://example.com/foo/bar.html?q=s#frag') {
+        $this->config->set(
+            'URI',
+            'Base',
+            $base
+        );
+    }
+
+ // corresponding to RFC 2396
+
+    /**
+     * A URI that already has scheme and host is passed without an
+     * expectation (presumably meaning "unchanged" - see the harness).
+     */
+    function testPreserveAbsolute() {
+        $input = 'http://example.com/foo.html';
+        $this->assertFiltering($input);
+    }
+
+    /**
+     * An empty reference becomes the base URI without its fragment.
+     */
+    function testFilterBlank() {
+        $input = '';
+        $expected = 'http://example.com/foo/bar.html?q=s';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A reference with only query and fragment picks up the base path.
+     */
+    function testFilterEmptyPath() {
+        $input = '?q=s#frag';
+        $expected = 'http://example.com/foo/bar.html?q=s#frag';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A mailto: URI is passed without an expectation (presumably meaning
+     * "unchanged" - see the harness).
+     */
+    function testPreserveAltScheme() {
+        $input = 'mailto:bob@example.com';
+        $this->assertFiltering($input);
+    }
+
+    /**
+     * An http URI with a scheme but no authority gets the base host.
+     */
+    function testFilterIgnoreHTTPSpecialCase() {
+        $input = 'http:/';
+        $expected = 'http://example.com/';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * An absolute path keeps its path and gains the base scheme and host.
+     */
+    function testFilterAbsolutePath() {
+        $input = '/foo.txt';
+        $expected = 'http://example.com/foo.txt';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A bare filename resolves against the base document's directory.
+     */
+    function testFilterRelativePath() {
+        $input = 'baz.txt';
+        $expected = 'http://example.com/foo/baz.txt';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A leading "./" segment is absorbed during resolution.
+     */
+    function testFilterRelativePathWithInternalDot() {
+        $input = './baz.txt';
+        $expected = 'http://example.com/foo/baz.txt';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A trailing "." segment turns into a trailing slash.
+     */
+    function testFilterRelativePathWithEndingDot() {
+        $input = 'baz/.';
+        $expected = 'http://example.com/foo/baz/';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A lone "." resolves to the base document's directory.
+     */
+    function testFilterRelativePathDot() {
+        $input = '.';
+        $expected = 'http://example.com/foo/';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A leading "../" climbs one directory above the base directory.
+     */
+    function testFilterRelativePathWithInternalDotDot() {
+        $input = '../baz.txt';
+        $expected = 'http://example.com/baz.txt';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A lone ".." resolves to the parent directory, with a trailing slash.
+     */
+    function testFilterRelativePathWithEndingDotDot() {
+        $input = '..';
+        $expected = 'http://example.com/';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * Climbing above the root is clamped at the root.
+     */
+    function testFilterRelativePathTooManyDotDots() {
+        $input = '../../';
+        $expected = 'http://example.com/';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * The reference's own query and fragment survive resolution.
+     */
+    function testFilterAppendingQueryAndFragment() {
+        $input = '/foo.php?q=s#frag';
+        $expected = 'http://example.com/foo.php?q=s#frag';
+        $this->assertFiltering($input, $expected);
+    }
+
+ // edge cases below
+
+    /**
+     * With an absolute-path base, a filename resolves into its directory.
+     */
+    function testFilterAbsolutePathBase() {
+        $base = '/foo/baz.txt';
+        $this->setBase($base);
+        $input = 'test.php';
+        $expected = '/foo/test.php';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A base ending in a slash is itself treated as the directory.
+     */
+    function testFilterAbsolutePathBaseDirectory() {
+        $base = '/foo/';
+        $this->setBase($base);
+        $input = 'test.php';
+        $expected = '/foo/test.php';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * With an absolute-path base, excess ".." segments stop at the root.
+     */
+    function testFilterAbsolutePathBaseBelow() {
+        $base = '/foo/baz.txt';
+        $this->setBase($base);
+        $input = '../../test.php';
+        $expected = '/test.php';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * With a relative base, the result stays relative to the same directory.
+     */
+    function testFilterRelativePathBase() {
+        $base = 'foo/baz.html';
+        $this->setBase($base);
+        $input = 'foo.php';
+        $expected = 'foo/foo.php';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * A relative base's leading ".." is preserved in the result.
+     */
+    function testFilterRelativePathBaseBelow() {
+        $base = '../baz.html';
+        $this->setBase($base);
+        $input = 'test/strike.html';
+        $expected = '../test/strike.html';
+        $this->assertFiltering($input, $expected);
+    }
+
+    /**
+     * With a relative base, an absolute path is passed without an
+     * expectation (presumably meaning "unchanged" - see the harness).
+     */
+    function testFilterRelativePathBaseWithAbsoluteURI() {
+        $base = '../baz.html';
+        $this->setBase($base);
+        $input = '/test/strike.html';
+        $this->assertFiltering($input);
+    }
+
+    /**
+     * With a relative base, "." resolves to the base's directory.
+     */
+    function testFilterRelativePathBaseWithDot() {
+        $base = '../baz.html';
+        $this->setBase($base);
+        $input = '.';
+        $expected = '../';
+        $this->assertFiltering($input, $expected);
+    }
+
+ // error case
+
+    /**
+     * Without a base URI the filter is expected to raise its "being
+     * ignored" error; the URI is passed without an expectation.
+     */
+    function testErrorNoBase() {
+        $message = 'URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration';
+        $input = 'foo/bar.txt';
+        $this->setBase(null);
+        $this->expectError($message);
+        $this->assertFiltering($input);
+    }
+
+}
diff --git a/tests/HTMLPurifier/URIHarness.php b/tests/HTMLPurifier/URIHarness.php
index bec80845..63e6d7d6 100644
--- a/tests/HTMLPurifier/URIHarness.php
+++ b/tests/HTMLPurifier/URIHarness.php
@@ -13,7 +13,7 @@ class HTMLPurifier_URIHarness extends HTMLPurifier_Harness
*/
function prepareURI(&$uri, &$expect_uri) {
$parser = new HTMLPurifier_URIParser();
- if ($expect_uri === true) $uri = $expect_uri;
+ if ($expect_uri === true) $expect_uri = $uri;
$uri = $parser->parse($uri);
if ($expect_uri !== false) {
$expect_uri = $parser->parse($expect_uri);
diff --git a/tests/test_files.php b/tests/test_files.php
index 44bc86f2..93766376 100644
--- a/tests/test_files.php
+++ b/tests/test_files.php
@@ -106,6 +106,7 @@ $test_files[] = 'HTMLPurifier/URIDefinitionTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/HostBlacklistTest.php';
+$test_files[] = 'HTMLPurifier/URIFilter/MakeAbsoluteTest.php';
$test_files[] = 'HTMLPurifier/URIParserTest.php';
$test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
$test_files[] = 'HTMLPurifier/URISchemeTest.php';