From 82afd890c47c3d80a4ab2d95ec6783af29a7e699 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 17 Nov 2006 23:09:10 +0000 Subject: [PATCH] [1.2.0] Non-accessible resources (ex. mailto) blocked from embedded URIs (img src) git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@528 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 1 + TODO | 2 -- library/HTMLPurifier/AttrDef/URI.php | 13 ++++++++++++- library/HTMLPurifier/HTMLDefinition.php | 4 +++- library/HTMLPurifier/URIScheme.php | 7 +++++++ library/HTMLPurifier/URIScheme/ftp.php | 1 + library/HTMLPurifier/URIScheme/http.php | 1 + library/HTMLPurifier/URIScheme/mailto.php | 2 ++ library/HTMLPurifier/URIScheme/news.php | 2 ++ library/HTMLPurifier/URIScheme/nntp.php | 1 + tests/HTMLPurifier/AttrDef/URITest.php | 10 ++++++++++ .../Strategy/ValidateAttributesTest.php | 6 ++++++ 12 files changed, 46 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index dd6ea426..92c172df 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! Configuration documentation now has table of contents ! Added %URI.DisableExternal, which prevents links to external websites. You can also use %URI.Host to permit absolute linking to subdomains +! Non-accessible resources (ex. 
mailto) blocked from embedded URIs (img src) - Documentation updated + TODO added request Phalanger + TODO added request Native compression diff --git a/TODO b/TODO index f19688f8..154487f7 100644 --- a/TODO +++ b/TODO @@ -5,8 +5,6 @@ TODO List - Make URI validation routines tighter (especially mailto) - More extensive URI filtering schemes (see URI in config-ideas.txt) - Allow for background-image and list-style-image (see above) - - Distinguish between different types of URIs, for instance, a mailto URI - in IMG SRC is nonsensical - Error logging for filtering/cleanup procedures - Rich set* methods and config file loaders for HTMLPurifier_Config diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 1547a0e6..4c8f96e3 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -43,10 +43,15 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef var $host; var $PercentEncoder; + var $embeds; - function HTMLPurifier_AttrDef_URI() { + /** + * @param $embeds Does the URI here result in an extra HTTP request? 
+ */ + function HTMLPurifier_AttrDef_URI($embeds = false) { $this->host = new HTMLPurifier_AttrDef_Host(); $this->PercentEncoder = new HTMLPurifier_PercentEncoder(); + $this->embeds = (bool) $embeds; } function validate($uri, $config, &$context) { @@ -100,6 +105,12 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef } + // the URI we're processing embeds a resource in the page, but the URI + // it references cannot be located + if ($this->embeds && !$scheme_obj->browsable) { + return false; + } + if ($authority !== null) { diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 096b919f..44e16dbc 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -351,12 +351,14 @@ class HTMLPurifier_HTMLDefinition $e_URI = new HTMLPurifier_AttrDef_URI(); $this->info['a']->attr['href'] = $this->info['img']->attr['longdesc'] = - $this->info['img']->attr['src'] = $this->info['del']->attr['cite'] = $this->info['ins']->attr['cite'] = $this->info['blockquote']->attr['cite'] = $this->info['q']->attr['cite'] = $e_URI; + // URI that causes HTTP request + $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true); + ////////////////////////////////////////////////////////////////////// // info_tag_transform : transformations of tags diff --git a/library/HTMLPurifier/URIScheme.php b/library/HTMLPurifier/URIScheme.php index 945e847e..20a9781b 100644 --- a/library/HTMLPurifier/URIScheme.php +++ b/library/HTMLPurifier/URIScheme.php @@ -12,6 +12,13 @@ class HTMLPurifier_URIScheme */ var $default_port = null; + /** + * Whether or not URIs of this scheme are locatable by a browser. + * http and ftp are accessible, while mailto and news are not. 
+ * @public + */ + var $browsable = false; + /** * Validates the components of a URI * @note This implementation should be called by children if they define diff --git a/library/HTMLPurifier/URIScheme/ftp.php b/library/HTMLPurifier/URIScheme/ftp.php index 16ad097c..dab9c981 100644 --- a/library/HTMLPurifier/URIScheme/ftp.php +++ b/library/HTMLPurifier/URIScheme/ftp.php @@ -8,6 +8,7 @@ require_once 'HTMLPurifier/URIScheme.php'; class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme { var $default_port = 21; + var $browsable = true; // usually function validateComponents( $userinfo, $host, $port, $path, $query, $config, &$context diff --git a/library/HTMLPurifier/URIScheme/http.php b/library/HTMLPurifier/URIScheme/http.php index b036fe66..54b250da 100644 --- a/library/HTMLPurifier/URIScheme/http.php +++ b/library/HTMLPurifier/URIScheme/http.php @@ -8,6 +8,7 @@ require_once 'HTMLPurifier/URIScheme.php'; class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme { var $default_port = 80; + var $browsable = true; function validateComponents( $userinfo, $host, $port, $path, $query, $config, &$context diff --git a/library/HTMLPurifier/URIScheme/mailto.php b/library/HTMLPurifier/URIScheme/mailto.php index 6558d17e..2292072e 100644 --- a/library/HTMLPurifier/URIScheme/mailto.php +++ b/library/HTMLPurifier/URIScheme/mailto.php @@ -13,6 +13,8 @@ require_once 'HTMLPurifier/URIScheme.php'; class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme { + var $browsable = false; + function validateComponents( $userinfo, $host, $port, $path, $query, $config, &$context ) { diff --git a/library/HTMLPurifier/URIScheme/news.php b/library/HTMLPurifier/URIScheme/news.php index f14a7228..c9d1c2b0 100644 --- a/library/HTMLPurifier/URIScheme/news.php +++ b/library/HTMLPurifier/URIScheme/news.php @@ -7,6 +7,8 @@ require_once 'HTMLPurifier/URIScheme.php'; */ class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme { + var $browsable = false; + function 
validateComponents( $userinfo, $host, $port, $path, $query, $config, &$context ) { diff --git a/library/HTMLPurifier/URIScheme/nntp.php b/library/HTMLPurifier/URIScheme/nntp.php index 4b935b32..49fca4c3 100644 --- a/library/HTMLPurifier/URIScheme/nntp.php +++ b/library/HTMLPurifier/URIScheme/nntp.php @@ -8,6 +8,7 @@ require_once 'HTMLPurifier/URIScheme.php'; class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme { var $default_port = 119; + var $browsable = false; function validateComponents( $userinfo, $host, $port, $path, $query, $config, &$context diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index daf03152..1ad1f283 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -261,6 +261,16 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness } + function testEmbeds() { + + // embedded URI + $this->def = new HTMLPurifier_AttrDef_URI(true); + + $this->assertDef('http://sub.example.com/alas?foo=asd'); + $this->assertDef('mailto:foo@example.com', false); + + } + } ?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php index 5f499706..0297425f 100644 --- a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php +++ b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php @@ -163,6 +163,12 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends '' ); + // mailto in image is not allowed + $this->assertResult( + '', + 'Invalid image' + ); + } }