0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

[2.1.0] Further refactoring of AttrDef_URI, creation of new URIFilter and URIDefinition subsystems.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1335 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-08-02 01:12:27 +00:00
parent 797b899305
commit 4919187fc6
20 changed files with 420 additions and 132 deletions

2
NEWS
View File

@ -54,6 +54,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Interface for URIScheme changed
. Generic URI object to hold components of URI added, most systems involved
in URI validation have been migrated to use it
. Custom filtering for URIs factored out to URIDefinition interface for
maximum extensibility
2.0.1, released 2007-06-27
! Tag auto-closing now based on a ChildDef heuristic rather than a

View File

@ -25,29 +25,6 @@ HTMLPurifier_ConfigSchema::define(
'This directive has been available since 1.2.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableExternal', false, 'bool',
'Disables links to external websites. This is a highly effective '.
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
'links or images outside of your domain will be allowed. Non-linkified '.
'URIs will still be preserved. If you want to be able to link to '.
'subdomains or use absolute URIs, specify %URI.Host for your website. '.
'This directive has been available since 1.2.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableExternalResources', false, 'bool',
'Disables the embedding of external resources, preventing users from '.
'embedding things like images from other hosts. This prevents '.
'access tracking (good for email viewers), bandwidth leeching, '.
'cross-site request forging, goatse.cx posting, and '.
'other nasties, but also results in '.
'a loss of end-user functionality (they can\'t directly post a pic '.
'they posted from Flickr anymore). Use it if you don\'t have a '.
'robust user-content moderation team. This directive has been '.
'available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableResources', false, 'bool',
'Disables embedding resources, essentially meaning no pictures. You can '.
@ -117,18 +94,35 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$uri = $this->parser->parse($uri);
if ($uri === false) return false;
// generic validation
$context->register('EmbeddedURI', $this->embedsResource); // flag
$result = $uri->validate($config, $context);
$context->destroy('EmbeddedURI');
if (!$result) return false;
// add embedded flag to context for validators
$context->register('EmbeddedURI', $this->embedsResource);
// scheme-specific validation
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) return false;
if ($this->embedsResource && !$scheme_obj->browsable) return false;
$result = $scheme_obj->validate($uri, $config, $context);
if (!$result) return false;
$ok = false;
do {
// generic validation
$result = $uri->validate($config, $context);
if (!$result) break;
// chained validation
$uri_def =& $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context);
if (!$result) break;
// scheme-specific validation
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) break;
if ($this->embedsResource && !$scheme_obj->browsable) break;
$result = $scheme_obj->validate($uri, $config, $context);
if (!$result) break;
// survived gauntlet
$ok = true;
} while (false);
$context->destroy('EmbeddedURI');
if (!$ok) return false;
// back to string
$result = $uri->toString();

View File

@ -5,6 +5,7 @@ require_once 'HTMLPurifier/ConfigSchema.php';
// member variables
require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/CSSDefinition.php';
require_once 'HTMLPurifier/URIDefinition.php';
require_once 'HTMLPurifier/Doctype.php';
require_once 'HTMLPurifier/DefinitionCacheFactory.php';
@ -313,6 +314,8 @@ class HTMLPurifier_Config
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
} elseif ($type == 'URI') {
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
} else {
trigger_error("Definition of $type type not supported");
$false = false;

View File

@ -1,6 +1,7 @@
<?php
require_once 'HTMLPurifier/URIParser.php';
require_once 'HTMLPurifier/URIFilter.php';
/**
* HTML Purifier's internal representation of a URI
@ -50,19 +51,7 @@ class HTMLPurifier_URI
// validate host
if (!is_null($this->host)) {
// remove URI if it's absolute and we disabled externals or
// if it's absolute and embedded and we disabled external resources
unset($our_host); // ensure this variable is not set
if (
$config->get('URI', 'DisableExternal') ||
(
$config->get('URI', 'DisableExternalResources') &&
$context->get('EmbeddedURI', true) // suppress errors
)
) {
$our_host = $config->get('URI', 'Host');
if ($our_host === null) return false;
}
$host_def = new HTMLPurifier_AttrDef_URI_Host();
$this->host = $host_def->validate($this->host, $config, $context);
if ($this->host === false) $this->host = null;
@ -70,16 +59,6 @@ class HTMLPurifier_URI
// check host against blacklist
if ($this->checkBlacklist($this->host, $config, $context)) return false;
// more lenient absolute checking
if (isset($our_host)) {
$host_parts = array_reverse(explode('.', $this->host));
// could be cached
$our_host_parts = array_reverse(explode('.', $our_host));
foreach ($our_host_parts as $i => $discard) {
if (!isset($host_parts[$i])) return false;
if ($host_parts[$i] != $our_host_parts[$i]) return false;
}
}
}
// munge scheme off if necessary

View File

@ -0,0 +1,53 @@
<?php
require_once 'HTMLPurifier/Definition.php';
require_once 'HTMLPurifier/URIFilter.php';
require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
// Declares the URI.DefinitionRev configuration directive: an 'int' whose
// default is 1. The final argument is the HTML description of the directive.
// NOTE(review): argument order (namespace, name, default, type, description)
// is read off this call; confirm against HTMLPurifier_ConfigSchema::define().
HTMLPurifier_ConfigSchema::define(
'URI', 'DefinitionRev', 1, 'int', '
<p>
Revision identifier for your custom definition. See
%HTML.DefinitionRev for details. This directive has been available
since 2.1.0.
</p>
');
/**
 * Definition object that owns a set of URI filters and runs URIs through
 * them in order.
 */
class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
{

    /** Type identifier of this definition */
    var $type = 'URI';

    /** Active filters, keyed by filter name; populated by doSetup() */
    var $filters = array();

    /** Filters available for activation, keyed by filter name */
    var $registeredFilters = array();

    /**
     * Registers the two built-in filters so that doSetup() can activate
     * them when the matching URI directive is enabled.
     */
    function HTMLPurifier_URIDefinition() {
        $external = new HTMLPurifier_URIFilter_DisableExternal();
        $this->registerFilter($external);
        $external_resources = new HTMLPurifier_URIFilter_DisableExternalResources();
        $this->registerFilter($external_resources);
    }

    /**
     * Makes a filter available for activation under its own $name
     * @param $filter Filter object exposing a public $name
     */
    function registerFilter($filter) {
        $key = $filter->name;
        $this->registeredFilters[$key] = $filter;
    }

    /**
     * Activates every registered filter whose URI.<name> directive is
     * neither false nor null, prepares all active filters, then discards
     * the registration list.
     * @param $config Configuration object the directives are read from
     */
    function doSetup($config) {
        foreach ($this->registeredFilters as $key => $candidate) {
            $setting = $config->get('URI', $key);
            if ($setting === false || $setting === null) continue;
            $this->filters[$key] = $candidate;
        }
        // call through the array slot so the stored filter itself is prepared
        foreach (array_keys($this->filters) as $key) {
            $this->filters[$key]->prepare($config);
        }
        unset($this->registeredFilters);
    }

    /**
     * Runs a URI through every active filter, stopping at the first failure
     * @param &$uri Reference to the URI object being filtered
     * @param $config Configuration object passed on to each filter
     * @param &$context Context object passed on to each filter
     * @return true if all filters accepted the URI, false otherwise
     */
    function filter(&$uri, $config, &$context) {
        foreach (array_keys($this->filters) as $key) {
            $accepted = $this->filters[$key]->filter($uri, $config, $context);
            if (!$accepted) return false;
        }
        return true;
    }

}

View File

@ -0,0 +1,24 @@
<?php
/**
 * Base class for chainable filters that perform custom URI processing.
 * Subclasses supply a $name and override filter().
 */
class HTMLPurifier_URIFilter
{

    /** Name of the filter; left unset here, assigned by subclasses */
    var $name;

    /**
     * Initialization hook for the filter; does nothing by default
     * @param $config Instance of HTMLPurifier_Config
     */
    function prepare($config) {
        // no preparation needed in the base class
    }

    /**
     * Filters a URI object. The base implementation is a stand-in for an
     * abstract method and raises an E_USER_ERROR when invoked.
     * @param &$uri Reference to URI object
     * @param $config Instance of HTMLPurifier_Config
     * @param &$context Instance of HTMLPurifier_Context
     */
    function filter(&$uri, $config, &$context) {
        trigger_error('Cannot call abstract function', E_USER_ERROR);
    }

}

View File

@ -0,0 +1,34 @@
<?php
require_once 'HTMLPurifier/URIFilter.php';
// Declares the URI.DisableExternal configuration directive ('bool', default
// false). Each fragment of the description ends with a space so that the
// concatenated text keeps its word breaks.
HTMLPurifier_ConfigSchema::define(
    'URI', 'DisableExternal', false, 'bool',
    'Disables links to external websites. This is a highly effective '.
    'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no '.
    'links or images outside of your domain will be allowed. Non-linkified '.
    'URIs will still be preserved. If you want to be able to link to '.
    'subdomains or use absolute URIs, specify %URI.Host for your website. '.
    'This directive has been available since 1.2.0.'
);
/**
 * URI filter that rejects any URI whose host is not the configured
 * URI.Host or one of its subdomains. URIs without a host pass untouched.
 */
class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
{

    var $name = 'DisableExternal';

    /**
     * Labels of URI.Host in reverse order (top-level label first), or false
     * when no host has been configured.
     */
    var $ourHostParts = false;

    /**
     * Caches the reversed labels of URI.Host for later comparisons
     * @param $config Configuration object URI.Host is read from
     */
    function prepare($config) {
        // start clean so a repeated prepare() cannot keep a stale host
        $this->ourHostParts = false;
        $our_host = $config->get('URI', 'Host');
        if ($our_host !== null) {
            $this->ourHostParts = array_reverse(explode('.', $our_host));
        }
    }

    /**
     * Checks the URI's host against the configured host
     * @param &$uri Reference to URI object; only its host is inspected
     * @param $config Unused here
     * @param &$context Unused here
     * @return true if the URI has no host or its host ends with the labels
     *         of URI.Host; false otherwise, including when URI.Host is unset
     */
    function filter(&$uri, $config, &$context) {
        if (is_null($uri->host)) return true;
        if ($this->ourHostParts === false) return false;
        $host_parts = array_reverse(explode('.', $uri->host));
        foreach ($this->ourHostParts as $i => $our_part) {
            if (!isset($host_parts[$i])) return false;
            // strict comparison: a loose one treats numeric-looking labels
            // such as '1e3' and '1000' as equal
            if ($host_parts[$i] !== $our_part) return false;
        }
        return true;
    }

}

View File

@ -0,0 +1,26 @@
<?php
require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
// Declares the URI.DisableExternalResources configuration directive
// ('bool', default false); the concatenated string is its description.
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableExternalResources', false, 'bool',
'Disables the embedding of external resources, preventing users from '.
'embedding things like images from other hosts. This prevents '.
'access tracking (good for email viewers), bandwidth leeching, '.
'cross-site request forging, goatse.cx posting, and '.
'other nasties, but also results in '.
'a loss of end-user functionality (they can\'t directly post a pic '.
'they posted from Flickr anymore). Use it if you don\'t have a '.
'robust user-content moderation team. This directive has been '.
'available since 1.3.0.'
);
/**
 * Variant of the DisableExternal filter that only applies the host check
 * when the EmbeddedURI context flag is truthy.
 */
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{

    var $name = 'DisableExternalResources';

    /**
     * Applies the parent's host check to embedded URIs and accepts all others
     * @param &$uri Reference to URI object
     * @param $config Instance of HTMLPurifier_Config
     * @param &$context Context object queried for the EmbeddedURI flag
     * @return true when the flag is not truthy, otherwise the parent's result
     */
    function filter(&$uri, $config, &$context) {
        // NOTE(review): the second argument presumably suppresses the error
        // for a missing key -- confirm against HTMLPurifier_Context::get()
        $is_embedded = $context->get('EmbeddedURI', true);
        if ($is_embedded) {
            return parent::filter($uri, $config, $context);
        }
        return true;
    }

}

View File

@ -16,7 +16,8 @@ require_once(dirname(__FILE__) . '/../library/HTMLPurifier.auto.php');
$config = HTMLPurifier_Config::createDefault();
$names = array('HTML', 'CSS', 'Test');
//$names = array('HTML', 'CSS', 'URI', 'Test');
$names = array('URI');
foreach ($names as $name) {
echo " - Flushing $name\n";
$cache = new HTMLPurifier_DefinitionCache_Serializer($name);

View File

@ -48,6 +48,64 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$this->assertDef('javascript:foobar();', false);
}
/**
 * With URI.DisableExternal on and URI.Host set to sub.example.com, checks
 * which URIs the attribute definition accepts and which it rejects.
 */
function test_validate_configDisableExternal() {
    $this->def = new HTMLPurifier_AttrDef_URI();
    $this->config->set('URI', 'DisableExternal', true);
    $this->config->set('URI', 'Host', 'sub.example.com');
    // array(URI, accepted?), asserted in this exact order
    $cases = array(
        array('/foobar.txt', true),
        array('http://google.com/', false),
        array('http://sub.example.com/alas?foo=asd', true),
        array('http://example.com/teehee', false),
        array('http://www.example.com/#man', false),
        array('http://go.sub.example.com/perhaps?p=foo', true),
    );
    foreach ($cases as $case) {
        if ($case[1]) {
            $this->assertDef($case[0]);
        } else {
            $this->assertDef($case[0], false);
        }
    }
}
/**
 * With URI.DisableExternalResources on, asserts an absolute and a relative
 * URI first against the current definition and then against one constructed
 * with true.
 */
function test_validate_configDisableExternalResources() {
    $this->config->set('URI', 'DisableExternalResources', true);
    $absolute = 'http://sub.example.com/alas?foo=asd';
    $relative = '/img.png';
    // definition already in place before this test: both are accepted
    $this->assertDef($absolute);
    $this->assertDef($relative);
    // NOTE(review): the true argument presumably marks the attribute as
    // embedding a resource -- confirm against HTMLPurifier_AttrDef_URI
    $this->def = new HTMLPurifier_AttrDef_URI(true);
    $this->assertDef($absolute, false);
    $this->assertDef($relative);
}
/**
 * With URI.HostBlacklist set to example.com and moo, checks which URIs are
 * accepted and which are rejected.
 */
function test_validate_configBlacklist() {
    $this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
    // array(URI, accepted?), asserted in this exact order
    $cases = array(
        array('foo.txt', true),
        array('http://www.google.com/example.com/moo', true),
        array('http://example.com/#23', false),
        array('https://sub.domain.example.com/foobar', false),
        array('http://example.com.example.net/?whoo=foo', false),
        array('ftp://moo-moo.net/foo/foo/', false),
    );
    foreach ($cases as $case) {
        if ($case[1]) {
            $this->assertDef($case[0]);
        } else {
            $this->assertDef($case[0], false);
        }
    }
}
/*
function test_validate_configWhitelist() {
$this->config->set('URI', 'HostPolicy', 'DenyAll');
$this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
$this->assertDef('http://example.com/fo/google.com', false);
$this->assertDef('server.txt');
$this->assertDef('ftp://www.google.com/?t=a');
$this->assertDef('http://google.com.tricky.spamsite.net', false);
}
*/
}

View File

@ -1,5 +1,7 @@
<?php
require_once 'HTMLPurifier/URIParser.php';
/**
* All-use harness, use this rather than SimpleTest's
*/
@ -12,18 +14,46 @@ class HTMLPurifier_Harness extends UnitTestCase
var $config, $context;
/**
 * Stores the default config and context from createCommon() on the test
 * case as $this->config and $this->context
 */
function setUp() {
    $defaults = $this->createCommon();
    $this->config  = $defaults[0];
    $this->context = $defaults[1];
}
/**
 * Normalizes a config/context pair in place: the config is passed through
 * HTMLPurifier_Config::create() and a falsy context is replaced by a new
 * HTMLPurifier_Context
 * @param &$config Reference to config variable
 * @param &$context Reference to context variable
 */
function prepareCommon(&$config, &$context) {
    $config = HTMLPurifier_Config::create($config);
    if ($context) return;
    $context = new HTMLPurifier_Context();
}
/**
 * Builds a default configuration object and a new context object
 * @return Defaults in form of array($config, $context)
 */
function createCommon() {
    $config  = HTMLPurifier_Config::createDefault();
    $context = new HTMLPurifier_Context;
    return array($config, $context);
}
/**
 * If $expect is false, ignores $result and asserts that $status is false.
 * Otherwise, asserts that $status is true and that $result === $expect.
 * @param $status Boolean status
 * @param $result Mixed result from processing
 * @param $expect Mixed expectation for result
 */
function assertEitherFailOrIdentical($status, $result, $expect) {
    if ($expect === false) {
        // failure expected: only the status matters
        $this->assertFalse($status, 'Expected false result, got true');
        return;
    }
    $this->assertTrue($status, 'Expected true result, got false');
    $this->assertIdentical($result, $expect);
}
}

View File

@ -0,0 +1,34 @@
<?php
require_once 'HTMLPurifier/URIHarness.php';
require_once 'HTMLPurifier/URIDefinition.php';
/**
 * Tests the filter chain of HTMLPurifier_URIDefinition using mock filters.
 */
class HTMLPurifier_URIDefinitionTest extends HTMLPurifier_URIHarness
{

    /**
     * Builds a mock URI filter
     * @param $expect Whether filter() must be called exactly once (true)
     *        or never (false)
     * @param $result Value the mock's filter() returns
     * @return Configured HTMLPurifier_URIFilterMock
     */
    function createFilterMock($expect = true, $result = true) {
        generate_mock_once('HTMLPurifier_URIFilter');
        $filter = new HTMLPurifier_URIFilterMock();
        if (!$expect) {
            $filter->expectNever('filter');
        } else {
            $filter->expectOnce('filter');
        }
        $filter->setReturnValue('filter', $result);
        return $filter;
    }

    /** Two accepting filters: both are called and the chain succeeds */
    function test_filter() {
        $def = new HTMLPurifier_URIDefinition();
        for ($i = 0; $i < 2; $i++) {
            $def->filters[] = $this->createFilterMock();
        }
        $uri = $this->createURI('test');
        $outcome = $def->filter($uri, $this->config, $this->context);
        $this->assertTrue($outcome);
    }

    /** A rejecting first filter ends the chain before the second runs */
    function test_filter_earlyAbortIfFail() {
        $def = new HTMLPurifier_URIDefinition();
        $def->filters[] = $this->createFilterMock(true, false);
        $def->filters[] = $this->createFilterMock(false); // never called
        $uri = $this->createURI('test');
        $outcome = $def->filter($uri, $this->config, $this->context);
        $this->assertFalse($outcome);
    }

}

View File

@ -0,0 +1,23 @@
<?php
require_once 'HTMLPurifier/URIFilter/DisableExternalTest.php';
require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
/**
 * Runs the tests inherited from the DisableExternal test case with the
 * EmbeddedURI context flag registered as true, plus one test without it.
 */
class HTMLPurifier_URIFilter_DisableExternalResourcesTest extends
    HTMLPurifier_URIFilter_DisableExternalTest
{

    /** Registers EmbeddedURI = true on top of the parent's setup */
    function setUp() {
        parent::setUp();
        // passed to register() as a variable rather than a literal
        $embedded = true;
        $this->context->register('EmbeddedURI', $embedded);
    }

    /** Without the embedded flag an absolute URI is left alone */
    function testPreserveWhenNotEmbedded() {
        $this->context->destroy('EmbeddedURI'); // undo setUp
        $this->assertFiltering('http://example.com');
    }

}

View File

@ -0,0 +1,47 @@
<?php
require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
require_once 'HTMLPurifier/URIFilterHarness.php';
/**
 * Tests for the DisableExternal URI filter.
 */
class HTMLPurifier_URIFilter_DisableExternalTest extends HTMLPurifier_URIFilterHarness
{

    /** Installs a new DisableExternal filter as the filter under test */
    function setUp() {
        parent::setUp();
        $this->filter = new HTMLPurifier_URIFilter_DisableExternal();
    }

    /** No URI.Host configured: an absolute URI is rejected */
    function testRemoveExternal() {
        $this->assertFiltering('http://example.com', false);
    }

    /** A host-less path is kept */
    function testPreserveInternal() {
        $this->assertFiltering('/foo/bar');
    }

    /** A URI on exactly URI.Host is kept */
    function testPreserveOurHost() {
        $this->config->set('URI', 'Host', 'example.com');
        $this->assertFiltering('http://example.com');
    }

    /** A URI on a subdomain of URI.Host is kept */
    function testPreserveOurSubdomain() {
        $this->config->set('URI', 'Host', 'example.com');
        $this->assertFiltering('http://www.example.com');
    }

    /** A URI on the parent domain of URI.Host is rejected */
    function testRemoveSuperdomain() {
        $this->config->set('URI', 'Host', 'www.example.com');
        $this->assertFiltering('http://example.com', false);
    }

}

View File

@ -0,0 +1,15 @@
<?php
require_once 'HTMLPurifier/URIHarness.php';
/**
 * Harness for URI filter tests; subclasses assign the filter under test to
 * $this->filter.
 */
class HTMLPurifier_URIFilterHarness extends HTMLPurifier_URIHarness
{

    /**
     * Prepares $this->filter, runs it on a URI and checks the outcome
     * @param $uri String URI to filter
     * @param $expect_uri String URI expected afterwards, true to expect the
     *        input unchanged, or false to expect the filter to reject it
     */
    function assertFiltering($uri, $expect_uri = true) {
        $this->prepareURI($uri, $expect_uri);
        // prepare() is declared with the config as its only parameter
        $this->filter->prepare($this->config);
        $result = $this->filter->filter($uri, $this->config, $this->context);
        $this->assertEitherFailOrIdentical($result, $uri, $expect_uri);
    }

}

View File

@ -0,0 +1,31 @@
<?php
require_once 'HTMLPurifier/URIParser.php';
/**
 * Harness with helpers for tests that work on URI objects.
 */
class HTMLPurifier_URIHarness extends HTMLPurifier_Harness
{

    /**
     * Prepares two URIs into object form
     * @param &$uri Reference to string input URI
     * @param &$expect_uri Reference to string expectation URI; true means
     *        the input is expected to come out unchanged
     * @note If $expect_uri is false, it will stay false
     */
    function prepareURI(&$uri, &$expect_uri) {
        $parser = new HTMLPurifier_URIParser();
        // copy the input into the expectation, not the other way round:
        // assigning $expect_uri to $uri would replace the URI under test
        // with boolean true
        if ($expect_uri === true) $expect_uri = $uri;
        $uri = $parser->parse($uri);
        if ($expect_uri !== false) {
            $expect_uri = $parser->parse($expect_uri);
        }
    }

    /**
     * Generates a URI object from the corresponding string
     * @param $uri String URI
     * @return Whatever HTMLPurifier_URIParser::parse() returns for $uri
     */
    function createURI($uri) {
        $parser = new HTMLPurifier_URIParser();
        return $parser->parse($uri);
    }

}

View File

@ -1,7 +1,6 @@
<?php
require_once 'HTMLPurifier/URI.php';
require_once 'HTMLPurifier/URIParser.php';
require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URISchemeRegistry.php';
@ -16,25 +15,15 @@ require_once 'HTMLPurifier/URIScheme/nntp.php';
// WARNING: All the URI schemes are far too relaxed, we need to tighten
// the checks.
class HTMLPurifier_URISchemeTest extends HTMLPurifier_Harness
class HTMLPurifier_URISchemeTest extends HTMLPurifier_URIHarness
{
function assertValidation($uri, $expect_uri = true) {
$parser = new HTMLPurifier_URIParser();
if ($expect_uri === true) $uri = $expect_uri;
$uri = $parser->parse($uri);
if ($expect_uri !== false) {
$expect_uri = $parser->parse($expect_uri);
}
$this->prepareURI($uri, $expect_uri);
// convenience hack: the scheme should be explicitly specified
$scheme = $uri->getSchemeObj($this->config, $this->context);
$result = $scheme->validate($uri, $this->config, $this->context);
if ($expect_uri !== false) {
$this->assertTrue($result);
$this->assertIdentical($uri, $expect_uri);
} else {
$this->assertFalse($result);
}
$this->assertEitherFailOrIdentical($result, $uri, $expect_uri);
}
function test_http_regular() {

View File

@ -3,7 +3,7 @@
require_once 'HTMLPurifier/URI.php';
require_once 'HTMLPurifier/URIParser.php';
class HTMLPurifier_URITest extends HTMLPurifier_Harness
class HTMLPurifier_URITest extends HTMLPurifier_URIHarness
{
function createURI($uri) {
@ -179,62 +179,4 @@ class HTMLPurifier_URITest extends HTMLPurifier_Harness
$this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', '');
}
function test_validate_configDisableExternal() {
$this->def = new HTMLPurifier_AttrDef_URI();
$this->config->set('URI', 'DisableExternal', true);
$this->config->set('URI', 'Host', 'sub.example.com');
$this->assertValidation('/foobar.txt');
$this->assertValidation('http://google.com/', false);
$this->assertValidation('http://sub.example.com/alas?foo=asd');
$this->assertValidation('http://example.com/teehee', false);
$this->assertValidation('http://www.example.com/#man', false);
$this->assertValidation('http://go.sub.example.com/perhaps?p=foo');
}
function test_validate_configDisableExternalResources() {
$this->config->set('URI', 'DisableExternalResources', true);
$this->assertValidation('http://sub.example.com/alas?foo=asd');
$this->assertValidation('/img.png');
$embeds = true; // passed by reference
$this->context->register('EmbeddedURI', $embeds);
$this->assertValidation('http://sub.example.com/alas?foo=asd', false);
$this->assertValidation('/img.png');
}
function test_validate_configBlacklist() {
$this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
$this->assertValidation('foo.txt');
$this->assertValidation('http://www.google.com/example.com/moo');
$this->assertValidation('http://example.com/#23', false);
$this->assertValidation('https://sub.domain.example.com/foobar', false);
$this->assertValidation('http://example.com.example.net/?whoo=foo', false);
$this->assertValidation('ftp://moo-moo.net/foo/foo/', false);
}
/*
function test_validate_configWhitelist() {
$this->config->set('URI', 'HostPolicy', 'DenyAll');
$this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
$this->assertValidation('http://example.com/fo/google.com', false);
$this->assertValidation('server.txt');
$this->assertValidation('ftp://www.google.com/?t=a');
$this->assertValidation('http://google.com.tricky.spamsite.net', false);
}
*/
}

View File

@ -21,7 +21,6 @@ require_once $simpletest_location . 'unit_tester.php';
require_once $simpletest_location . 'reporter.php';
require_once $simpletest_location . 'mock_objects.php';
require_once 'HTMLPurifier/SimpleTest/Reporter.php';
require_once 'HTMLPurifier/Harness.php';
// load Debugger
require_once 'Debugger.php';
@ -47,6 +46,7 @@ if (isset($_GET['standalone']) || (isset($argv[1]) && $argv[1] == 'standalone'))
} else {
require_once '../library/HTMLPurifier.auto.php';
}
require_once 'HTMLPurifier/Harness.php';
// setup special DefinitionCacheFactory decorator
$factory =& HTMLPurifier_DefinitionCacheFactory::instance();

View File

@ -102,6 +102,9 @@ $test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php';
$test_files[] = 'HTMLPurifier/Strategy/ValidateAttributesTest.php';
$test_files[] = 'HTMLPurifier/TagTransformTest.php';
$test_files[] = 'HTMLPurifier/TokenTest.php';
$test_files[] = 'HTMLPurifier/URIDefinitionTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php';
$test_files[] = 'HTMLPurifier/URIParserTest.php';
$test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
$test_files[] = 'HTMLPurifier/URISchemeTest.php';