mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 08:21:52 +00:00
[1.3.0] More control of URIs granted
# Invalid images are now removed, rather than replaced with a dud <img src="" alt="Invalid image" />. Previous behavior can be restored with new directive %Core.RemoveInvalidImg set to false. ! New directives %URI.DisableExternalResources and %URI.DisableResources ! New directive %Attr.DisableURI, which eliminates all hyperlinking - Missing "Available since" documentation added git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@575 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
61b6ee7183
commit
49cb2a4a7c
5
NEWS
5
NEWS
@ -11,6 +11,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
|
||||
1.3.0, unknown release date
|
||||
(major feature release)
|
||||
# Invalid images are now removed, rather than replaced with a dud
|
||||
<img src="" alt="Invalid image" />. Previous behavior can be restored
|
||||
with new directive %Core.RemoveInvalidImg set to false.
|
||||
! (X)HTML Strict now supported
|
||||
+ Transparently handles inline elements in block context (blockquote)
|
||||
! Added GET method to demo for easier validation, added 50kb max input size
|
||||
@ -19,6 +22,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let
|
||||
users narrow the set of allowed tags
|
||||
! <li value="4"> and <ul start="2"> now allowed in loose mode
|
||||
! New directives %URI.DisableExternalResources and %URI.DisableResources
|
||||
! New directive %Attr.DisableURI, which eliminates all hyperlinking
|
||||
- Added missing type to ChildDef_Chameleon
|
||||
- Remove Tidy option from demo if Tidy is not available
|
||||
. ChildDef_Required guards against empty tags
|
||||
|
54
TODO
54
TODO
@ -1,41 +1,51 @@
|
||||
|
||||
TODO List
|
||||
|
||||
= KEY ====================
|
||||
# Flagship
|
||||
- Regular
|
||||
? At-risk
|
||||
==========================
|
||||
|
||||
1.3 release
|
||||
- Enable strict-compliant (X)HTML output
|
||||
- Requires to some extent 2.0 formatters to save elements in blockquote
|
||||
- Make URI validation routines tighter (especially mailto)
|
||||
- More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
- Allow for background-image and list-style-image (see above)
|
||||
- Error logging for filtering/cleanup procedures
|
||||
- Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||
- Caching of everything
|
||||
- Configuration profiles: sets of directives that get set with one func call
|
||||
# More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
# Allow for background-image and list-style-image (intrinsically tied to above)
|
||||
- Aggressive caching
|
||||
- Pretty-printer of *Definition, allowing users to see at a glance what is
|
||||
allowed and what isn't
|
||||
? Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||
? Configuration profiles: sets of directives that get set with one func call
|
||||
? ConfigSchema directive aliases (so we can rename some of them)
|
||||
? URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||
|
||||
1.4 release
|
||||
- Add various "levels" of cleaning
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- Requires I18N facilities to be created first (COMPLEX)
|
||||
|
||||
1.5 release
|
||||
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
||||
- More fine-grained control over escaping behavior
|
||||
- Silently drop content in between SCRIPT tags (can be generalized to allow
|
||||
specification of elements that, when detected as foreign, trigger removal
|
||||
of children, although unbalanced tags could wreak havoc (or at least
|
||||
delete the rest of the document)).
|
||||
|
||||
1.5 release
|
||||
- Additional support for poorly written HTML
|
||||
- Implement all non-essential attribute transforms
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
||||
1.6 release
|
||||
# Additional support for poorly written HTML
|
||||
- Implement all non-essential attribute transforms (BIG!)
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
|
||||
2.0 release
|
||||
- Formatters for plaintext
|
||||
# Formatters for plaintext (COMPLEX)
|
||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||
shouldn't be paragraphed, such as lists and tables).
|
||||
- Linkify URLs
|
||||
- Smileys
|
||||
- Linkification for HTML Purifier docs: notably configuration and
|
||||
class names
|
||||
- Linkification for HTML Purifier docs: notably configuration and classes
|
||||
|
||||
3.0 release
|
||||
- Extended HTML capabilities based on namespacing and tag transforms
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
@ -43,7 +53,11 @@ TODO List
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (very tricky issue)
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- Drupal
|
||||
- WordPress
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
@ -54,7 +68,7 @@ Unknown release (on a scratch-an-itch basis)
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
- Have 'lang' attribute be checked against official lists
|
||||
- Info on how to embed YouTube videos (and related content) without patches
|
||||
- Docs on how to embed YouTube videos (and friends) without patches
|
||||
|
||||
Encoding workarounds
|
||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||
|
@ -53,7 +53,3 @@ time. Note the naming convention: %Namespace.Directive
|
||||
absolute DNS. While this is actually the preferred method according to
|
||||
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||
|
||||
%URI.DisableExternalResources - disallow resource links (i.e. URIs that result
|
||||
in immediate requests, such as src in IMG) to external websites
|
||||
|
||||
%HTML.DisableImg - disables all images
|
||||
|
@ -24,7 +24,7 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::Define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternal', false, 'bool',
|
||||
'Disables links to external websites. This is a highly effective '.
|
||||
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
||||
@ -34,6 +34,26 @@ HTMLPurifier_ConfigSchema::Define(
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternalResources', false, 'bool',
|
||||
'Disables the embedding of external resources, preventing users from '.
|
||||
'embedding things like images from other hosts. This prevents '.
|
||||
'access tracking (good for email viewers), bandwidth leeching, '.
|
||||
'cross-site request forging, goatse.cx posting, and '.
|
||||
'other nasties, but also results in '.
|
||||
'a loss of end-user functionality (they can\'t directly post a pic '.
|
||||
'they posted from Flickr anymore). Use it if you don\'t have a '.
|
||||
'robust user-content moderation team. This directive has been '.
|
||||
'available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool',
|
||||
'Disables embedding resources, essentially meaning no pictures. You can '.
|
||||
'still link to them though. See %URI.DisableExternalResources for why '.
|
||||
'this might be a good idea. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
@ -43,15 +63,15 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
var $host;
|
||||
var $PercentEncoder;
|
||||
var $embeds;
|
||||
var $embeds_resource;
|
||||
|
||||
/**
|
||||
* @param $embeds Does the URI here result in an extra HTTP request?
|
||||
* @param $embeds_resource Does the URI here result in an extra HTTP request?
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_URI($embeds = false) {
|
||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
||||
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
$this->embeds = (bool) $embeds;
|
||||
$this->embeds_resource = (bool) $embeds_resource;
|
||||
}
|
||||
|
||||
function validate($uri, $config, &$context) {
|
||||
@ -105,18 +125,25 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
}
|
||||
|
||||
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// it references cannot be located
|
||||
if ($this->embeds && !$scheme_obj->browsable) {
|
||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if ($authority !== null) {
|
||||
|
||||
// remove URI if it's absolute and we disallow externals
|
||||
// remove URI if it's absolute and we disabled externals or
|
||||
// if it's absolute and embedded and we disabled external resources
|
||||
unset($our_host);
|
||||
if ($config->get('URI', 'DisableExternal')) {
|
||||
if (
|
||||
$config->get('URI', 'DisableExternal') ||
|
||||
(
|
||||
$config->get('URI', 'DisableExternalResources') &&
|
||||
$this->embeds_resource
|
||||
)
|
||||
) {
|
||||
$our_host = $config->get('URI', 'Host');
|
||||
if ($our_host === null) return false;
|
||||
}
|
||||
|
@ -43,7 +43,8 @@ HTMLPurifier_ConfigSchema::define(
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Strict', false, 'bool',
|
||||
'Determines whether or not to use Transitional (loose) or Strict rulesets.'
|
||||
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@ -53,14 +54,16 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'Example: by default value, <code><blockquote>Foo</blockquote></code> '.
|
||||
'would become <code><blockquote><p>Foo</p></blockquote></code>. The '.
|
||||
'<code><p></code> tags can be replaced '.
|
||||
'with whatever you desire, as long as it is a block level element.'
|
||||
'with whatever you desire, as long as it is a block level element. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Parent', 'div', 'string',
|
||||
'String name of element that HTML fragment passed to library will be '.
|
||||
'inserted in. An interesting variation would be using span as the '.
|
||||
'parent element, meaning that only inline tags would be allowed.'
|
||||
'parent element, meaning that only inline tags would be allowed. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@ -72,7 +75,8 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'supported in the first place (like embed). If you change this, you '.
|
||||
'probably also want to change %HTML.AllowedAttributes. '.
|
||||
'<strong>Warning:</strong> If another directive conflicts with the '.
|
||||
'elements here, <em>that</em> directive will win and override.'
|
||||
'elements here, <em>that</em> directive will win and override. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@ -84,7 +88,14 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'elements here, <em>that</em> directive will win and override. For '.
|
||||
'example, %HTML.EnableAttrID will take precedence over *.id in this '.
|
||||
'directive. You must set that directive to true before you can use '.
|
||||
'IDs at all.'
|
||||
'IDs at all. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DisableURI', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
@ -444,16 +455,18 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info['td']->attr['colspan'] =
|
||||
$this->info['th']->attr['colspan'] = $e__NumberSpan;
|
||||
|
||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['a']->attr['href'] =
|
||||
$this->info['img']->attr['longdesc'] =
|
||||
$this->info['del']->attr['cite'] =
|
||||
$this->info['ins']->attr['cite'] =
|
||||
$this->info['blockquote']->attr['cite'] =
|
||||
$this->info['q']->attr['cite'] = $e_URI;
|
||||
|
||||
// URI that causes HTTP request
|
||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||
if (!$config->get('Attr', 'DisableURI')) {
|
||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['a']->attr['href'] =
|
||||
$this->info['img']->attr['longdesc'] =
|
||||
$this->info['del']->attr['cite'] =
|
||||
$this->info['ins']->attr['cite'] =
|
||||
$this->info['blockquote']->attr['cite'] =
|
||||
$this->info['q']->attr['cite'] = $e_URI;
|
||||
|
||||
// URI that causes HTTP request
|
||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||
}
|
||||
|
||||
if (!$this->strict) {
|
||||
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
|
||||
|
@ -5,6 +5,14 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/TagTransform.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'RemoveInvalidImg', true, 'bool',
|
||||
'This directive enables pre-emptive URI checking in <code>img</code> '.
|
||||
'tags, as the attribute validation strategy is not authorized to '.
|
||||
'remove elements from the document. This directive has been available '.
|
||||
'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Removes all unrecognized tags from the list of tokens.
|
||||
*
|
||||
@ -25,7 +33,23 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
if (!empty( $token->is_tag )) {
|
||||
// DEFINITION CALL
|
||||
if (isset($definition->info[$token->name])) {
|
||||
// leave untouched
|
||||
// leave untouched, except for a few special cases:
|
||||
|
||||
// hard-coded image special case, pre-emptively drop
|
||||
// if not available. Probably not abstract-able
|
||||
if ( $token->name == 'img' ) {
|
||||
if (!isset($token->attr['src'])) continue;
|
||||
if (!isset($definition->info['img']->attr['src'])) {
|
||||
continue;
|
||||
}
|
||||
$token->attr['src'] =
|
||||
$definition->
|
||||
info['img']->
|
||||
attr['src']->
|
||||
validate($token->attr['src']);
|
||||
if ($token->attr['src'] === false) continue;
|
||||
}
|
||||
|
||||
} elseif (
|
||||
isset($definition->info_tag_transform[$token->name])
|
||||
) {
|
||||
|
@ -271,6 +271,20 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
}
|
||||
|
||||
function testDisableExternalResources() {
|
||||
|
||||
$this->config->set('URI', 'DisableExternalResources', true);
|
||||
|
||||
$this->def = new HTMLPurifier_AttrDef_URI();
|
||||
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
||||
$this->assertDef('/img.png');
|
||||
|
||||
$this->def = new HTMLPurifier_AttrDef_URI(true);
|
||||
$this->assertDef('http://sub.example.com/alas?foo=asd', false);
|
||||
$this->assertDef('/img.png');
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@ -42,6 +42,12 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
|
||||
' Warning!</span>'
|
||||
);
|
||||
|
||||
// test removal of img tag
|
||||
$this->assertResult(
|
||||
'<img />',
|
||||
''
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -125,6 +125,9 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
|
||||
);
|
||||
|
||||
// test required attributes for img
|
||||
|
||||
// (this should never happen, as RemoveForeignElements
|
||||
// should have removed the offending image tag)
|
||||
$this->assertResult(
|
||||
'<img />',
|
||||
'<img src="" alt="Invalid image" />'
|
||||
|
@ -8,6 +8,10 @@ class HTMLPurifier_Test extends UnitTestCase
|
||||
{
|
||||
var $purifier;
|
||||
|
||||
function setUp() {
|
||||
$this->purifier = new HTMLPurifier();
|
||||
}
|
||||
|
||||
function assertPurification($input, $expect = null) {
|
||||
if ($expect === null) $expect = $input;
|
||||
$result = $this->purifier->purify($input);
|
||||
@ -15,7 +19,6 @@ class HTMLPurifier_Test extends UnitTestCase
|
||||
}
|
||||
|
||||
function testNull() {
|
||||
$this->purifier = new HTMLPurifier();
|
||||
$this->assertPurification("Null byte\0", "Null byte");
|
||||
}
|
||||
|
||||
@ -53,6 +56,19 @@ class HTMLPurifier_Test extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
function testDisableURI() {
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Attr', 'DisableURI', true);
|
||||
$this->purifier = new HTMLPurifier($config);
|
||||
|
||||
$this->assertPurification(
|
||||
'<img src="foobar"/>',
|
||||
''
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
Loading…
Reference in New Issue
Block a user