mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-20 12:31:53 +00:00
[1.2.0]
- Update TODO . Add another possible plaintext formatter . Reference config-ideas.txt for URI options - Update code-quality.txt, removing issues that have been addressed and updating time for post-beta - Update config-ideas.txt . Added more possible URI directives . Removed silly language control directive - Improved documentation on Class, CSS and Host git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@524 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
2df5896324
commit
d48f9b6b21
4
NEWS
4
NEWS
@ -19,7 +19,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
+ TODO added request Phalanger
|
+ TODO added request Phalanger
|
||||||
+ TODO added request Native compression
|
+ TODO added request Native compression
|
||||||
+ TODO added request Remove redundant tags
|
+ TODO added request Remove redundant tags
|
||||||
|
+ TODO added possible plaintext formatter for HTML Purifier documentation
|
||||||
+ Updated ConfigDoc TODO
|
+ Updated ConfigDoc TODO
|
||||||
|
+ Updated code-quality.txt, removing issues that have been resolved
|
||||||
|
+ Improved inline comments in AttrDef/Class.php, AttrDef/CSS.php
|
||||||
|
and AttrDef/Host.php
|
||||||
. Switched to purify()-wide Context object registry
|
. Switched to purify()-wide Context object registry
|
||||||
. Refactored unit tests to minimize duplication
|
. Refactored unit tests to minimize duplication
|
||||||
. XSS attack sheet updated
|
. XSS attack sheet updated
|
||||||
|
4
TODO
4
TODO
@ -3,7 +3,7 @@ TODO List
|
|||||||
|
|
||||||
1.2 release
|
1.2 release
|
||||||
- Make URI validation routines tighter (especially mailto)
|
- Make URI validation routines tighter (especially mailto)
|
||||||
- More extensive URI filtering schemes
|
- More extensive URI filtering schemes (see URI in config-ideas.txt)
|
||||||
- Allow for background-image and list-style-image (see above)
|
- Allow for background-image and list-style-image (see above)
|
||||||
- Distinguish between different types of URIs, for instance, a mailto URI
|
- Distinguish between different types of URIs, for instance, a mailto URI
|
||||||
in IMG SRC is nonsensical
|
in IMG SRC is nonsensical
|
||||||
@ -29,6 +29,8 @@ TODO List
|
|||||||
shouldn't be paragraphed, such as lists and tables).
|
shouldn't be paragraphed, such as lists and tables).
|
||||||
- Linkify URLs
|
- Linkify URLs
|
||||||
- Smileys
|
- Smileys
|
||||||
|
- Linkification for HTML Purifier docs: notably configuration and
|
||||||
|
class names
|
||||||
|
|
||||||
3.0 release
|
3.0 release
|
||||||
- Extended HTML capabilities based on namespacing and tag transforms
|
- Extended HTML capabilities based on namespacing and tag transforms
|
||||||
|
@ -4,11 +4,8 @@ Code Quality Issues
|
|||||||
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||||
while I can't list mistakes in here, I can list prototype-like segments
|
while I can't list mistakes in here, I can list prototype-like segments
|
||||||
of code that should be aggressively refactored after the beta is released.
|
of code that should be aggressively refactored. This does not list
|
||||||
This does not list optimization issues, that needs to be done after intense
|
optimization issues, which need to be addressed after intense profiling.
|
||||||
profiling.
|
|
||||||
|
|
||||||
Here we go:
|
|
||||||
|
|
||||||
AttrDef
|
AttrDef
|
||||||
Class - doesn't support Unicode characters (fringe); uses regular
|
Class - doesn't support Unicode characters (fringe); uses regular
|
||||||
@ -16,12 +13,10 @@ AttrDef
|
|||||||
Lang - code duplication; premature optimization; doesn't consult official
|
Lang - code duplication; premature optimization; doesn't consult official
|
||||||
lists (fringe)
|
lists (fringe)
|
||||||
Length - easily mistaken for CSSLength
|
Length - easily mistaken for CSSLength
|
||||||
URI - multiple regular expressions; needs host validation routines factored
|
URI - multiple regular expressions; missing validation for query,
|
||||||
out for mailto scheme; missing validation for query; fragment and path,
|
fragment and path
|
||||||
no percent-encode fixing
|
|
||||||
CSS - parser doesn't accept advanced CSS (fringe)
|
CSS - parser doesn't accept advanced CSS (fringe)
|
||||||
Number - constructor interface is inconsistent with Integer
|
Number - constructor interface inconsistent with Integer
|
||||||
AttrTransform - doesn't accept AttrContext
|
|
||||||
Config - "load configuration" hooks missing, rich set* accessors missing
|
Config - "load configuration" hooks missing, rich set* accessors missing
|
||||||
ConfigSchema - redefinition is a mess
|
ConfigSchema - redefinition is a mess
|
||||||
Strategy
|
Strategy
|
||||||
@ -31,8 +26,7 @@ Strategy
|
|||||||
might be efficient).
|
might be efficient).
|
||||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||||
URIScheme - needs to have callable generic checks
|
URIScheme - needs to have callable generic checks
|
||||||
ftp - missing typecode check
|
mailto - doesn't validate emails, doesn't validate querystring
|
||||||
mailto - doesn't validate emails
|
|
||||||
news - doesn't validate opaque path
|
news - doesn't validate opaque path
|
||||||
nntp - doesn't constrain path
|
nntp - doesn't constrain path
|
||||||
EOL
|
EOL
|
||||||
|
@ -17,24 +17,17 @@ time. Note the naming convention: %Namespace.Directive
|
|||||||
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
||||||
%Attr.ClassWhitelist.
|
%Attr.ClassWhitelist.
|
||||||
|
|
||||||
%Attr.LangAlphaOnly - designate whether or not to allow numerals in language
|
|
||||||
code subtags
|
|
||||||
* RFC 1766, the current standard referenced by XML, does not permit
|
|
||||||
numbers, but,
|
|
||||||
* RFC 3066, the superseding best practice standard since January 2001,
|
|
||||||
permits them.
|
|
||||||
We allow numbers by default, but you generally never see them
|
|
||||||
at all, which makes this a little more sane.
|
|
||||||
|
|
||||||
%Attr.MaxWidth,
|
%Attr.MaxWidth,
|
||||||
%Attr.MaxHeight - caps for width and height related checks.
|
%Attr.MaxHeight - caps for width and height related checks.
|
||||||
(a hack in Pixels for an image crashing attack could be replaced by this)
|
(the hack in Pixels for an image crashing attack could be replaced by this)
|
||||||
|
|
||||||
%URI.Munge - will munge all URIs to a different URI, which should redirect
|
%URI.Munge - will munge all external URIs to a different URI, which redirects
|
||||||
the user to the applicable page. A urlencoded version of the URI
|
the user to the applicable page. A urlencoded version of the URI
|
||||||
will replace any instances of %s in the string. One possible
|
will replace any instances of %s in the string. One possible
|
||||||
string is 'http://www.google.com/url?q=%s'. Useful for preventing
|
string is 'http://www.google.com/url?q=%s'. Useful for preventing
|
||||||
pagerank from being sent to other sites
|
pagerank from being sent to other sites, but can also be used to
|
||||||
|
redirect to a splash page notifying user that they are leaving your
|
||||||
|
website.
|
||||||
|
|
||||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||||
spread of ill-gotten pagerank
|
spread of ill-gotten pagerank
|
||||||
@ -49,7 +42,16 @@ time. Note the naming convention: %Namespace.Directive
|
|||||||
'DenyAll' or 'AllowAll' (default)
|
'DenyAll' or 'AllowAll' (default)
|
||||||
|
|
||||||
%URI.DisableIPHosts - URIs that have IP addresses for hosts are disallowed.
|
%URI.DisableIPHosts - URIs that have IP addresses for hosts are disallowed.
|
||||||
Be sure to also grab unusual encodings (dword, hex and octal)
|
Be sure to also grab unusual encodings (dword, hex and octal), which may
|
||||||
|
currently be caught by regular DNS
|
||||||
|
%URI.DisableAbsoluteDNS - Remove extra dots after host names that trigger
|
||||||
|
absolute DNS. While this is actually the preferred method according to
|
||||||
|
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||||
|
%URI.DisableIDN - Disallow raw internationalized domain names. Punycode
|
||||||
|
will still be permitted.
|
||||||
|
|
||||||
|
%URI.ConvertUnusualIPHosts - transform dword/hex/octal IP addresses to the
|
||||||
|
regular form
|
||||||
|
|
||||||
%URI.DisableExternalResources - disallow resource links (i.e. URIs that result
|
%URI.DisableExternalResources - disallow resource links (i.e. URIs that result
|
||||||
in immediate requests, such as src in IMG) to external websites
|
in immediate requests, such as src in IMG) to external websites
|
||||||
|
@ -43,6 +43,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
|||||||
$propvalues[$property] = $result;
|
$propvalues[$property] = $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// procedure does not write the new CSS simultaneously, so it's
|
||||||
// slightly inefficient, but it's the only way of getting rid of
|
// slightly inefficient, but it's the only way of getting rid of
|
||||||
// duplicates. Perhaps config to optimize it, but not now.
|
// duplicates. Perhaps config to optimize it, but not now.
|
||||||
|
|
||||||
|
@ -24,13 +24,14 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
|||||||
// and plus it would complicate optimization efforts (you never
|
// and plus it would complicate optimization efforts (you never
|
||||||
// see that anyway).
|
// see that anyway).
|
||||||
$matches = array();
|
$matches = array();
|
||||||
$pattern = '/(?:(?<=\s)|\A)'.
|
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
|
||||||
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
|
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
|
||||||
'(?:(?=\s)|\z)/';
|
'(?:(?=\s)|\z)/'; // look ahead for space or string end
|
||||||
preg_match_all($pattern, $string, $matches);
|
preg_match_all($pattern, $string, $matches);
|
||||||
|
|
||||||
if (empty($matches[1])) return false;
|
if (empty($matches[1])) return false;
|
||||||
|
|
||||||
|
// reconstruct class string
|
||||||
$new_string = '';
|
$new_string = '';
|
||||||
foreach ($matches[1] as $class_names) {
|
foreach ($matches[1] as $class_names) {
|
||||||
$new_string .= $class_names . ' ';
|
$new_string .= $class_names . ' ';
|
||||||
|
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
|||||||
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a host according to the IPv4, IPv6 and DNS specifications.
|
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
@ -35,6 +35,8 @@ class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
|||||||
if ($valid === false) return false;
|
if ($valid === false) return false;
|
||||||
return '['. $valid . ']';
|
return '['. $valid . ']';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// need to do checks on unusual encodings too
|
||||||
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
||||||
if ($ipv4 !== false) return $ipv4;
|
if ($ipv4 !== false) return $ipv4;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user