mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-03 05:11:52 +00:00
[3.1.0] Revamp URI handling of percent encoding and validation.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1709 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
77ce3e8b4a
commit
cb5d5d0648
2
NEWS
2
NEWS
@ -32,6 +32,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
! Commas, not dashes, used for serializer IDs. This change is forwards-compatible
|
! Commas, not dashes, used for serializer IDs. This change is forwards-compatible
|
||||||
and allows for version numbers like "3.1.0-dev".
|
and allows for version numbers like "3.1.0-dev".
|
||||||
! %HTML.Allowed deals gracefully with whitespace anywhere, anytime!
|
! %HTML.Allowed deals gracefully with whitespace anywhere, anytime!
|
||||||
|
! HTML Purifier's URI handling is a lot more robust, with much stricter
|
||||||
|
validation checks and better percent encoding handling.
|
||||||
- InterchangeBuilder now alphabetizes its lists
|
- InterchangeBuilder now alphabetizes its lists
|
||||||
- Validation error in configdoc output fixed
|
- Validation error in configdoc output fixed
|
||||||
- Iconv and other encoding errors muted even with custom error handlers that
|
- Iconv and other encoding errors muted even with custom error handlers that
|
||||||
|
3
TODO
3
TODO
@ -11,6 +11,8 @@ If no interest is expressed for a feature that may require a considerable
|
|||||||
amount of effort to implement, it may get endlessly delayed. Do not be
|
amount of effort to implement, it may get endlessly delayed. Do not be
|
||||||
afraid to cast your vote for the next feature to be implemented!
|
afraid to cast your vote for the next feature to be implemented!
|
||||||
|
|
||||||
|
- Implement validation for query and for fragment
|
||||||
|
|
||||||
FUTURE VERSIONS
|
FUTURE VERSIONS
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
@ -47,6 +49,7 @@ FUTURE VERSIONS
|
|||||||
AttrDef class). Probably will use CSSTidy class?
|
AttrDef class). Probably will use CSSTidy class?
|
||||||
# More control over allowed CSS properties using a modularization
|
# More control over allowed CSS properties using a modularization
|
||||||
# HTML 5 support
|
# HTML 5 support
|
||||||
|
# IRI support
|
||||||
- Standardize token armor for all areas of processing
|
- Standardize token armor for all areas of processing
|
||||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||||
Also, enable disabling of directionality
|
Also, enable disabling of directionality
|
||||||
|
@ -215,12 +215,12 @@
|
|||||||
</directive>
|
</directive>
|
||||||
<directive id="URI.Disable">
|
<directive id="URI.Disable">
|
||||||
<file name="HTMLPurifier/AttrDef/URI.php">
|
<file name="HTMLPurifier/AttrDef/URI.php">
|
||||||
<line>24</line>
|
<line>23</line>
|
||||||
</file>
|
</file>
|
||||||
</directive>
|
</directive>
|
||||||
<directive id="URI.Munge">
|
<directive id="URI.Munge">
|
||||||
<file name="HTMLPurifier/AttrDef/URI.php">
|
<file name="HTMLPurifier/AttrDef/URI.php">
|
||||||
<line>78</line>
|
<line>68</line>
|
||||||
</file>
|
</file>
|
||||||
</directive>
|
</directive>
|
||||||
<directive id="Core.ColorKeywords">
|
<directive id="Core.ColorKeywords">
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
protected $parser, $percentEncoder;
|
protected $parser;
|
||||||
protected $embedsResource;
|
protected $embedsResource;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -15,7 +15,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
*/
|
*/
|
||||||
public function __construct($embeds_resource = false) {
|
public function __construct($embeds_resource = false) {
|
||||||
$this->parser = new HTMLPurifier_URIParser();
|
$this->parser = new HTMLPurifier_URIParser();
|
||||||
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
|
|
||||||
$this->embedsResource = (bool) $embeds_resource;
|
$this->embedsResource = (bool) $embeds_resource;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -23,9 +22,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
if ($config->get('URI', 'Disable')) return false;
|
if ($config->get('URI', 'Disable')) return false;
|
||||||
|
|
||||||
// initial operations
|
|
||||||
$uri = $this->parseCDATA($uri);
|
$uri = $this->parseCDATA($uri);
|
||||||
$uri = $this->percentEncoder->normalize($uri);
|
|
||||||
|
|
||||||
// parse the URI
|
// parse the URI
|
||||||
$uri = $this->parser->parse($uri);
|
$uri = $this->parser->parse($uri);
|
||||||
@ -61,13 +58,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
$context->destroy('EmbeddedURI');
|
$context->destroy('EmbeddedURI');
|
||||||
if (!$ok) return false;
|
if (!$ok) return false;
|
||||||
|
|
||||||
// munge scheme off if necessary (this must be last)
|
|
||||||
if (!is_null($uri->scheme) && is_null($uri->host)) {
|
|
||||||
if ($uri_def->defaultScheme == $uri->scheme) {
|
|
||||||
$uri->scheme = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// back to string
|
// back to string
|
||||||
$result = $uri->toString();
|
$result = $uri->toString();
|
||||||
|
|
||||||
|
@ -36,11 +36,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
|||||||
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
||||||
if ($ipv4 !== false) return $ipv4;
|
if ($ipv4 !== false) return $ipv4;
|
||||||
|
|
||||||
// validate a domain name here, do filtering, etc etc etc
|
// A regular domain name.
|
||||||
|
|
||||||
// We could use this, but it would break I18N domain names
|
// This breaks I18N domain names, but we don't have proper IRI support,
|
||||||
//$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
|
// so force users to insert Punycode. If there's complaining we'll
|
||||||
//if (!$match) return false;
|
// try to fix things into an international friendly form.
|
||||||
|
|
||||||
|
// The productions describing this are:
|
||||||
|
$a = '[a-z]'; // alpha
|
||||||
|
$an = '[a-z0-9]'; // alphanum
|
||||||
|
$and = '[a-z0-9-]'; // alphanum | "-"
|
||||||
|
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
|
||||||
|
$domainlabel = "$an($and*$an)?";
|
||||||
|
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
||||||
|
$toplabel = "$a($and*$an)?";
|
||||||
|
// hostname = *( domainlabel "." ) toplabel [ "." ]
|
||||||
|
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
|
||||||
|
if (!$match) return false;
|
||||||
|
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
|
@ -2,12 +2,68 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Class that handles operations involving percent-encoding in URIs.
|
* Class that handles operations involving percent-encoding in URIs.
|
||||||
|
*
|
||||||
|
* @warning
|
||||||
|
* Be careful when reusing instances of PercentEncoder. The object
|
||||||
|
* you use for normalize() SHOULD NOT be used for encode(), or
|
||||||
|
* vice-versa.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_PercentEncoder
|
class HTMLPurifier_PercentEncoder
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fix up percent-encoding by decoding unreserved characters and normalizing
|
* Reserved characters to preserve when using encode().
|
||||||
|
*/
|
||||||
|
protected $preserve = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String of characters that should be preserved while using encode().
|
||||||
|
*/
|
||||||
|
public function __construct($preserve = false) {
|
||||||
|
// unreserved letters, ought to const-ify
|
||||||
|
for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits
|
||||||
|
for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case
|
||||||
|
for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case
|
||||||
|
$this->preserve[45] = true; // Dash -
|
||||||
|
$this->preserve[46] = true; // Period .
|
||||||
|
$this->preserve[95] = true; // Underscore _
|
||||||
|
$this->preserve[126]= true; // Tilde ~
|
||||||
|
|
||||||
|
// extra letters not to escape
|
||||||
|
if ($preserve !== false) {
|
||||||
|
for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
|
||||||
|
$this->preserve[ord($preserve[$i])] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Our replacement for urlencode: it percent-encodes every character except
|
||||||
|
* the unreserved ones and any extra characters that were instructed to be preserved.
|
||||||
|
* @note
|
||||||
|
* Assumes that the string has already been normalized, making any
|
||||||
|
* and all percent escape sequences valid. Percents will not be
|
||||||
|
* re-escaped, regardless of their status in $preserve
|
||||||
|
* @param $string String to be encoded
|
||||||
|
* @return Encoded string.
|
||||||
|
*/
|
||||||
|
public function encode($string) {
|
||||||
|
$ret = '';
|
||||||
|
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
|
||||||
|
if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) {
|
||||||
|
$ret .= '%' . sprintf('%02X', $int);
|
||||||
|
} else {
|
||||||
|
$ret .= $string[$i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fix up percent-encoding by decoding unreserved characters and normalizing.
|
||||||
|
* @warning This function is affected by $preserve, even though the
|
||||||
|
* usual desired behavior is for this not to preserve those
|
||||||
|
* characters. Be careful when reusing instances of PercentEncoder!
|
||||||
* @param $string String to normalize
|
* @param $string String to normalize
|
||||||
*/
|
*/
|
||||||
public function normalize($string) {
|
public function normalize($string) {
|
||||||
@ -27,12 +83,7 @@ class HTMLPurifier_PercentEncoder
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$int = hexdec($encoding);
|
$int = hexdec($encoding);
|
||||||
if (
|
if (isset($this->preserve[$int])) {
|
||||||
($int >= 48 && $int <= 57) || // digits
|
|
||||||
($int >= 65 && $int <= 90) || // uppercase letters
|
|
||||||
($int >= 97 && $int <= 122) || // lowercase letters
|
|
||||||
$int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._
|
|
||||||
) {
|
|
||||||
$ret .= chr($int) . $text;
|
$ret .= chr($int) . $text;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,12 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HTML Purifier's internal representation of a URI
|
* HTML Purifier's internal representation of a URI.
|
||||||
|
* @note
|
||||||
|
* Internal data-structures are completely escaped. If the data needs
|
||||||
|
* to be used in a non-URI context (which is very unlikely), be sure
|
||||||
|
* to decode it first. The URI may not necessarily be well-formed until
|
||||||
|
* validate() is called.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_URI
|
class HTMLPurifier_URI
|
||||||
{
|
{
|
||||||
@ -49,13 +54,27 @@ class HTMLPurifier_URI
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generic validation method applicable for all schemes
|
* Generic validation method applicable for all schemes. May modify
|
||||||
|
* this URI in order to get it into a compliant form.
|
||||||
* @param $config Instance of HTMLPurifier_Config
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
* @param $context Instance of HTMLPurifier_Context
|
* @param $context Instance of HTMLPurifier_Context
|
||||||
* @return True if validation/filtering succeeds, false if failure
|
* @return True if validation/filtering succeeds, false if failure
|
||||||
*/
|
*/
|
||||||
public function validate($config, $context) {
|
public function validate($config, $context) {
|
||||||
|
|
||||||
|
// ABNF definitions from RFC 3986
|
||||||
|
$chars_sub_delims = '!$&\'()*+,;=';
|
||||||
|
$chars_gen_delims = ':/?#[]@';
|
||||||
|
$chars_pchar = $chars_sub_delims . ':@';
|
||||||
|
|
||||||
|
// validate scheme (MUST BE FIRST!)
|
||||||
|
if (!is_null($this->scheme) && is_null($this->host)) {
|
||||||
|
$def = $config->getDefinition('URI');
|
||||||
|
if ($def->defaultScheme === $this->scheme) {
|
||||||
|
$this->scheme = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// validate host
|
// validate host
|
||||||
if (!is_null($this->host)) {
|
if (!is_null($this->host)) {
|
||||||
$host_def = new HTMLPurifier_AttrDef_URI_Host();
|
$host_def = new HTMLPurifier_AttrDef_URI_Host();
|
||||||
@ -63,18 +82,51 @@ class HTMLPurifier_URI
|
|||||||
if ($this->host === false) $this->host = null;
|
if ($this->host === false) $this->host = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// validate userinfo (percent-encode anything outside unreserved / sub-delims / ":")
|
||||||
|
if (!is_null($this->userinfo)) {
|
||||||
|
$encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
|
||||||
|
$this->userinfo = $encoder->encode($this->userinfo);
|
||||||
|
}
|
||||||
|
|
||||||
// validate port
|
// validate port
|
||||||
if (!is_null($this->port)) {
|
if (!is_null($this->port)) {
|
||||||
if ($this->port < 1 || $this->port > 65535) $this->port = null;
|
if ($this->port < 1 || $this->port > 65535) $this->port = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// query and fragment are quite simple in terms of definition:
|
// validate path
|
||||||
// *( pchar / "/" / "?" ), so define their validation routines
|
$path_parts = array();
|
||||||
// when we start fixing percent encoding
|
$segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
|
||||||
|
if (!is_null($this->host)) {
|
||||||
// path gets to be validated against a hodge-podge of rules depending
|
// path-abempty (hier and relative)
|
||||||
// on the status of authority and scheme, but it's not that important,
|
$this->path = $segments_encoder->encode($this->path);
|
||||||
// esp. since it won't be applicable to everyone
|
} elseif ($this->path !== '' && $this->path[0] === '/') {
|
||||||
|
// path-absolute (hier and relative)
|
||||||
|
if (strlen($this->path) >= 2 && $this->path[1] === '/') {
|
||||||
|
// This shouldn't ever happen!
|
||||||
|
$this->path = '';
|
||||||
|
} else {
|
||||||
|
$this->path = $segments_encoder->encode($this->path);
|
||||||
|
}
|
||||||
|
} elseif (!is_null($this->scheme) && $this->path !== '') {
|
||||||
|
// path-rootless (hier)
|
||||||
|
// Short circuit evaluation means we don't need to check nz
|
||||||
|
$this->path = $segments_encoder->encode($this->path);
|
||||||
|
} elseif (is_null($this->scheme) && $this->path !== '') {
|
||||||
|
// path-noscheme (relative)
|
||||||
|
// (once again, not checking nz)
|
||||||
|
$segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
|
||||||
|
$c = strpos($this->path, '/');
|
||||||
|
if ($c !== false) {
|
||||||
|
$this->path =
|
||||||
|
$segment_nc_encoder->encode(substr($this->path, 0, $c)) .
|
||||||
|
$segments_encoder->encode(substr($this->path, $c));
|
||||||
|
} else {
|
||||||
|
$this->path = $segment_nc_encoder->encode($this->path);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// path-empty (hier and relative)
|
||||||
|
$this->path = ''; // just to be safe
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
@ -2,24 +2,39 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses a URI into the components and fragment identifier as specified
|
* Parses a URI into the components and fragment identifier as specified
|
||||||
* by RFC 2396.
|
* by RFC 3986.
|
||||||
* @todo Replace regexps with a native PHP parser
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_URIParser
|
class HTMLPurifier_URIParser
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses a URI
|
* Instance of HTMLPurifier_PercentEncoder to do normalization with.
|
||||||
|
*/
|
||||||
|
protected $percentEncoder;
|
||||||
|
|
||||||
|
public function __construct() {
|
||||||
|
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses a URI.
|
||||||
* @param $uri string URI to parse
|
* @param $uri string URI to parse
|
||||||
* @return HTMLPurifier_URI representation of URI
|
* @return HTMLPurifier_URI representation of URI. This representation has
|
||||||
|
* not been validated yet and may not conform to RFC.
|
||||||
*/
|
*/
|
||||||
public function parse($uri) {
|
public function parse($uri) {
|
||||||
|
|
||||||
|
$uri = $this->percentEncoder->normalize($uri);
|
||||||
|
|
||||||
|
// Regexp is as per Appendix B.
|
||||||
|
// Note that ["<>] are an addition to the RFC's recommended
|
||||||
|
// characters, because they represent external delimiters.
|
||||||
$r_URI = '!'.
|
$r_URI = '!'.
|
||||||
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
'(([^:/?#"<>]+):)?'. // 2. Scheme
|
||||||
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
'(//([^/?#"<>]*))?'. // 4. Authority
|
||||||
'([^?#<>\'"]*)'. // 5. Path
|
'([^?#"<>]*)'. // 5. Path
|
||||||
'(\?([^#<>\'"]*))?'. // 7. Query
|
'(\?([^#"<>]*))?'. // 7. Query
|
||||||
'(#([^<>\'"]*))?'. // 8. Fragment
|
'(#([^"<>]*))?'. // 8. Fragment
|
||||||
'!';
|
'!';
|
||||||
|
|
||||||
$matches = array();
|
$matches = array();
|
||||||
@ -36,13 +51,7 @@ class HTMLPurifier_URIParser
|
|||||||
|
|
||||||
// further parse authority
|
// further parse authority
|
||||||
if ($authority !== null) {
|
if ($authority !== null) {
|
||||||
// ridiculously inefficient: it's a stacked regex!
|
$r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
||||||
$HEXDIG = '[A-Fa-f0-9]';
|
|
||||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
|
||||||
$sub_delims = '!$&\'()'; // needs []
|
|
||||||
$pct_encoded = "%$HEXDIG$HEXDIG";
|
|
||||||
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
|
|
||||||
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
|
||||||
$matches = array();
|
$matches = array();
|
||||||
preg_match($r_authority, $authority, $matches);
|
preg_match($r_authority, $authority, $matches);
|
||||||
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
||||||
|
@ -14,6 +14,27 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
|
|||||||
$this->assertDef('124.15.6.89'); // IPv4
|
$this->assertDef('124.15.6.89'); // IPv4
|
||||||
$this->assertDef('www.google.com'); // reg-name
|
$this->assertDef('www.google.com'); // reg-name
|
||||||
|
|
||||||
|
// more domain name tests
|
||||||
|
$this->assertDef('test.');
|
||||||
|
$this->assertDef('sub.test.');
|
||||||
|
$this->assertDef('.test', false);
|
||||||
|
$this->assertDef('ff');
|
||||||
|
$this->assertDef('1f', false);
|
||||||
|
$this->assertDef('-f', false);
|
||||||
|
$this->assertDef('f1');
|
||||||
|
$this->assertDef('f-', false);
|
||||||
|
$this->assertDef('sub.ff');
|
||||||
|
$this->assertDef('sub.1f', false);
|
||||||
|
$this->assertDef('sub.-f', false);
|
||||||
|
$this->assertDef('sub.f1');
|
||||||
|
$this->assertDef('sub.f-', false);
|
||||||
|
$this->assertDef('ff.top');
|
||||||
|
$this->assertDef('1f.top');
|
||||||
|
$this->assertDef('-f.top', false);
|
||||||
|
$this->assertDef('ff.top');
|
||||||
|
$this->assertDef('f1.top');
|
||||||
|
$this->assertDef('f-.top', false);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,19 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function testPercentEncoding() {
|
||||||
|
$this->assertDef(
|
||||||
|
'http:colon:mercenary',
|
||||||
|
'colon%3Amercenary'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function testPercentEncodingPreserve() {
|
||||||
|
$this->assertDef(
|
||||||
|
'http://www.example.com/abcABC123-_.!~*()\''
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
function testEmbeds() {
|
function testEmbeds() {
|
||||||
$this->def = new HTMLPurifier_AttrDef_URI(true);
|
$this->def = new HTMLPurifier_AttrDef_URI(true);
|
||||||
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
||||||
|
@ -35,5 +35,28 @@ class HTMLPurifier_PercentEncoderTest extends HTMLPurifier_Harness
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function assertEncode($string, $expect = true, $preserve = false) {
|
||||||
|
if ($expect === true) $expect = $string;
|
||||||
|
$encoder = new HTMLPurifier_PercentEncoder($preserve);
|
||||||
|
$result = $encoder->encode($string);
|
||||||
|
$this->assertIdentical($result, $expect);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_encode_noChange() {
|
||||||
|
$this->assertEncode('abc012-_~.');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_encode_encode() {
|
||||||
|
$this->assertEncode('>', '%3E');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_encode_preserve() {
|
||||||
|
$this->assertEncode('<>', '<%3E', '<');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_encode_low() {
|
||||||
|
$this->assertEncode("\1", '%01');
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,6 +13,13 @@ class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
|
|||||||
$this->assertEqual($result, $expect);
|
$this->assertEqual($result, $expect);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function testPercentNormalization() {
|
||||||
|
$this->assertParsing(
|
||||||
|
'%G',
|
||||||
|
null, null, null, null, '%25G', null, null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
function testRegular() {
|
function testRegular() {
|
||||||
$this->assertParsing(
|
$this->assertParsing(
|
||||||
'http://www.example.com/webhp?q=foo#result2',
|
'http://www.example.com/webhp?q=foo#result2',
|
||||||
@ -121,7 +128,7 @@ class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
|
|||||||
|
|
||||||
function testMalformedTag() {
|
function testMalformedTag() {
|
||||||
$this->assertParsing(
|
$this->assertParsing(
|
||||||
'http://www.example.com/\'>"',
|
'http://www.example.com/>',
|
||||||
'http', null, 'www.example.com', null, '/', null, null
|
'http', null, 'www.example.com', null, '/', null, null
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -160,4 +160,32 @@ class HTMLPurifier_URITest extends HTMLPurifier_URIHarness
|
|||||||
$this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', 'http:');
|
$this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', 'http:');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_validate_removeRedundantScheme() {
|
||||||
|
$this->assertValidation('http:foo:/:', 'foo%3A/:');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_username() {
|
||||||
|
$this->assertValidation("http://user\xE3\x91\x94:@foo.com", 'http://user%E3%91%94:@foo.com');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_path_abempty() {
|
||||||
|
$this->assertValidation("http://host/\xE3\x91\x94:", 'http://host/%E3%91%94:');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_path_absolute() {
|
||||||
|
$this->assertValidation("/\xE3\x91\x94:", '/%E3%91%94:');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_path_rootless() {
|
||||||
|
$this->assertValidation("mailto:\xE3\x91\x94:", 'mailto:%E3%91%94:');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_path_noscheme() {
|
||||||
|
$this->assertValidation("\xE3\x91\x94", '%E3%91%94');
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_validate_path_empty() {
|
||||||
|
$this->assertValidation('http://google.com');
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user