mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-24 01:01:53 +00:00
12b811d749
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
174 lines
6.3 KiB
PHP
174 lines
6.3 KiB
PHP
<?php
/**
 * HTML Purifier's internal representation of a URI.
 *
 * The object is a plain bag of URI components plus the logic needed to
 * normalize them (validate()) and to serialize them again (toString()).
 *
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{

    /**
     * Individual URI components. toString() leaves out the scheme, the
     * authority (userinfo, host, port), the query and the fragment when
     * the corresponding value is null; the path is always appended.
     */
    public $scheme, $userinfo, $host, $port, $path, $query, $fragment;

    /**
     * Stores the components, normalizing two of them on the way in.
     *
     * @note Automatically normalizes scheme and port: a non-null scheme is
     *       lower-cased and a non-null port is cast to an integer. All
     *       other components are stored untouched.
     *
     * @param $scheme   Scheme name without the trailing colon, or null
     * @param $userinfo Userinfo part of the authority, or null
     * @param $host     Host part of the authority, or null
     * @param $port     Port (anything castable to int), or null
     * @param $path     Path component
     * @param $query    Query without the leading "?", or null
     * @param $fragment Fragment without the leading "#", or null
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // ctype_lower() lets already lower-case schemes skip the strtolower() call
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme, or to
     * the configured default scheme when this URI has none.
     *
     * @param $config  Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Scheme object appropriate for validating this URI, or false
     *         if the registry yields no object. A missing object for the
     *         default scheme additionally raises an E_USER_WARNING.
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();

        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
            return $scheme_obj;
        }

        // no scheme: retrieve the default one
        $def = $config->getDefinition('URI');
        $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
        if (!$scheme_obj) {
            // something funky happened to the default scheme object
            trigger_error(
                'Default scheme object "' . $def->defaultScheme . '" was not readable',
                E_USER_WARNING
            );
            return false;
        }

        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     *
     * Components are processed in a fixed order: scheme, host, userinfo,
     * port, path, then query and fragment. Offending components are
     * repaired (percent-encoded) or dropped (set to null / emptied) rather
     * than causing the whole URI to be rejected.
     *
     * @param $config  Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Always true in the current implementation; no code path
     *         reports failure.
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986. These strings are handed to
        // HTMLPurifier_PercentEncoder; presumably they list the extra
        // characters the encoder leaves unescaped (confirm against that class).
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate scheme (MUST BE FIRST!)
        // A scheme equal to the configured default is redundant when there
        // is no host, so it is dropped. Note that this looks at the host as
        // supplied, i.e. before the host validation below may null it.
        if (!is_null($this->scheme) && is_null($this->host)) {
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                // the validator signals rejection with false; we store null
                $this->host = null;
            }
        }

        // validate userinfo
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port: anything outside 1..65535 is discarded
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path; the branch names below are the RFC 3986 path rules
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) {
            // path-abempty (hier and relative)
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This shouldn't ever happen! Without a host, a leading
                    // "//" would read back as an authority, so drop the path.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // (once again, not checking nz)
                // The first segment gets its own encoder whose character
                // list omits ":", so that segment is handled separately
                // from the rest of the path.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }

        return true;
    }

    /**
     * Convert URI back to string
     *
     * The result is assembled as [scheme ":"] ["//" authority] path
     * ["?" query] ["#" fragment]. The authority is only emitted when a
     * host is present; userinfo and port without a host are ignored.
     *
     * @return String URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // reconstruct the result
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

}

// vim: et sw=4 sts=4