mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 15:28:40 +00:00
fac747bdbd
With minor corrections. Signed-off-by: Marcus Bointon <marcus@synchromedia.co.uk> Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
72 lines
2.2 KiB
PHP
72 lines
2.2 KiB
PHP
<?php
|
|
|
|
/**
|
|
* Parses a URI into the components and fragment identifier as specified
|
|
* by RFC 3986.
|
|
*/
|
|
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the parser along with the percent-encoder that parse()
     * runs over every URI before splitting it into components.
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     * @param string $uri URI to parse
     * @return HTMLPurifier_URI|bool Representation of the URI, or false if
     *         the URI could not be matched at all. The representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        // The number on each line is the capture group the value is read
        // from; the group just before it also contains the delimiter and is
        // only used to tell "absent" apart from "present but empty".
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 9. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        // Every component of the pattern is optional, so a failure here
        // means the URI is *really* invalid (or the regex engine errored).
        if (!$result) {
            return false;
        }

        // Separate out parts. The tests look at the outer group, which
        // includes the delimiter (":", "//", "?", "#") and therefore can
        // never be the string "0"; this makes empty() a safe presence check.
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            // Groups 1 and 4 include their delimiter ("@" / ":"), so
            // empty() is again a safe presence check for them.
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // The host group has no delimiter, so it must not be tested
            // with empty(): that would turn the legitimate host "0" into ''.
            $host       = isset($matches[3]) ? $matches[3] : '';
            // A present-but-empty port ("host:") casts to 0.
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
|
|
|
|
// vim: et sw=4 sts=4
|