mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-08 15:11:51 +00:00
Commit IPv6 fix, with major factoring out. Thank you Feyd!
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@284 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
cd0108d656
commit
1cadb08fbb
41
library/HTMLPurifier/AttrDef/Host.php
Normal file
41
library/HTMLPurifier/AttrDef/Host.php
Normal file
@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
||||
|
||||
/**
 * Validates the host component of a URI authority.
 *
 * Three forms are distinguished: a bracketed IPv6 literal, a dotted-quad
 * IPv4 address, and anything else (treated as a registered name and passed
 * through untouched).
 */
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
{

    // Sub-validator used for dotted-quad hosts.
    var $ipv4;

    // Sub-validator used for the contents of a bracketed IP literal.
    var $ipv6;

    // Constructor: creates the two address validators this class delegates to.
    function HTMLPurifier_AttrDef_Host() {
        $this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_IPv6();
    }

    /**
     * Validates a host string.
     *
     * @param $string  Host as it appears in the URI (IPv6 literals keep
     *                 their surrounding brackets).
     * @param $config  Passed through unchanged to the address validators.
     * @param $context Passed through (by reference) to the address validators.
     * @return The accepted host string, or false when a bracketed literal
     *         does not hold a valid IPv6 address.
     */
    function validate($string, $config, &$context) {

        // An empty host is returned as-is.
        if ($string === '') {
            return '';
        }

        $last = strlen($string) - 1;

        // IPv6: "[...]"; the brackets are stripped before delegating and
        // restored around whatever the IPv6 validator hands back.
        if ($last > 0 && $string[0] === '[' && $string[$last] === ']') {
            $literal = $this->ipv6->validate(
                substr($string, 1, $last - 1),
                $config,
                $context
            );
            if ($literal === false) {
                return false;
            }
            return '[' . $literal . ']';
        }

        // IPv4: accepted verbatim when the dotted-quad validator agrees.
        $dotted = $this->ipv4->validate($string, $config, $context);
        if ($dotted !== false) {
            return $dotted;
        }

        // validate a domain name here, do filtering, etc etc etc

        // We could use this, but it would break I18N domain names
        //$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
        //if (!$match) return false;

        // Registered names are currently not checked at all.
        return $string;
    }

}
|
||||
|
||||
?>
|
31
library/HTMLPurifier/AttrDef/IPv4.php
Normal file
31
library/HTMLPurifier/AttrDef/IPv4.php
Normal file
@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
// spliced from Feyd's IPv6 function (pd)
|
||||
|
||||
/**
 * Validates a dotted-quad IPv4 address (four decimal octets, each 0-255,
 * without leading zeros).
 */
class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
{

    // Unanchored, undelimited regex fragment matching one IPv4 address.
    // regex is public so that IPv6 can reuse it
    var $ip4;

    // Constructor: builds the IPv4 regex fragment once per instance.
    function HTMLPurifier_AttrDef_IPv4() {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
    }

    /**
     * Validates an IPv4 address.
     *
     * @param $aIP     Candidate address.
     * @param $config  Unused by this validator.
     * @param $context Unused by this validator.
     * @return The address unchanged when it is a well-formed dotted quad,
     *         false otherwise.
     */
    function validate($aIP, $config, &$context) {

        // The D modifier makes "$" match only at the very end of the
        // subject. Without it "$" also matches before a trailing newline,
        // so "127.0.0.1\n" was accepted and returned with the newline.
        if (preg_match('#^' . $this->ip4 . '$#sD', $aIP))
        {
            return $aIP;
        }

        return false;

    }

}
|
||||
|
||||
?>
|
98
library/HTMLPurifier/AttrDef/IPv6.php
Normal file
98
library/HTMLPurifier/AttrDef/IPv6.php
Normal file
@ -0,0 +1,98 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
|
||||
// IPv6 by Feyd, source is in public domain
|
||||
|
||||
// note that this expects the brackets to be removed from IPv6 addresses
|
||||
// extends from the IPv4 impl. so we can borrow its regex
|
||||
|
||||
/**
 * Validates an IPv6 address, optionally followed by a "/0" - "/128" prefix
 * length and optionally ending in an embedded dotted-quad IPv4 address.
 *
 * The address must be passed WITHOUT the surrounding brackets used in URIs.
 * Extends the IPv4 validator so its regex fragment ($this->ip4) can be
 * reused; this class defines no constructor of its own and relies on the
 * parent constructor having populated that property.
 */
class HTMLPurifier_AttrDef_IPv6 extends HTMLPurifier_AttrDef_IPv4
{

    /**
     * Validates an IPv6 address.
     *
     * @param $aIP     Candidate address, brackets already removed.
     * @param $config  Unused by this validator.
     * @param $context Unused by this validator.
     * @return The original input string when it is valid, false otherwise.
     */
    function validate($aIP, $config, &$context) {

        $original = $aIP;

        $blk = '[0-9a-fA-F]{1,4}'; // one 16-bit group, 1-4 hex digits
        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128

        // prefix check: any slash must introduce a valid prefix length at
        // the very end of the string (D: "$" does not tolerate a newline).
        if (strpos($aIP, '/') !== false)
        {
            if (!preg_match('#' . $pre . '$#sD', $aIP, $find))
            {
                return false;
            }
            $aIP = substr($aIP, 0, 0 - strlen($find[0]));
            unset($find);
        }

        // IPv4-compatibility check: a trailing dotted quad that directly
        // follows a colon is rewritten as the two 16-bit groups it stands
        // for. Each octet is zero-padded to two hex digits so that e.g.
        // 13.1.68.3 becomes 0d01:4403 rather than d1:443.
        if (preg_match('#(?<=:)' . $this->ip4 . '$#sD', $aIP, $find))
        {
            $aIP = substr($aIP, 0, 0 - strlen($find[0]));
            $oct = explode('.', $find[0]);
            $aIP .= sprintf(
                '%02x%02x:%02x%02x',
                (int) $oct[0], (int) $oct[1], (int) $oct[2], (int) $oct[3]
            );
            unset($find, $oct);
        }

        // compression check: at most one "::" is allowed.
        $halves = explode('::', $aIP);
        $c = count($halves);
        if ($c > 2)
        {
            return false;
        }
        elseif ($c == 2)
        {
            list($first, $second) = $halves;

            // An empty side of "::" contributes no groups at all. Treating
            // it as one empty group (what explode() yields for '') is what
            // used to let malformed input through.
            $first  = ($first  === '') ? array() : explode(':', $first);
            $second = ($second === '') ? array() : explode(':', $second);

            // "::" must stand for at least one zero group, so no more than
            // seven groups may be written out explicitly.
            $explicit = count($first) + count($second);
            if ($explicit > 7)
            {
                return false;
            }

            // Expand the compression into literal zero groups.
            $groups = array_merge(
                $first,
                array_fill(0, 8 - $explicit, '0'),
                $second
            );
            unset($first, $second);
        }
        else
        {
            $groups = explode(':', $halves[0]);
        }

        if (count($groups) != 8)
        {
            return false;
        }

        // All the pieces should be 16-bit hex strings. Are they?
        // Pieces are checked as written (no zero padding), so an empty
        // group such as the one in "1:2:3:4:5:6:7:" is rejected.
        foreach ($groups as $piece)
        {
            if (!preg_match('#^' . $blk . '$#D', $piece))
            {
                return false;
            }
        }

        return $original;

    }

}
|
||||
|
||||
?>
|
@ -1,7 +1,9 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
'URI', 'DefaultScheme', 'http',
|
||||
@ -12,6 +14,12 @@ HTMLPurifier_ConfigDef::define(
|
||||
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $host;
|
||||
|
||||
function HTMLPurifier_AttrDef_URI() {
|
||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
||||
}
|
||||
|
||||
function validate($uri, $config, &$context) {
|
||||
|
||||
// We'll write stack-based parsers later, for now, use regexps to
|
||||
@ -63,34 +71,12 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
if ($authority !== null) {
|
||||
|
||||
// define regexps
|
||||
// this stuff may need to be factored out so Email can get to it
|
||||
|
||||
$HEXDIG = '[A-Fa-f0-9]';
|
||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
||||
$sub_delims = '!$&\'()'; // needs []
|
||||
$pct_encoded = "%$HEXDIG$HEXDIG";
|
||||
$h16 = "{$HEXDIG}{1,4}";
|
||||
$dec_octet = '(?:25[0-5]|2[0-4]\d|1\d\d|1\d|[0-9])';
|
||||
$IPv4address = "$dec_octet.$dec_octet.$dec_octet.$dec_octet";
|
||||
$ls32 = "(?:$h16:$h16|$IPv4address)";
|
||||
$IPvFuture = "v$HEXDIG+\.[:$unreserved$sub_delims]+";
|
||||
$IPv6Address = "(?:".
|
||||
"(?:$h16:){6}$ls32" .
|
||||
"|::(?:$h16:){5}$ls32" .
|
||||
"|(?:$h16)?::(?:$h16:){4}$ls32" .
|
||||
"|(?:(?:$h16:){1}$h16)?::(?:$h16:){3}$ls32" .
|
||||
"|(?:(?:$h16:){2}$h16)?::(?:$h16:){2}$ls32" .
|
||||
"|(?:(?:$h16:){3}$h16)?::(?:$h16:){1}$ls32" .
|
||||
"|(?:(?:$h16:){4}$h16)?::$ls32" .
|
||||
"|(?:(?:$h16:){5}$h16)?::$h16" .
|
||||
"|(?:(?:$h16:){6}$h16)?::" .
|
||||
")";
|
||||
$IP_literal = "\[(?:$IPvFuture|$IPv6Address)\]";
|
||||
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
|
||||
|
||||
// IPv6 is broken
|
||||
$r_authority = "/^(($r_userinfo)@)?(\[$IP_literal\]|[^:]*)(:(\d*))?/";
|
||||
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
||||
$matches = array();
|
||||
preg_match($r_authority, $authority, $matches);
|
||||
// overloads regexp!
|
||||
@ -104,6 +90,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
if ($port < 1 || $port > 65535) $port = null;
|
||||
}
|
||||
|
||||
$host = $this->host->validate($host, $config, $context);
|
||||
if ($host === false) $host = null;
|
||||
|
||||
// userinfo and host are validated within the regexp
|
||||
|
||||
} else {
|
||||
|
24
tests/HTMLPurifier/AttrDef/HostTest.php
Normal file
24
tests/HTMLPurifier/AttrDef/HostTest.php
Normal file
@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||
|
||||
// takes a URI formatted host and validates it
|
||||
|
||||
|
||||
// Checks that the host validator accepts one sample of each host form.
class HTMLPurifier_AttrDef_HostTest extends HTMLPurifier_AttrDefHarness
{

    function test() {

        $this->def = new HTMLPurifier_AttrDef_Host();

        // Hosts expected to pass validation, checked in this order.
        $accepted = array(
            '[2001:DB8:0:0:8:800:200C:417A]', // IPv6
            '124.15.6.89',                    // IPv4
            'www.google.com',                 // reg-name
        );

        foreach ($accepted as $host) {
            $this->assertDef($host);
        }

    }

}
|
||||
|
||||
?>
|
26
tests/HTMLPurifier/AttrDef/IPv4Test.php
Normal file
26
tests/HTMLPurifier/AttrDef/IPv4Test.php
Normal file
@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
|
||||
// IPv4 test case is spliced from Feyd's IPv6 implementation
|
||||
// we ought to disallow non-routable addresses
|
||||
|
||||
// Exercises the IPv4 validator with well-formed and malformed addresses.
class HTMLPurifier_AttrDef_IPv4Test extends HTMLPurifier_AttrDefHarness
{

    function test() {

        $this->def = new HTMLPurifier_AttrDef_IPv4();

        // Addresses expected to pass validation.
        $accepted = array(
            '127.0.0.1',       // standard IPv4, loopback, non-routable
            '0.0.0.0',         // standard IPv4, unspecified, non-routable
            '255.255.255.255', // standard IPv4
        );
        foreach ($accepted as $address) {
            $this->assertDef($address);
        }

        // Inputs expected to be rejected.
        $rejected = array(
            '300.0.0.0',      // standard IPv4, out of range
            '124.15.6.89/60', // standard IPv4, prefix not allowed
            '',               // nothing
        );
        foreach ($rejected as $address) {
            $this->assertDef($address, false);
        }

    }
}
|
46
tests/HTMLPurifier/AttrDef/IPv6Test.php
Normal file
46
tests/HTMLPurifier/AttrDef/IPv6Test.php
Normal file
@ -0,0 +1,46 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
||||
|
||||
// test case is from Feyd's IPv6 implementation
|
||||
// we ought to disallow non-routable addresses
|
||||
|
||||
// Exercises the IPv6 validator with full, compressed, IPv4-embedded and
// prefixed addresses, followed by a few malformed inputs.
class HTMLPurifier_AttrDef_IPv6Test extends HTMLPurifier_AttrDefHarness
{

    function test() {

        $this->def = new HTMLPurifier_AttrDef_IPv6();

        // Addresses expected to pass validation, checked in this order.
        $accepted = array(
            '2001:DB8:0:0:8:800:200C:417A', // unicast, full
            'FF01:0:0:0:0:0:0:101',         // multicast, full
            '0:0:0:0:0:0:0:1',              // loopback, full
            '0:0:0:0:0:0:0:0',              // unspecified, full
            '2001:DB8::8:800:200C:417A',    // unicast, compressed
            'FF01::101',                    // multicast, compressed

            '::1',                          // loopback, compressed, non-routable
            '::',                           // unspecified, compressed, non-routable
            '0:0:0:0:0:0:13.1.68.3',        // IPv4-compatible IPv6 address, full, deprecated
            '0:0:0:0:0:FFFF:129.144.52.38', // IPv4-mapped IPv6 address, full
            '::13.1.68.3',                  // IPv4-compatible IPv6 address, compressed, deprecated
            '::FFFF:129.144.52.38',         // IPv4-mapped IPv6 address, compressed
            '2001:0DB8:0000:CD30:0000:0000:0000:0000/60', // full, with prefix
            '2001:0DB8::CD30:0:0:0:0/60',   // compressed, with prefix
            '2001:0DB8:0:CD30::/60',        // compressed, with prefix #2
            '::/128',                       // compressed, unspecified address type, non-routable
            '::1/128',                      // compressed, loopback address type, non-routable
            'FF00::/8',                     // compressed, multicast address type
            'FE80::/10',                    // compressed, link-local unicast, non-routable
            'FEC0::/10',                    // compressed, site-local unicast, deprecated
        );
        foreach ($accepted as $address) {
            $this->assertDef($address);
        }

        // Inputs expected to be rejected.
        $rejected = array(
            '2001:DB8:0:0:8:800:200C:417A:221', // unicast, full
            'FF01::101::2',                     // multicast, compressed
            '',                                 // nothing
        );
        foreach ($rejected as $address) {
            $this->assertDef($address, false);
        }

    }

}
|
||||
|
||||
?>
|
@ -4,9 +4,12 @@ require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
// WARNING: INCOMPLETE UNIT TESTS!
|
||||
// we are currently abstaining IPv6 and percent-encode fixing unit tests
|
||||
// we are currently abstaining percent-encode fixing unit tests
|
||||
// we also need to test all the configuration directives defined by this class
|
||||
|
||||
// http: is returned quite often when a URL is invalid. We have to change
|
||||
// this behavior to just a plain old "FALSE"!
|
||||
|
||||
class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
{
|
||||
|
||||
@ -95,8 +98,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
$components[8] = array(null, '333.123.32.123', null, '/', null);
|
||||
|
||||
// test IPv6 address, using amended form of RFC's example
|
||||
//$uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one';
|
||||
//$components[9] = array('[2001:db8::7]', '/c=GB', 'objectClass?one', null);
|
||||
$uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one';
|
||||
$components[9] = array(null, '[2001:db8::7]', null, '/c=GB',
|
||||
'objectClass?one');
|
||||
|
||||
// We will not implement punycode encoding, that's up to the browsers
|
||||
// We also will not implement percent to IDNA encoding transformations:
|
||||
@ -109,8 +113,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
$components[10] = array(null, 'tūdaliņ.lv', null, '', null);
|
||||
|
||||
// test invalid IPv6 address and invalid reg-name
|
||||
//$uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]';
|
||||
//$components[11] = array(null, '', null, null);
|
||||
$uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]';
|
||||
$components[11] = array(null, null, null, '', null);
|
||||
$expect_uri[11] = 'http:';
|
||||
|
||||
// test invalid port
|
||||
$uri[12] = 'http://example.com:foobar';
|
||||
|
@ -73,6 +73,9 @@ $test_files[] = 'AttrDef/PercentageTest.php';
|
||||
$test_files[] = 'AttrDef/MultipleTest.php';
|
||||
$test_files[] = 'AttrDef/TextDecorationTest.php';
|
||||
$test_files[] = 'AttrDef/FontFamilyTest.php';
|
||||
$test_files[] = 'AttrDef/HostTest.php';
|
||||
$test_files[] = 'AttrDef/IPv4Test.php';
|
||||
$test_files[] = 'AttrDef/IPv6Test.php';
|
||||
$test_files[] = 'IDAccumulatorTest.php';
|
||||
$test_files[] = 'TagTransformTest.php';
|
||||
$test_files[] = 'AttrTransform/LangTest.php';
|
||||
|
Loading…
Reference in New Issue
Block a user