From 1cadb08fbba52c07d6e9c1b051c962b2daa81810 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Thu, 17 Aug 2006 01:05:35 +0000 Subject: [PATCH] Commit IPv6 fix, with majoring factoring out. Thank you Feyd! git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@284 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/AttrDef/Host.php | 41 +++++++++++ library/HTMLPurifier/AttrDef/IPv4.php | 31 ++++++++ library/HTMLPurifier/AttrDef/IPv6.php | 98 +++++++++++++++++++++++++ library/HTMLPurifier/AttrDef/URI.php | 35 +++------ tests/HTMLPurifier/AttrDef/HostTest.php | 24 ++++++ tests/HTMLPurifier/AttrDef/IPv4Test.php | 26 +++++++ tests/HTMLPurifier/AttrDef/IPv6Test.php | 46 ++++++++++++ tests/HTMLPurifier/AttrDef/URITest.php | 15 ++-- tests/index.php | 3 + 9 files changed, 291 insertions(+), 28 deletions(-) create mode 100644 library/HTMLPurifier/AttrDef/Host.php create mode 100644 library/HTMLPurifier/AttrDef/IPv4.php create mode 100644 library/HTMLPurifier/AttrDef/IPv6.php create mode 100644 tests/HTMLPurifier/AttrDef/HostTest.php create mode 100644 tests/HTMLPurifier/AttrDef/IPv4Test.php create mode 100644 tests/HTMLPurifier/AttrDef/IPv6Test.php diff --git a/library/HTMLPurifier/AttrDef/Host.php b/library/HTMLPurifier/AttrDef/Host.php new file mode 100644 index 00000000..69fa6323 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/Host.php @@ -0,0 +1,41 @@ +ipv4 = new HTMLPurifier_AttrDef_IPv4(); + $this->ipv6 = new HTMLPurifier_AttrDef_IPv6(); + } + + function validate($string, $config, &$context) { + $length = strlen($string); + if ($string === '') return ''; + if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') { + //IPv6 + $ip = substr($string, 1, $length - 2); + $valid = $this->ipv6->validate($ip, $config, $context); + if ($valid === false) return false; + return '['. $valid . ']'; + } + $ipv4 = $this->ipv4->validate($string, $config, $context); + if ($ipv4 !== false) return $ipv4; + + // validate a domain name here, do filtering, etc etc etc + + // We could use this, but it would break I18N domain names + //$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string); + //if (!$match) return false; + + return $string; + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/AttrDef/IPv4.php b/library/HTMLPurifier/AttrDef/IPv4.php new file mode 100644 index 00000000..1982512d --- /dev/null +++ b/library/HTMLPurifier/AttrDef/IPv4.php @@ -0,0 +1,31 @@ +ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})"; + } + + function validate($aIP, $config, &$context) { + + if (preg_match('#^' . $this->ip4 . '$#s', $aIP)) + { + return $aIP; + } + + return false; + + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/AttrDef/IPv6.php b/library/HTMLPurifier/AttrDef/IPv6.php new file mode 100644 index 00000000..70cbf79d --- /dev/null +++ b/library/HTMLPurifier/AttrDef/IPv6.php @@ -0,0 +1,98 @@ +ip4 . '$#s', $aIP, $find)) + { + $aIP = substr($aIP, 0, 0-strlen($find[0])); + $ip = explode('.', $find[0]); + $ip = array_map('dechex', $ip); + $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; + unset($find, $ip); + } + + // compression check + $aIP = explode('::', $aIP); + $c = count($aIP); + if ($c > 2) + { + return false; + } + elseif ($c == 2) + { + list($first, $second) = $aIP; + $first = explode(':', $first); + $second = explode(':', $second); + + if (count($first) + count($second) > 8) + { + return false; + } + + while(count($first) < 8) + { + array_push($first, '0'); + } + + array_splice($first, 8 - count($second), 8, $second); + $aIP = $first; + unset($first,$second); + } + else + { + $aIP = explode(':', $aIP[0]); + } + $c = count($aIP); + + if ($c != 8) + { + return false; + } + + // All the pieces should be 16-bit hex strings. Are they? + foreach ($aIP as $piece) + { + if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) + { + return false; + } + } + + return $original; + + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index b457efdc..ca437f96 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -1,7 +1,9 @@ host = new HTMLPurifier_AttrDef_Host(); + } + function validate($uri, $config, &$context) { // We'll write stack-based parsers later, for now, use regexps to @@ -63,34 +71,12 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef if ($authority !== null) { - // define regexps - // this stuff may need to be factored out so Email can get to it - $HEXDIG = '[A-Fa-f0-9]'; $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with [] $sub_delims = '!$&\'()'; // needs [] $pct_encoded = "%$HEXDIG$HEXDIG"; - $h16 = "{$HEXDIG}{1,4}"; - $dec_octet = '(?:25[0-5]|2[0-4]\d|1\d\d|1\d|[0-9])'; - $IPv4address = "$dec_octet.$dec_octet.$dec_octet.$dec_octet"; - $ls32 = "(?:$h16:$h16|$IPv4address)"; - $IPvFuture = "v$HEXDIG+\.[:$unreserved$sub_delims]+"; - $IPv6Address = "(?:". - "(?:$h16:){6}$ls32" . - "|::(?:$h16:){5}$ls32" . - "|(?:$h16)?::(?:$h16:){4}$ls32" . - "|(?:(?:$h16:){1}$h16)?::(?:$h16:){3}$ls32" . - "|(?:(?:$h16:){2}$h16)?::(?:$h16:){2}$ls32" . - "|(?:(?:$h16:){3}$h16)?::(?:$h16:){1}$ls32" . - "|(?:(?:$h16:){4}$h16)?::$ls32" . - "|(?:(?:$h16:){5}$h16)?::$h16" . - "|(?:(?:$h16:){6}$h16)?::" . - ")"; - $IP_literal = "\[(?:$IPvFuture|$IPv6Address)\]"; $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*"; - - // IPv6 is broken - $r_authority = "/^(($r_userinfo)@)?(\[$IP_literal\]|[^:]*)(:(\d*))?/"; + $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; $matches = array(); preg_match($r_authority, $authority, $matches); // overloads regexp! @@ -104,6 +90,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef if ($port < 1 || $port > 65535) $port = null; } + $host = $this->host->validate($host, $config, $context); + if ($host === false) $host = null; + // userinfo and host are validated within the regexp } else { diff --git a/tests/HTMLPurifier/AttrDef/HostTest.php b/tests/HTMLPurifier/AttrDef/HostTest.php new file mode 100644 index 00000000..4b4b25b6 --- /dev/null +++ b/tests/HTMLPurifier/AttrDef/HostTest.php @@ -0,0 +1,24 @@ +def = new HTMLPurifier_AttrDef_Host(); + + $this->assertDef('[2001:DB8:0:0:8:800:200C:417A]'); // IPv6 + $this->assertDef('124.15.6.89'); // IPv4 + $this->assertDef('www.google.com'); // reg-name + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/AttrDef/IPv4Test.php b/tests/HTMLPurifier/AttrDef/IPv4Test.php new file mode 100644 index 00000000..59f560d9 --- /dev/null +++ b/tests/HTMLPurifier/AttrDef/IPv4Test.php @@ -0,0 +1,26 @@ +def = new HTMLPurifier_AttrDef_IPv4(); + + $this->assertDef('127.0.0.1'); // standard IPv4, loopback, non-routable + $this->assertDef('0.0.0.0'); // standard IPv4, unspecified, non-routable + $this->assertDef('255.255.255.255'); // standard IPv4 + + $this->assertDef('300.0.0.0', false); // standard IPv4, out of range + $this->assertDef('124.15.6.89/60', false); // standard IPv4, prefix not allowed + + $this->assertDef('', false); // nothing + + } +} \ No newline at end of file diff --git a/tests/HTMLPurifier/AttrDef/IPv6Test.php b/tests/HTMLPurifier/AttrDef/IPv6Test.php new file mode 100644 index 00000000..7ad3613f --- /dev/null +++ b/tests/HTMLPurifier/AttrDef/IPv6Test.php @@ -0,0 +1,46 @@ +def = new HTMLPurifier_AttrDef_IPv6(); + + $this->assertDef('2001:DB8:0:0:8:800:200C:417A'); // unicast, full + $this->assertDef('FF01:0:0:0:0:0:0:101'); // multicast, full + $this->assertDef('0:0:0:0:0:0:0:1'); // loopback, full + $this->assertDef('0:0:0:0:0:0:0:0'); // unspecified, full + $this->assertDef('2001:DB8::8:800:200C:417A'); // unicast, compressed + $this->assertDef('FF01::101'); // multicast, compressed + + $this->assertDef('::1'); // loopback, compressed, non-routable + $this->assertDef('::'); // unspecified, compressed, non-routable + $this->assertDef('0:0:0:0:0:0:13.1.68.3'); // IPv4-compatible IPv6 address, full, deprecated + $this->assertDef('0:0:0:0:0:FFFF:129.144.52.38'); // IPv4-mapped IPv6 address, full + $this->assertDef('::13.1.68.3'); // IPv4-compatible IPv6 address, compressed, deprecated + $this->assertDef('::FFFF:129.144.52.38'); // IPv4-mapped IPv6 address, compressed + $this->assertDef('2001:0DB8:0000:CD30:0000:0000:0000:0000/60'); // full, with prefix + $this->assertDef('2001:0DB8::CD30:0:0:0:0/60'); // compressed, with prefix + $this->assertDef('2001:0DB8:0:CD30::/60'); // compressed, with prefix #2 + $this->assertDef('::/128'); // compressed, unspecified address type, non-routable + $this->assertDef('::1/128'); // compressed, loopback address type, non-routable + $this->assertDef('FF00::/8'); // compressed, multicast address type + $this->assertDef('FE80::/10'); // compressed, link-local unicast, non-routable + $this->assertDef('FEC0::/10'); // compressed, site-local unicast, deprecated + + $this->assertDef('2001:DB8:0:0:8:800:200C:417A:221', false); // unicast, full + $this->assertDef('FF01::101::2', false); //multicast, compressed + $this->assertDef('', false); // nothing + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index 921aa910..14fc409f 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -4,9 +4,12 @@ require_once 'HTMLPurifier/AttrDefHarness.php'; require_once 'HTMLPurifier/AttrDef/URI.php'; // WARNING: INCOMPLETE UNIT TESTS! -// we are currently abstaining IPv6 and percent-encode fixing unit tests +// we are currently abstaining percent-encode fixing unit tests // we also need to test all the configuration directives defined by this class +// http: is returned quite often when a URL is invalid. We have to change +// this behavior to just a plain old "FALSE"! + class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness { @@ -95,8 +98,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness $components[8] = array(null, '333.123.32.123', null, '/', null); // test IPv6 address, using amended form of RFC's example - //$uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one'; - //$components[9] = array('[2001:db8::7]', '/c=GB', 'objectClass?one', null); + $uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one'; + $components[9] = array(null, '[2001:db8::7]', null, '/c=GB', + 'objectClass?one'); // We will not implement punycode encoding, that's up to the browsers // We also will not implement percent to IDNA encoding transformations: @@ -109,8 +113,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness $components[10] = array(null, 'tūdaliņ.lv', null, '', null); // test invalid IPv6 address and invalid reg-name - //$uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]'; - //$components[11] = array(null, '', null, null); + $uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]'; + $components[11] = array(null, null, null, '', null); + $expect_uri[11] = 'http:'; // test invalid port $uri[12] = 'http://example.com:foobar'; diff --git a/tests/index.php b/tests/index.php index eb751ec5..b6baf263 100644 --- a/tests/index.php +++ b/tests/index.php @@ -73,6 +73,9 @@ $test_files[] = 'AttrDef/PercentageTest.php'; $test_files[] = 'AttrDef/MultipleTest.php'; $test_files[] = 'AttrDef/TextDecorationTest.php'; $test_files[] = 'AttrDef/FontFamilyTest.php'; +$test_files[] = 'AttrDef/HostTest.php'; +$test_files[] = 'AttrDef/IPv4Test.php'; +$test_files[] = 'AttrDef/IPv6Test.php'; $test_files[] = 'IDAccumulatorTest.php'; $test_files[] = 'TagTransformTest.php'; $test_files[] = 'AttrTransform/LangTest.php';