diff --git a/library/HTMLPurifier/AttrDef/Host.php b/library/HTMLPurifier/AttrDef/Host.php
new file mode 100644
index 00000000..69fa6323
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/Host.php
@@ -0,0 +1,41 @@
+ipv4 = new HTMLPurifier_AttrDef_IPv4();
+ $this->ipv6 = new HTMLPurifier_AttrDef_IPv6();
+ }
+
+ function validate($string, $config, &$context) { // Validates a URI host; returns the accepted host string, or false when a bracketed literal fails IPv6 validation
+ $length = strlen($string);
+ if ($string === '') return ''; // an empty host is passed through unchanged
+ if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
+ // Bracketed literal: strip the surrounding brackets and defer to the IPv6 validator
+ $ip = substr($string, 1, $length - 2);
+ $valid = $this->ipv6->validate($ip, $config, $context);
+ if ($valid === false) return false;
+ return '['. $valid . ']'; // re-wrap whatever the IPv6 validator returned
+ }
+ $ipv4 = $this->ipv4->validate($string, $config, $context);
+ if ($ipv4 !== false) return $ipv4; // accepted by the IPv4 validator, return its result
+ // Anything that reaches this point is treated as a domain name (reg-name).
+ // TODO: validate a domain name here, do filtering, etc.
+ // NOTE: no check is performed yet, so every remaining string is returned as-is.
+ // We could use this, but it would break I18N domain names
+ //$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
+ //if (!$match) return false;
+
+ return $string;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/AttrDef/IPv4.php b/library/HTMLPurifier/AttrDef/IPv4.php
new file mode 100644
index 00000000..1982512d
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/IPv4.php
@@ -0,0 +1,31 @@
+ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
+ }
+
+ function validate($aIP, $config, &$context) { // Returns $aIP unchanged when the whole string matches the $this->ip4 pattern, false otherwise
+ // D (PCRE_DOLLAR_ENDONLY) is required: without it "$" also matches before a trailing newline, so "1.2.3.4\n" would be accepted and returned verbatim
+ if (preg_match('#^' . $this->ip4 . '$#sD', $aIP))
+ {
+ return $aIP;
+ }
+ // no match: the input is not a valid IPv4 address
+ return false;
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/AttrDef/IPv6.php b/library/HTMLPurifier/AttrDef/IPv6.php
new file mode 100644
index 00000000..70cbf79d
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/IPv6.php
@@ -0,0 +1,98 @@
+ip4 . '$#s', $aIP, $find))
+ {
+ $aIP = substr($aIP, 0, 0-strlen($find[0]));
+ $ip = explode('.', $find[0]);
+ $ip = array_map('dechex', $ip);
+ $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
+ unset($find, $ip);
+ }
+
+ // compression check
+ $aIP = explode('::', $aIP);
+ $c = count($aIP);
+ if ($c > 2)
+ {
+ return false;
+ }
+ elseif ($c == 2)
+ {
+ list($first, $second) = $aIP;
+ $first = explode(':', $first);
+ $second = explode(':', $second);
+
+ if (count($first) + count($second) > 8)
+ {
+ return false;
+ }
+
+ while(count($first) < 8)
+ {
+ array_push($first, '0');
+ }
+
+ array_splice($first, 8 - count($second), 8, $second);
+ $aIP = $first;
+ unset($first,$second);
+ }
+ else
+ {
+ $aIP = explode(':', $aIP[0]);
+ }
+ $c = count($aIP);
+
+ if ($c != 8)
+ {
+ return false;
+ }
+
+ // All the pieces should be 16-bit hex strings. Are they?
+ foreach ($aIP as $piece)
+ {
+ if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
+ {
+ return false;
+ }
+ }
+
+ return $original;
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php
index b457efdc..ca437f96 100644
--- a/library/HTMLPurifier/AttrDef/URI.php
+++ b/library/HTMLPurifier/AttrDef/URI.php
@@ -1,7 +1,9 @@
host = new HTMLPurifier_AttrDef_Host();
+ }
+
function validate($uri, $config, &$context) {
// We'll write stack-based parsers later, for now, use regexps to
@@ -63,34 +71,12 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if ($authority !== null) {
- // define regexps
- // this stuff may need to be factored out so Email can get to it
-
$HEXDIG = '[A-Fa-f0-9]';
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
$sub_delims = '!$&\'()'; // needs []
$pct_encoded = "%$HEXDIG$HEXDIG";
- $h16 = "{$HEXDIG}{1,4}";
- $dec_octet = '(?:25[0-5]|2[0-4]\d|1\d\d|1\d|[0-9])';
- $IPv4address = "$dec_octet.$dec_octet.$dec_octet.$dec_octet";
- $ls32 = "(?:$h16:$h16|$IPv4address)";
- $IPvFuture = "v$HEXDIG+\.[:$unreserved$sub_delims]+";
- $IPv6Address = "(?:".
- "(?:$h16:){6}$ls32" .
- "|::(?:$h16:){5}$ls32" .
- "|(?:$h16)?::(?:$h16:){4}$ls32" .
- "|(?:(?:$h16:){1}$h16)?::(?:$h16:){3}$ls32" .
- "|(?:(?:$h16:){2}$h16)?::(?:$h16:){2}$ls32" .
- "|(?:(?:$h16:){3}$h16)?::(?:$h16:){1}$ls32" .
- "|(?:(?:$h16:){4}$h16)?::$ls32" .
- "|(?:(?:$h16:){5}$h16)?::$h16" .
- "|(?:(?:$h16:){6}$h16)?::" .
- ")";
- $IP_literal = "\[(?:$IPvFuture|$IPv6Address)\]";
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
-
- // IPv6 is broken
- $r_authority = "/^(($r_userinfo)@)?(\[$IP_literal\]|[^:]*)(:(\d*))?/";
+ $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
$matches = array();
preg_match($r_authority, $authority, $matches);
// overloads regexp!
@@ -104,6 +90,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if ($port < 1 || $port > 65535) $port = null;
}
+ $host = $this->host->validate($host, $config, $context);
+ if ($host === false) $host = null;
+
// userinfo and host are validated within the regexp
} else {
diff --git a/tests/HTMLPurifier/AttrDef/HostTest.php b/tests/HTMLPurifier/AttrDef/HostTest.php
new file mode 100644
index 00000000..4b4b25b6
--- /dev/null
+++ b/tests/HTMLPurifier/AttrDef/HostTest.php
@@ -0,0 +1,24 @@
+def = new HTMLPurifier_AttrDef_Host();
+
+ $this->assertDef('[2001:DB8:0:0:8:800:200C:417A]'); // IPv6
+ $this->assertDef('124.15.6.89'); // IPv4
+ $this->assertDef('www.google.com'); // reg-name
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/AttrDef/IPv4Test.php b/tests/HTMLPurifier/AttrDef/IPv4Test.php
new file mode 100644
index 00000000..59f560d9
--- /dev/null
+++ b/tests/HTMLPurifier/AttrDef/IPv4Test.php
@@ -0,0 +1,26 @@
+def = new HTMLPurifier_AttrDef_IPv4();
+
+ $this->assertDef('127.0.0.1'); // standard IPv4, loopback, non-routable
+ $this->assertDef('0.0.0.0'); // standard IPv4, unspecified, non-routable
+ $this->assertDef('255.255.255.255'); // standard IPv4
+
+ $this->assertDef('300.0.0.0', false); // standard IPv4, out of range
+ $this->assertDef('124.15.6.89/60', false); // standard IPv4, prefix not allowed
+
+ $this->assertDef('', false); // nothing
+
+ }
+}
\ No newline at end of file
diff --git a/tests/HTMLPurifier/AttrDef/IPv6Test.php b/tests/HTMLPurifier/AttrDef/IPv6Test.php
new file mode 100644
index 00000000..7ad3613f
--- /dev/null
+++ b/tests/HTMLPurifier/AttrDef/IPv6Test.php
@@ -0,0 +1,46 @@
+def = new HTMLPurifier_AttrDef_IPv6();
+
+ $this->assertDef('2001:DB8:0:0:8:800:200C:417A'); // unicast, full
+ $this->assertDef('FF01:0:0:0:0:0:0:101'); // multicast, full
+ $this->assertDef('0:0:0:0:0:0:0:1'); // loopback, full
+ $this->assertDef('0:0:0:0:0:0:0:0'); // unspecified, full
+ $this->assertDef('2001:DB8::8:800:200C:417A'); // unicast, compressed
+ $this->assertDef('FF01::101'); // multicast, compressed
+
+ $this->assertDef('::1'); // loopback, compressed, non-routable
+ $this->assertDef('::'); // unspecified, compressed, non-routable
+ $this->assertDef('0:0:0:0:0:0:13.1.68.3'); // IPv4-compatible IPv6 address, full, deprecated
+ $this->assertDef('0:0:0:0:0:FFFF:129.144.52.38'); // IPv4-mapped IPv6 address, full
+ $this->assertDef('::13.1.68.3'); // IPv4-compatible IPv6 address, compressed, deprecated
+ $this->assertDef('::FFFF:129.144.52.38'); // IPv4-mapped IPv6 address, compressed
+ $this->assertDef('2001:0DB8:0000:CD30:0000:0000:0000:0000/60'); // full, with prefix
+ $this->assertDef('2001:0DB8::CD30:0:0:0:0/60'); // compressed, with prefix
+ $this->assertDef('2001:0DB8:0:CD30::/60'); // compressed, with prefix #2
+ $this->assertDef('::/128'); // compressed, unspecified address type, non-routable
+ $this->assertDef('::1/128'); // compressed, loopback address type, non-routable
+ $this->assertDef('FF00::/8'); // compressed, multicast address type
+ $this->assertDef('FE80::/10'); // compressed, link-local unicast, non-routable
+ $this->assertDef('FEC0::/10'); // compressed, site-local unicast, deprecated
+
+ $this->assertDef('2001:DB8:0:0:8:800:200C:417A:221', false); // unicast, full
+ $this->assertDef('FF01::101::2', false); //multicast, compressed
+ $this->assertDef('', false); // nothing
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php
index 921aa910..14fc409f 100644
--- a/tests/HTMLPurifier/AttrDef/URITest.php
+++ b/tests/HTMLPurifier/AttrDef/URITest.php
@@ -4,9 +4,12 @@ require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/URI.php';
// WARNING: INCOMPLETE UNIT TESTS!
-// we are currently abstaining IPv6 and percent-encode fixing unit tests
+// we are currently abstaining from percent-encode fixing unit tests
// we also need to test all the configuration directives defined by this class
+// http: is returned quite often when a URL is invalid. We have to change
+// this behavior to just a plain old "FALSE"!
+
class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
{
@@ -95,8 +98,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$components[8] = array(null, '333.123.32.123', null, '/', null);
// test IPv6 address, using amended form of RFC's example
- //$uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one';
- //$components[9] = array('[2001:db8::7]', '/c=GB', 'objectClass?one', null);
+ $uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one';
+ $components[9] = array(null, '[2001:db8::7]', null, '/c=GB',
+ 'objectClass?one');
// We will not implement punycode encoding, that's up to the browsers
// We also will not implement percent to IDNA encoding transformations:
@@ -109,8 +113,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$components[10] = array(null, 'tūdaliņ.lv', null, '', null);
// test invalid IPv6 address and invalid reg-name
- //$uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]';
- //$components[11] = array(null, '', null, null);
+ $uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]';
+ $components[11] = array(null, null, null, '', null);
+ $expect_uri[11] = 'http:';
// test invalid port
$uri[12] = 'http://example.com:foobar';
diff --git a/tests/index.php b/tests/index.php
index eb751ec5..b6baf263 100644
--- a/tests/index.php
+++ b/tests/index.php
@@ -73,6 +73,9 @@ $test_files[] = 'AttrDef/PercentageTest.php';
$test_files[] = 'AttrDef/MultipleTest.php';
$test_files[] = 'AttrDef/TextDecorationTest.php';
$test_files[] = 'AttrDef/FontFamilyTest.php';
+$test_files[] = 'AttrDef/HostTest.php';
+$test_files[] = 'AttrDef/IPv4Test.php';
+$test_files[] = 'AttrDef/IPv6Test.php';
$test_files[] = 'IDAccumulatorTest.php';
$test_files[] = 'TagTransformTest.php';
$test_files[] = 'AttrTransform/LangTest.php';