0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-10 16:01:53 +00:00

Commit IPv6 fix, with major factoring out. Thank you Feyd!

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@284 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-08-17 01:05:35 +00:00
parent cd0108d656
commit 1cadb08fbb
9 changed files with 291 additions and 28 deletions

View File

@ -0,0 +1,41 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/AttrDef/IPv4.php';
require_once 'HTMLPurifier/AttrDef/IPv6.php';
/**
 * Validates the host component of a URI.
 *
 * Bracketed values are handed to the IPv6 validator, everything else is
 * first tried as an IPv4 address; whatever is not an IPv4 address is
 * currently passed through untouched.
 */
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
{

    /** Sub-validators, created once in the constructor. */
    var $ipv4, $ipv6;

    function HTMLPurifier_AttrDef_Host() {
        $this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_IPv6();
    }

    /**
     * @param $string  Host as it appears in the URI (IPv6 keeps its brackets)
     * @param $config  Passed through to the sub-validators
     * @param $context Passed through (by reference) to the sub-validators
     * @return The accepted host string, or false when a bracketed value is
     *         rejected by the IPv6 validator
     */
    function validate($string, $config, &$context) {

        // an empty host is returned as-is
        if ($string === '') {
            return '';
        }

        $len = strlen($string);
        $bracketed = $len > 1
            && $string[0] === '['
            && $string[$len - 1] === ']';

        if ($bracketed) {
            // IP literal: validate what is between the brackets, then
            // put the brackets back around the validator's result
            $inner = $this->ipv6->validate(
                substr($string, 1, $len - 2), $config, $context
            );
            return ($inner === false) ? false : '[' . $inner . ']';
        }

        $dotted = $this->ipv4->validate($string, $config, $context);

        // Not an IPv4 address: treat as a domain name. No filtering is done
        // yet; a pattern such as /^[a-z0-9][\w\-\.]*[a-z0-9]$/i could be
        // applied here, but it would break I18N domain names.
        return ($dotted !== false) ? $dotted : $string;
    }

}
?>

View File

@ -0,0 +1,31 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
// spliced from Feyd's IPv6 function (pd)
/**
 * Validates a dotted-decimal IPv4 address: exactly four octets, each in
 * the range 0-255, with nothing before or after.
 */
class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
{

    /**
     * Regex fragment (no delimiters, no anchors) matching one IPv4 address.
     * It is public so that the IPv6 validator can reuse it.
     */
    var $ip4;

    function HTMLPurifier_AttrDef_IPv4() {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
    }

    /**
     * @param $aIP     Candidate address
     * @param $config  Unused here
     * @param $context Unused here
     * @return $aIP unchanged when it is a valid IPv4 address, false otherwise
     */
    function validate($aIP, $config, &$context) {
        // The D modifier (PCRE_DOLLAR_ENDONLY) makes $ match only at the very
        // end of the subject. Without it, $ also matches just before a
        // trailing newline, so "127.0.0.1\n" would be accepted and returned
        // with the newline still attached.
        if (preg_match('#^' . $this->ip4 . '$#D', $aIP)) {
            return $aIP;
        }
        return false;
    }

}
?>

View File

@ -0,0 +1,98 @@
<?php
require_once 'HTMLPurifier/AttrDef/IPv4.php';
// IPv6 by Feyd, source is in public domain
// note that this expects the brackets to be removed from IPv6 addresses
// extends from the IPv4 impl. so we can borrow its regex
/**
 * Validates an IPv6 address given without the surrounding brackets.
 *
 * Accepts full and "::"-compressed forms, an optional trailing dotted
 * IPv4 part, and an optional "/0" - "/128" prefix length. Extends the
 * IPv4 validator only to borrow its $ip4 regex fragment.
 */
class HTMLPurifier_AttrDef_IPv6 extends HTMLPurifier_AttrDef_IPv4
{

    /**
     * @param $aIP     Candidate address, brackets already removed
     * @param $config  Unused here
     * @param $context Unused here
     * @return The original string unchanged (prefix included) when valid,
     *         false otherwise
     */
    function validate($aIP, $config, &$context) {

        $original = $aIP;

        $hex = '[0-9a-fA-F]';
        $blk = '(?:' . $hex . '{1,4})'; // one 16-bit group, 1-4 hex digits
        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128

        // All patterns below use the D modifier so that $ matches only at
        // the very end of the subject and never before a trailing newline.

        // prefix check: a slash is only allowed as part of a valid prefix
        // length at the end; strip it before looking at the address
        if (strpos($aIP, '/') !== false) {
            if (!preg_match('#' . $pre . '$#D', $aIP, $find)) {
                return false;
            }
            $aIP = substr($aIP, 0, 0 - strlen($find[0]));
            unset($find);
        }

        // IPv4-compatibility check: replace a trailing dotted quad that
        // follows a colon with two hex groups, so the rest of the routine
        // only has to deal with colon-separated groups. The groups are not
        // zero-padded; that is fine because they are only used for
        // validation and $original is what gets returned.
        if (preg_match('#(?<=:)' . $this->ip4 . '$#D', $aIP, $find)) {
            $aIP = substr($aIP, 0, 0 - strlen($find[0]));
            $ip = explode('.', $find[0]);
            $ip = array_map('dechex', $ip);
            $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
            unset($find, $ip);
        }

        // compression check: at most one "::" may appear
        $halves = explode('::', $aIP);
        $c = count($halves);
        if ($c > 2) {
            return false;
        }

        if ($c == 2) {
            // An empty side contributes no groups (explode() on '' would
            // otherwise yield one bogus empty group).
            $first  = ($halves[0] === '') ? array() : explode(':', $halves[0]);
            $second = ($halves[1] === '') ? array() : explode(':', $halves[1]);

            // "::" has to stand for at least one group of zeros
            $missing = 8 - count($first) - count($second);
            if ($missing < 1) {
                return false;
            }

            $groups = array_merge(
                $first,
                array_fill(0, $missing, '0'),
                $second
            );
        } else {
            $groups = explode(':', $halves[0]);
        }

        if (count($groups) != 8) {
            return false;
        }

        // All the pieces should be 16-bit hex strings. Matching the raw
        // piece (rather than a zero-padded copy) also rejects empty groups
        // such as those in "1:::2" or a dangling leading/trailing colon.
        foreach ($groups as $piece) {
            if (!preg_match('#^' . $blk . '$#D', $piece)) {
                return false;
            }
        }

        return $original;
    }

}
?>

View File

@ -1,7 +1,9 @@
<?php <?php
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/URIScheme.php'; require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URISchemeRegistry.php'; require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/AttrDef/Host.php';
HTMLPurifier_ConfigDef::define( HTMLPurifier_ConfigDef::define(
'URI', 'DefaultScheme', 'http', 'URI', 'DefaultScheme', 'http',
@ -12,6 +14,12 @@ HTMLPurifier_ConfigDef::define(
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{ {
var $host;
function HTMLPurifier_AttrDef_URI() {
$this->host = new HTMLPurifier_AttrDef_Host();
}
function validate($uri, $config, &$context) { function validate($uri, $config, &$context) {
// We'll write stack-based parsers later, for now, use regexps to // We'll write stack-based parsers later, for now, use regexps to
@ -63,34 +71,12 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if ($authority !== null) { if ($authority !== null) {
// define regexps
// this stuff may need to be factored out so Email can get to it
$HEXDIG = '[A-Fa-f0-9]'; $HEXDIG = '[A-Fa-f0-9]';
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with [] $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
$sub_delims = '!$&\'()'; // needs [] $sub_delims = '!$&\'()'; // needs []
$pct_encoded = "%$HEXDIG$HEXDIG"; $pct_encoded = "%$HEXDIG$HEXDIG";
$h16 = "{$HEXDIG}{1,4}";
$dec_octet = '(?:25[0-5]|2[0-4]\d|1\d\d|1\d|[0-9])';
$IPv4address = "$dec_octet.$dec_octet.$dec_octet.$dec_octet";
$ls32 = "(?:$h16:$h16|$IPv4address)";
$IPvFuture = "v$HEXDIG+\.[:$unreserved$sub_delims]+";
$IPv6Address = "(?:".
"(?:$h16:){6}$ls32" .
"|::(?:$h16:){5}$ls32" .
"|(?:$h16)?::(?:$h16:){4}$ls32" .
"|(?:(?:$h16:){1}$h16)?::(?:$h16:){3}$ls32" .
"|(?:(?:$h16:){2}$h16)?::(?:$h16:){2}$ls32" .
"|(?:(?:$h16:){3}$h16)?::(?:$h16:){1}$ls32" .
"|(?:(?:$h16:){4}$h16)?::$ls32" .
"|(?:(?:$h16:){5}$h16)?::$h16" .
"|(?:(?:$h16:){6}$h16)?::" .
")";
$IP_literal = "\[(?:$IPvFuture|$IPv6Address)\]";
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*"; $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
// IPv6 is broken
$r_authority = "/^(($r_userinfo)@)?(\[$IP_literal\]|[^:]*)(:(\d*))?/";
$matches = array(); $matches = array();
preg_match($r_authority, $authority, $matches); preg_match($r_authority, $authority, $matches);
// overloads regexp! // overloads regexp!
@ -104,6 +90,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if ($port < 1 || $port > 65535) $port = null; if ($port < 1 || $port > 65535) $port = null;
} }
$host = $this->host->validate($host, $config, $context);
if ($host === false) $host = null;
// userinfo and host are validated within the regexp // userinfo and host are validated within the regexp
} else { } else {

View File

@ -0,0 +1,24 @@
<?php
require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/Host.php';
// takes a URI formatted host and validates it
/**
 * Exercises the host validator with one sample of each supported form.
 */
class HTMLPurifier_AttrDef_HostTest extends HTMLPurifier_AttrDefHarness
{

    function test() {

        $this->def = new HTMLPurifier_AttrDef_Host();

        // one representative per host form, checked in this order
        $samples = array(
            '[2001:DB8:0:0:8:800:200C:417A]', // IPv6 literal, bracketed
            '124.15.6.89',                    // IPv4 address
            'www.google.com'                  // registered name
        );

        foreach ($samples as $host) {
            $this->assertDef($host);
        }

    }

}
?>

View File

@ -0,0 +1,26 @@
<?php
require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/IPv4.php';
// IPv4 test case is spliced from Feyd's IPv6 implementation
// we ought to disallow non-routable addresses
/**
 * Table-driven checks for the IPv4 validator.
 */
class HTMLPurifier_AttrDef_IPv4Test extends HTMLPurifier_AttrDefHarness
{

    function test() {

        $this->def = new HTMLPurifier_AttrDef_IPv4();

        // well-formed addresses
        $good = array(
            '127.0.0.1',       // loopback, non-routable
            '0.0.0.0',         // unspecified, non-routable
            '255.255.255.255'  // highest value in every octet
        );
        foreach ($good as $address) {
            $this->assertDef($address);
        }

        // inputs asserted with false as the expected result
        $bad = array(
            '300.0.0.0',       // first octet out of range
            '124.15.6.89/60',  // prefix not allowed on IPv4
            ''                 // nothing
        );
        foreach ($bad as $address) {
            $this->assertDef($address, false);
        }

    }

}

View File

@ -0,0 +1,46 @@
<?php
require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/IPv6.php';
// test case is from Feyd's IPv6 implementation
// we ought to disallow non-routable addresses
/**
 * Table-driven checks for the IPv6 validator (addresses given without
 * brackets).
 */
class HTMLPurifier_AttrDef_IPv6Test extends HTMLPurifier_AttrDefHarness
{

    function test() {

        $this->def = new HTMLPurifier_AttrDef_IPv6();

        // well-formed addresses, checked in this order
        $good = array(
            // full notation
            '2001:DB8:0:0:8:800:200C:417A', // unicast
            'FF01:0:0:0:0:0:0:101',         // multicast
            '0:0:0:0:0:0:0:1',              // loopback
            '0:0:0:0:0:0:0:0',              // unspecified

            // compressed notation
            '2001:DB8::8:800:200C:417A',    // unicast
            'FF01::101',                    // multicast
            '::1',                          // loopback, non-routable
            '::',                           // unspecified, non-routable

            // trailing dotted IPv4 part
            '0:0:0:0:0:0:13.1.68.3',        // IPv4-compatible, full, deprecated
            '0:0:0:0:0:FFFF:129.144.52.38', // IPv4-mapped, full
            '::13.1.68.3',                  // IPv4-compatible, compressed, deprecated
            '::FFFF:129.144.52.38',         // IPv4-mapped, compressed

            // with prefix length
            '2001:0DB8:0000:CD30:0000:0000:0000:0000/60', // full
            '2001:0DB8::CD30:0:0:0:0/60',   // compressed
            '2001:0DB8:0:CD30::/60',        // compressed, variant #2
            '::/128',                       // unspecified address type, non-routable
            '::1/128',                      // loopback address type, non-routable
            'FF00::/8',                     // multicast address type
            'FE80::/10',                    // link-local unicast, non-routable
            'FEC0::/10'                     // site-local unicast, deprecated
        );
        foreach ($good as $address) {
            $this->assertDef($address);
        }

        // inputs asserted with false as the expected result
        $bad = array(
            '2001:DB8:0:0:8:800:200C:417A:221', // unicast, full, one group too many
            'FF01::101::2',                     // multicast, "::" used twice
            ''                                  // nothing
        );
        foreach ($bad as $address) {
            $this->assertDef($address, false);
        }

    }

}
?>

View File

@ -4,9 +4,12 @@ require_once 'HTMLPurifier/AttrDefHarness.php';
require_once 'HTMLPurifier/AttrDef/URI.php'; require_once 'HTMLPurifier/AttrDef/URI.php';
// WARNING: INCOMPLETE UNIT TESTS! // WARNING: INCOMPLETE UNIT TESTS!
// we are currently abstaining IPv6 and percent-encode fixing unit tests // we are currently abstaining percent-encode fixing unit tests
// we also need to test all the configuration directives defined by this class // we also need to test all the configuration directives defined by this class
// http: is returned quite often when a URL is invalid. We have to change
// this behavior to just a plain old "FALSE"!
class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
{ {
@ -95,8 +98,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$components[8] = array(null, '333.123.32.123', null, '/', null); $components[8] = array(null, '333.123.32.123', null, '/', null);
// test IPv6 address, using amended form of RFC's example // test IPv6 address, using amended form of RFC's example
//$uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one'; $uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one';
//$components[9] = array('[2001:db8::7]', '/c=GB', 'objectClass?one', null); $components[9] = array(null, '[2001:db8::7]', null, '/c=GB',
'objectClass?one');
// We will not implement punycode encoding, that's up to the browsers // We will not implement punycode encoding, that's up to the browsers
// We also will not implement percent to IDNA encoding transformations: // We also will not implement percent to IDNA encoding transformations:
@ -109,8 +113,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$components[10] = array(null, 'tūdaliņ.lv', null, '', null); $components[10] = array(null, 'tūdaliņ.lv', null, '', null);
// test invalid IPv6 address and invalid reg-name // test invalid IPv6 address and invalid reg-name
//$uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]'; $uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]';
//$components[11] = array(null, '', null, null); $components[11] = array(null, null, null, '', null);
$expect_uri[11] = 'http:';
// test invalid port // test invalid port
$uri[12] = 'http://example.com:foobar'; $uri[12] = 'http://example.com:foobar';

View File

@ -73,6 +73,9 @@ $test_files[] = 'AttrDef/PercentageTest.php';
$test_files[] = 'AttrDef/MultipleTest.php'; $test_files[] = 'AttrDef/MultipleTest.php';
$test_files[] = 'AttrDef/TextDecorationTest.php'; $test_files[] = 'AttrDef/TextDecorationTest.php';
$test_files[] = 'AttrDef/FontFamilyTest.php'; $test_files[] = 'AttrDef/FontFamilyTest.php';
$test_files[] = 'AttrDef/HostTest.php';
$test_files[] = 'AttrDef/IPv4Test.php';
$test_files[] = 'AttrDef/IPv6Test.php';
$test_files[] = 'IDAccumulatorTest.php'; $test_files[] = 'IDAccumulatorTest.php';
$test_files[] = 'TagTransformTest.php'; $test_files[] = 'TagTransformTest.php';
$test_files[] = 'AttrTransform/LangTest.php'; $test_files[] = 'AttrTransform/LangTest.php';