0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-08 15:11:51 +00:00

Accept leading digits in hostnames as per RFC 1123.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
Edward Z. Yang 2016-03-23 22:42:18 -07:00
parent 25db9e1dd0
commit 45161b4fb1
3 changed files with 17 additions and 7 deletions

2
NEWS
View File

@ -15,6 +15,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
truncate. Thanks Brandon Farber for reporting. truncate. Thanks Brandon Farber for reporting.
- Linkify regex is smarter, based off of Gruber's regex. - Linkify regex is smarter, based off of Gruber's regex.
- IDNA supported natively on PHP 5.3 and later. - IDNA supported natively on PHP 5.3 and later.
- Top-level names may now begin with a digit, provided they are not
all-numeric (e.g., foo.1f and 1f are now allowed).
4.7.0, released 2015-08-04 4.7.0, released 2015-08-04
# opacity is now considered a "tricky" CSS property rather than a # opacity is now considered a "tricky" CSS property rather than a

View File

@ -76,18 +76,24 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// fairly well supported. // fairly well supported.
$underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : ''; $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';
// Based on RFC 1738, but amended as per RFC 3696: the top label may
// begin with a digit, as long as it is not all numeric.
// The productions describing this are: // The productions describing this are:
$a = '[a-z]'; // alpha $a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum $an = '[a-z0-9]'; // alphanum
$and = "[a-z0-9-$underscore]"; // alphanum | "-" $and = "[a-z0-9-$underscore]"; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an($and*$an)?"; $domainlabel = "$an(?:$and*$an)?";
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum // AMENDED as per RFC 3696
$toplabel = "$a($and*$an)?"; // toplabel = alphanum | alphanum *( alphanum | "-" ) alphanum
// side condition: not all numeric
$toplabel = "$an(?:$and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ] // hostname = *( domainlabel "." ) toplabel [ "." ]
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/i", $string, $matches)) {
if (!ctype_digit($matches[1])) {
return $string; return $string;
} }
}
// PHP 5.3 and later support this functionality natively // PHP 5.3 and later support this functionality natively
if (function_exists('idn_to_ascii')) { if (function_exists('idn_to_ascii')) {

View File

@ -19,12 +19,13 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('sub.test.'); $this->assertDef('sub.test.');
$this->assertDef('.test', false); $this->assertDef('.test', false);
$this->assertDef('ff'); $this->assertDef('ff');
$this->assertDef('1f', false); $this->assertDef('1f'); // per RFC 1123
// See also http://serverfault.com/questions/638260/is-it-valid-for-a-hostname-to-start-with-a-digit
$this->assertDef('-f', false); $this->assertDef('-f', false);
$this->assertDef('f1'); $this->assertDef('f1');
$this->assertDef('f-', false); $this->assertDef('f-', false);
$this->assertDef('sub.ff'); $this->assertDef('sub.ff');
$this->assertDef('sub.1f', false); $this->assertDef('sub.1f'); // per RFC 1123
$this->assertDef('sub.-f', false); $this->assertDef('sub.-f', false);
$this->assertDef('sub.f1'); $this->assertDef('sub.f1');
$this->assertDef('sub.f-', false); $this->assertDef('sub.f-', false);
@ -35,6 +36,7 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('f1.top'); $this->assertDef('f1.top');
$this->assertDef('f1_f2.ex.top', false); $this->assertDef('f1_f2.ex.top', false);
$this->assertDef('f-.top', false); $this->assertDef('f-.top', false);
$this->assertDef('1a');
$this->assertDef("\xE4\xB8\xAD\xE6\x96\x87.com.cn", false); $this->assertDef("\xE4\xB8\xAD\xE6\x96\x87.com.cn", false);