0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

fix: Adjust Core.AllowHostnameUnderscore to consider that "_" is defined as Unreserved Characters in RFC 3986 (#406)

This commit is contained in:
Atsushi Matsuo 2024-04-19 10:48:20 +09:00 committed by GitHub
parent c9d60c96d7
commit d9fbef8e27
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 13 deletions

View File

@ -63,24 +63,18 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// This doesn't match I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode.
// There is not a good sense in which underscores should be
// allowed, since it's technically not! (And if you go as
// far to allow everything as specified by the DNS spec...
// well, that's literally everything, modulo some space limits
// for the components and the overall name (which, by the way,
// we are NOT checking!). So we (arbitrarily) decide this:
// let's allow underscores wherever we would have allowed
// hyphens, if they are enabled. This is a pretty good match
// for browser behavior, for example, a large number of browsers
// cannot handle foo_.example.com, but foo_bar.example.com is
// fairly well supported.
// Underscores defined as Unreserved Characters in RFC 3986 are
// allowed in a URI. There are cases where we want to consider a
// URI containing "_" such as "_dmarc.example.com".
// Underscores are not allowed in the default. If you want to
// allow it, set Core.AllowHostnameUnderscore to true.
$underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';
// Based off of RFC 1738, but amended so that
// as per RFC 3696, the top label need only not be all numeric.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$an = "[a-z0-9$underscore]"; // alphanum
$and = "[a-z0-9-$underscore]"; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an(?:$and*$an)?";

View File

@ -56,7 +56,8 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
function testAllowUnderscore() {
$this->config->set('Core.AllowHostnameUnderscore', true);
$this->assertDef("foo_bar.example.com");
$this->assertDef("foo_.example.com", false);
$this->assertDef("foo_.example.com");
$this->assertDef("_dmarc.example.com");
}
}