0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-09 23:28:42 +00:00

New directive %Core.AllowHostnameUnderscore

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang 2013-07-26 21:33:39 -07:00
parent af7107e830
commit 53c2907706
6 changed files with 47 additions and 4 deletions

2
NEWS
View File

@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
# URI parsing algorithm was made more strict, so only prefixes which # URI parsing algorithm was made more strict, so only prefixes which
looks like schemes will actually be schemes. Thanks looks like schemes will actually be schemes. Thanks
Michael Gusev <mgusev@sugarcrm.com> for fixing. Michael Gusev <mgusev@sugarcrm.com> for fixing.
! New directive %Core.AllowHostnameUnderscore which allows underscores
in hostnames.
- Made Linkify URL parser a bit less permissive, so that non-breaking - Made Linkify URL parser a bit less permissive, so that non-breaking
spaces and commas are not included as part of URL. Thanks nAS for fixing. spaces and commas are not included as part of URL. Thanks nAS for fixing.
- Fix some bad interactions with %HTML.Allowed and injectors. Thanks - Fix some bad interactions with %HTML.Allowed and injectors. Thanks

View File

@ -2,7 +2,7 @@
<usage> <usage>
<directive id="Core.CollectErrors"> <directive id="Core.CollectErrors">
<file name="HTMLPurifier.php"> <file name="HTMLPurifier.php">
<line>131</line> <line>150</line>
</file> </file>
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>81</line> <line>81</line>
@ -54,7 +54,7 @@
</directive> </directive>
<directive id="Cache.DefinitionImpl"> <directive id="Cache.DefinitionImpl">
<file name="HTMLPurifier/DefinitionCacheFactory.php"> <file name="HTMLPurifier/DefinitionCacheFactory.php">
<line>49</line> <line>59</line>
</file> </file>
</directive> </directive>
<directive id="HTML.Doctype"> <directive id="HTML.Doctype">
@ -355,9 +355,14 @@
<line>30</line> <line>30</line>
</file> </file>
</directive> </directive>
<directive id="Core.AllowHostnameUnderscore">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>61</line>
</file>
</directive>
<directive id="Core.EnableIDNA"> <directive id="Core.EnableIDNA">
<file name="HTMLPurifier/AttrDef/URI/Host.php"> <file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>67</line> <line>80</line>
</file> </file>
</directive> </directive>
<directive id="Attr.DefaultTextDir"> <directive id="Attr.DefaultTextDir">

View File

@ -47,10 +47,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// This doesn't match I18N domain names, but we don't have proper IRI support, // This doesn't match I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. // so force users to insert Punycode.
// There is not a good sense in which underscores should be
// allowed, since they technically are not! (And if you go as
// far as to allow everything specified by the DNS spec...
// well, that's literally everything, modulo some space limits
// for the components and the overall name (which, by the way,
// we are NOT checking!).) So we (arbitrarily) decide this:
// let's allow underscores wherever we would have allowed
// hyphens, if they are enabled. This is a pretty good match
// for browser behavior, for example, a large number of browsers
// cannot handle foo_.example.com, but foo_bar.example.com is
// fairly well supported.
$underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';
// The productions describing this are: // The productions describing this are:
$a = '[a-z]'; // alpha $a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum $an = '[a-z0-9]'; // alphanum
$and = '[a-z0-9-]'; // alphanum | "-" $and = "[a-z0-9-$underscore]"; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an($and*$an)?"; $domainlabel = "$an($and*$an)?";
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum // toplabel = alpha | alpha *( alphanum | "-" ) alphanum

View File

@ -0,0 +1,16 @@
Core.AllowHostnameUnderscore
TYPE: bool
VERSION: 4.6.0
DEFAULT: false
--DESCRIPTION--
<p>
By RFC 1123, underscores are not permitted in host names.
(This is in contrast to the specification for DNS, RFC
2181, which allows underscores.)
However, most browsers do the right thing when faced with
an underscore in the host name, and so some poorly written
websites rely on the expectation that this should work.
Setting this parameter to true relaxes our allowed character
check so that underscores are permitted.
</p>
--# vim: et sw=4 sts=4

View File

@ -33,6 +33,7 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('-f.top', false); $this->assertDef('-f.top', false);
$this->assertDef('ff.top'); $this->assertDef('ff.top');
$this->assertDef('f1.top'); $this->assertDef('f1.top');
$this->assertDef('f1_f2.ex.top', false);
$this->assertDef('f-.top', false); $this->assertDef('f-.top', false);
$this->assertDef("\xE4\xB8\xAD\xE6\x96\x87.com.cn", false); $this->assertDef("\xE4\xB8\xAD\xE6\x96\x87.com.cn", false);
@ -48,6 +49,12 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
$this->assertDef("\xe2\x80\x85.com", false); // rejected $this->assertDef("\xe2\x80\x85.com", false); // rejected
} }
// Checks host validation with %Core.AllowHostnameUnderscore turned on:
// an underscore between alphanumerics in a label is expected to pass,
// while a label that ends in an underscore is still expected to fail.
function testAllowUnderscore() {
$this->config->set('Core.AllowHostnameUnderscore', true);
// Underscore in the interior of a label (same position a hyphen may take).
$this->assertDef("foo_bar.example.com");
// Underscore as the last character of a label; second argument false = rejected.
$this->assertDef("foo_.example.com", false);
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4