0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-21 21:11:51 +00:00
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang 2010-11-12 21:21:18 +00:00
parent b87f2b2748
commit 35048a85cf
2 changed files with 22 additions and 6 deletions

View File

@ -26,6 +26,12 @@ These optional extensions can enhance the capabilities of HTML Purifier:
* bcmath : Used for unit conversion and imagecrash protection
* tidy : Used for pretty-printing HTML
The following optional PEAR library can enhance the capabilities of
HTML Purifier:
* Net_IDNA2 : Allows HTML Purifier to convert internationalized domain
names (IDNs) to Punycode; otherwise such hostnames are discarded.
---------------------------------------------------------------------------
2. Reconnaissance

View File

@ -1,7 +1,8 @@
<?php
/**
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
* Validates a host according to the IPv4, IPv6 and DNS (future)
* specifications. See docs/ref-reg-name.txt for details.
*/
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{
@ -38,10 +39,6 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// A regular domain name.
// This breaks I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. If there's complaining we'll
// try to fix things into an international friendly form.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
@ -52,7 +49,20 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
$toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
if (!$match) return false;
if (!$match) {
if (!class_exists('Net_IDNA2')) {
return false;
}
// Remember, this is a hostname in a URI. So we don't output
// funny Unicode business. But users might want it, so we'll
// have to make it Punycode.
if (strpos($string, '%') !== FALSE) {
// Normalize percent encoding (preserving sub-delimiters
// because they'll be invalid anyway)
$pct = new HTMLPurifier_PercentEncoder('!$&\'()*+,;=');
}
return false;
}
return $string;
}