0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 08:21:52 +00:00

Fix #76, linkify includes dots at end of URL.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
Edward Z. Yang 2016-03-02 02:05:54 -08:00
parent aebe1c02a2
commit 92aabf2b23
3 changed files with 18 additions and 3 deletions

2
NEWS
View File

@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! %CSS.AllowDuplicates permits duplicate CSS properties.
- alt truncation could result in malformed UTF-8 sequence. Don't
truncate. Thanks Brandon Farber for reporting.
- Linkify regex is smarter, based off of Gruber's regex.
- IDNA supported natively on PHP 5.3 and later.
4.7.0, released 2015-08-04
# opacity is now considered a "tricky" CSS property rather than a

View File

@ -31,9 +31,14 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
return;
}
// there is/are URL(s). Let's split the string:
// Note: this regex is extremely permissive
$bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
// there is/are URL(s). Let's split the string.
// We use this regex:
// https://gist.github.com/gruber/249502
// but with @cscott's backtracking fix and also
// the Unicode characters un-Unicodified.
$bits = preg_split(
'/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
$token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
$token = array();

View File

@ -52,6 +52,14 @@ class HTMLPurifier_Injector_LinkifyTest extends HTMLPurifier_InjectorHarness
$this->assertResult('<a><span>http://example.com</span></a>');
}
public function testRegexIsSmart()
{
$this->assertResult('http://example.com/foo.', '<a href="http://example.com/foo">http://example.com/foo</a>.');
$this->assertResult('“http://example.com/foo”', '“<a href="http://example.com/foo">http://example.com/foo</a>”');
$this->assertResult('“http://example.com”', '“<a href="http://example.com">http://example.com</a>”');
$this->assertResult('(http://example.com/f(o)o)', '(<a href="http://example.com/f(o)o">http://example.com/f(o)o</a>)');
}
}
// vim: et sw=4 sts=4