mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 08:21:52 +00:00
Fix #76, linkify includes dots at end of URL.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
parent
aebe1c02a2
commit
92aabf2b23
2
NEWS
2
NEWS
@ -13,6 +13,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
! %CSS.AllowDuplicates permits duplicate CSS properties.
|
||||
- alt truncation could result in malformed UTF-8 sequence. Don't
|
||||
truncate. Thanks Brandon Farber for reporting.
|
||||
- Linkify regex is smarter, based off of Gruber's regex.
|
||||
- IDNA supported natively on PHP 5.3 and later.
|
||||
|
||||
4.7.0, released 2015-08-04
|
||||
# opacity is now considered a "tricky" CSS property rather than a
|
||||
|
@ -31,9 +31,14 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
|
||||
return;
|
||||
}
|
||||
|
||||
// there is/are URL(s). Let's split the string:
|
||||
// Note: this regex is extremely permissive
|
||||
$bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
// there is/are URL(s). Let's split the string.
|
||||
// We use this regex:
|
||||
// https://gist.github.com/gruber/249502
|
||||
// but with @cscott's backtracking fix and also
|
||||
// the Unicode characters un-Unicodified.
|
||||
$bits = preg_split(
|
||||
'/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
|
||||
$token->data, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
|
||||
|
||||
$token = array();
|
||||
|
@ -52,6 +52,14 @@ class HTMLPurifier_Injector_LinkifyTest extends HTMLPurifier_InjectorHarness
|
||||
$this->assertResult('<a><span>http://example.com</span></a>');
|
||||
}
|
||||
|
||||
public function testRegexIsSmart()
|
||||
{
|
||||
$this->assertResult('http://example.com/foo.', '<a href="http://example.com/foo">http://example.com/foo</a>.');
|
||||
$this->assertResult('“http://example.com/foo”', '“<a href="http://example.com/foo">http://example.com/foo</a>”');
|
||||
$this->assertResult('“http://example.com”', '“<a href="http://example.com">http://example.com</a>”');
|
||||
$this->assertResult('(http://example.com/f(o)o)', '(<a href="http://example.com/f(o)o">http://example.com/f(o)o</a>)');
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
Loading…
Reference in New Issue
Block a user