diff --git a/NEWS b/NEWS index c8fbeed8..7680373c 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +1.1.0, unknown release date +- Made URI validator more forgiving: will ignore leading and trailing + quotes, apostrophes and less than or greater than signs. + 1.0.1, unknown release date - Fixed slight bug in DOMLex attribute parsing - Fixed rejection of case-insensitive configuration values when there is a diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 76d665fe..0806771a 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -36,13 +36,13 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef // for HTTP and thus won't work for our generic URI parsing // according to the RFC... (but this cuts corners, i.e. non-validating) - $r_URI = '!^'. - '(([^:/?#<>]+):)?'. // 2. Scheme - '(//([^/?#<>]*))?'. // 4. Authority - '([^?#<>]*)'. // 5. Path - '(\?([^#<>]*))?'. // 7. Query - '(#([^<>]*))?'. // 8. Fragment - '$!'; + $r_URI = '!'. + '(([^:/?#<>\'"]+):)?'. // 2. Scheme + '(//([^/?#<>\'"]*))?'. // 4. Authority + '([^?#<>\'"]*)'. // 5. Path + '(\?([^#<>\'"]*))?'. // 7. Query + '(#([^<>\'"]*))?'. // 9. 
Fragment + '!'; $matches = array(); $result = preg_match($r_URI, $uri, $matches); diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index 14fc409f..1b9287b4 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -158,9 +158,15 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness $uri[18] = '/a/b'; $components[18] = array(null, null, null, '/a/b', null); - // it's not allowed, so generic URI should get it - $uri[19] = '<'; - $expect_uri[19] = false; + // result of malformed tag, gracefully handle error + $uri[19] = 'http://www.google.com/\'>"'; + $components[19] = array(null, 'www.google.com', null, '/', null); + $expect_uri[19] = 'http://www.google.com/'; + + // test empty + $uri[20] = ''; + $components[20] = array(null, null, null, '', null); + $expect_uri[20] = ''; foreach ($uri as $i => $value) {