diff --git a/NEWS b/NEWS index 434bd8b0..d8466ba2 100644 --- a/NEWS +++ b/NEWS @@ -13,12 +13,13 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 1.1.2, unknown projected release date (bugfix release, may be merged with 1.2.0 if new features precede major bugs) -! Add HTMLPurifier.auto.php stub file that automatically configures path +! Add HTMLPurifier.auto.php stub file that automatically configures path - Documentation updated + INSTALL document rewritten + TODO added semi-lossy conversion + API Doxygen docs' file exclusions updated - Fixed lack of attribute parsing in HTMLPurifier_Lexer_PEARSax3 +- ftp:// URIs now have their typecodes checked . Line endings standardized throughout project (svn:eol-style standardized) . Refactored parseData() to general Lexer class diff --git a/library/HTMLPurifier/URIScheme/ftp.php b/library/HTMLPurifier/URIScheme/ftp.php index 0c97d951..c539c354 100644 --- a/library/HTMLPurifier/URIScheme/ftp.php +++ b/library/HTMLPurifier/URIScheme/ftp.php @@ -4,7 +4,6 @@ require_once 'HTMLPurifier/URIScheme.php'; /** * Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738. - * @todo Typecode check on path */ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme { @@ -16,7 +15,27 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme { list($userinfo, $host, $port, $path, $query) = parent::validateComponents( $userinfo, $host, $port, $path, $query, $config ); - // typecode check needed on path + $semicolon_pos = strrpos($path, ';'); // reverse + if ($semicolon_pos !== false) { + // typecode check + $type = substr($path, $semicolon_pos + 1); // no semicolon + $path = substr($path, 0, $semicolon_pos); + $type_ret = ''; + if (strpos($type, '=') !== false) { + // figure out whether or not the declaration is correct + list($key, $typecode) = explode('=', $type, 2); + if ($key !== 'type') { + // invalid key, tack it back on encoded + $path .= '%3B' . 
$type; + } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') { + $type_ret = ";type=$typecode"; + } + } else { + $path .= '%3B' . $type; + } + $path = str_replace(';', '%3B', $path); + $path .= $type_ret; + } return array($userinfo, $host, $port, $path, null); } diff --git a/tests/HTMLPurifier/URISchemeTest.php b/tests/HTMLPurifier/URISchemeTest.php index 6cc32b5f..7400b8d1 100644 --- a/tests/HTMLPurifier/URISchemeTest.php +++ b/tests/HTMLPurifier/URISchemeTest.php @@ -54,12 +54,34 @@ class HTMLPurifier_URISchemeTest extends UnitTestCase $scheme = new HTMLPurifier_URIScheme_ftp(); $config = HTMLPurifier_Config::createDefault(); + $this->assertIdentical( $scheme->validateComponents( 'user', 'www.example.com', 21, '/', 's=foobar', $config), array('user', 'www.example.com', null, '/', null) ); + // valid typecode + $this->assertIdentical( + $scheme->validateComponents( + null, 'www.example.com', null, '/file.txt;type=a', null, $config), + array(null, 'www.example.com', null, '/file.txt;type=a', null) + ); + + // remove invalid typecode + $this->assertIdentical( + $scheme->validateComponents( + null, 'www.example.com', null, '/file.txt;type=z', null, $config), + array(null, 'www.example.com', null, '/file.txt', null) + ); + + // encode errant semicolons + $this->assertIdentical( + $scheme->validateComponents( + null, 'www.example.com', null, '/too;many;semicolons=1', null, $config), + array(null, 'www.example.com', null, '/too%3Bmany%3Bsemicolons=1', null) + ); + } function test_news() {