diff --git a/NEWS b/NEWS index 45dd6d3e..8bf79408 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,13 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ========================== 3.3.0, unknown release date +- Fix bug with testEncodingSupportsASCII() algorithm when iconv() implementation + does not do the "right thing" with characters not supported in the output + set. +- Spellcheck UTF-8: The Secret To Character Encoding +. Add verbose mode to command line test runner, use (--verbose) +. Turn on unit tests for UnitConverter +. Fix missing version number in configuration %Attr.DefaultImageAlt (added 3.2.0) 3.2.0, released 2008-10-31 # Using %Core.CollectErrors forces line number/column tracking on, whereas diff --git a/library/HTMLPurifier/Encoder.php b/library/HTMLPurifier/Encoder.php index c2df3132..28259f23 100644 --- a/library/HTMLPurifier/Encoder.php +++ b/library/HTMLPurifier/Encoder.php @@ -401,8 +401,14 @@ class HTMLPurifier_Encoder set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); if (iconv('UTF-8', $encoding, 'a') === false) return false; for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars - $c = chr($i); - if (iconv('UTF-8', "$encoding//IGNORE", $c) === '') { + $c = chr($i); // UTF-8 char + $r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion + if ( + $r === '' || + // This line is needed for iconv implementations that do not + // omit characters that do not exist in the target character set + ($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c) + ) { // Reverse engineer: what's the UTF-8 equiv of this byte // sequence? This assumes that there's no variable width // encoding that doesn't support ASCII.