0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-18 11:41:52 +00:00

Fix bug with testEncodingSupportsASCII() with strange iconv implementations.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2008-11-26 15:17:09 -05:00
parent 527f154d3d
commit e128c09132
2 changed files with 15 additions and 2 deletions

7
NEWS
View File

@@ -10,6 +10,13 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
========================== ==========================
3.3.0, unknown release date 3.3.0, unknown release date
- Fix bug with testEncodingSupportsASCII() algorithm when iconv() implementation
does not do the "right thing" with characters not supported in the output
set.
- Spellcheck UTF-8: The Secret To Character Encoding
. Add verbose mode to command line test runner, use (--verbose)
. Turn on unit tests for UnitConverter
. Fix missing version number in configuration %Attr.DefaultImageAlt (added 3.2.0)
3.2.0, released 2008-10-31 3.2.0, released 2008-10-31
# Using %Core.CollectErrors forces line number/column tracking on, whereas # Using %Core.CollectErrors forces line number/column tracking on, whereas

View File

@@ -401,8 +401,14 @@ class HTMLPurifier_Encoder
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if (iconv('UTF-8', $encoding, 'a') === false) return false; if (iconv('UTF-8', $encoding, 'a') === false) return false;
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
$c = chr($i); $c = chr($i); // UTF-8 char
if (iconv('UTF-8', "$encoding//IGNORE", $c) === '') { $r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
if (
$r === '' ||
// This line is needed for iconv implementations that do not
// omit characters that do not exist in the target character set
($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
) {
// Reverse engineer: what's the UTF-8 equiv of this byte // Reverse engineer: what's the UTF-8 equiv of this byte
// sequence? This assumes that there's no variable width // sequence? This assumes that there's no variable width
// encoding that doesn't support ASCII. // encoding that doesn't support ASCII.