mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-31 20:01:52 +00:00
Fix bug in testEncodingSupportsASCII() with strange iconv
implementations. Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
parent
527f154d3d
commit
e128c09132
7
NEWS
7
NEWS
@ -10,6 +10,13 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
==========================
|
==========================
|
||||||
|
|
||||||
3.3.0, unknown release date
|
3.3.0, unknown release date
|
||||||
|
- Fix bug in testEncodingSupportsASCII() algorithm when the iconv() implementation
|
||||||
|
does not do the "right thing" with characters not supported in the output
|
||||||
|
set.
|
||||||
|
- Spellcheck UTF-8: The Secret To Character Encoding
|
||||||
|
. Add verbose mode to command line test runner, use (--verbose)
|
||||||
|
. Turn on unit tests for UnitConverter
|
||||||
|
. Fix missing version number in configuration %Attr.DefaultImageAlt (added 3.2.0)
|
||||||
|
|
||||||
3.2.0, released 2008-10-31
|
3.2.0, released 2008-10-31
|
||||||
# Using %Core.CollectErrors forces line number/column tracking on, whereas
|
# Using %Core.CollectErrors forces line number/column tracking on, whereas
|
||||||
|
@ -401,8 +401,14 @@ class HTMLPurifier_Encoder
|
|||||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
||||||
if (iconv('UTF-8', $encoding, 'a') === false) return false;
|
if (iconv('UTF-8', $encoding, 'a') === false) return false;
|
||||||
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
|
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
|
||||||
$c = chr($i);
|
$c = chr($i); // UTF-8 char
|
||||||
if (iconv('UTF-8', "$encoding//IGNORE", $c) === '') {
|
$r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
|
||||||
|
if (
|
||||||
|
$r === '' ||
|
||||||
|
// This line is needed for iconv implementations that do not
|
||||||
|
// omit characters that do not exist in the target character set
|
||||||
|
($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
|
||||||
|
) {
|
||||||
// Reverse engineer: what's the UTF-8 equiv of this byte
|
// Reverse engineer: what's the UTF-8 equiv of this byte
|
||||||
// sequence? This assumes that there's no variable width
|
// sequence? This assumes that there's no variable width
|
||||||
// encoding that doesn't support ASCII.
|
// encoding that doesn't support ASCII.
|
||||||
|
Loading…
Reference in New Issue
Block a user