0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 08:21:52 +00:00

[3.1.0] Encoder optimization, as suggested by Diego

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1680 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2008-04-22 18:14:40 +00:00
parent e9c7873057
commit c4aa3ee40c
3 changed files with 23 additions and 4 deletions

2
NEWS
View File

@ -12,7 +12,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
3.1.0, unknown release date
- InterchangeBuilder now alphabetizes its lists
- Validation error in configdoc output fixed
- Iconv errors muted even with custom error handlers
. Out-of-date documentation revised
. UTF-8 encoding check optimization as suggested by Diego
3.1.0rc1, released 2008-04-22
# Autoload support added. Internal require_once's removed in favor of an

9
TODO
View File

@ -29,11 +29,14 @@ NICE FEATURES
BUGS
- Style attribute height/width limiting for images
- Easy way to blacklist elements and attributes
- Investigate iconv error emitting
- Investigate UTF-8 optimization <http://htmlpurifier.org/phorum/read.php?3,1496>
- Figure out what to do about target="" and name="", since they show up so often
EXTERNAL
- Improve Phorum install documentation
- Mia
- Aliro
- Comparison: http://code.iamcal.com/php/lib_filter/
FUTURE VERSIONS
---------------

View File

@ -14,6 +14,11 @@ class HTMLPurifier_Encoder
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
}
/**
* Error-handler that mutes errors, alternative to shut-up operator.
*
* The body is intentionally empty: while this method is registered through
* set_error_handler(), an error raised by the guarded call is passed here
* and nothing is done with it. Registration is expected to be paired with
* restore_error_handler() right after the guarded call, so the previous
* handler is only replaced for that single statement.
*/
private static function muteErrorHandler() {}
/**
* Cleans a UTF-8 string for well-formedness and SGML validity
*
@ -57,9 +62,18 @@ class HTMLPurifier_Encoder
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
// UTF-8 validity is checked since PHP 4.3.5
// This is an optimization: if the string is already valid UTF-8, no
// need to do iconv/php stuff. 99% of the time, this will be the case.
if (preg_match('/^.{1}/us', $str)) {
return strtr($str, $non_sgml_chars);
}
if ($iconv && !$force_php) {
// do the shortcut way
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
$str = iconv('UTF-8', 'UTF-8//IGNORE', $str);
restore_error_handler();
return strtr($str, $non_sgml_chars);
}