diff --git a/NEWS b/NEWS
index 57bf7ae3..cdec03e4 100644
--- a/NEWS
+++ b/NEWS
@@ -37,6 +37,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
   %HTML.Allowed
 ! Config object gives more friendly error messages when things go wrong
 - Deprecated and removed EnableRedundantUTF8Cleaning. It didn't even work!
+- DOMLex will not emit errors when a custom error handler that does not
+  honor error_reporting is used
 . Unit test for ElementDef created, ElementDef behavior modified to
   be more flexible
 . Added convenience functions for HTMLModule constructors
diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php
index 9286b023..de9d6871 100644
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -53,20 +53,17 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
                 '</head><body><div>'.$string.'</div></body></html>';
 
         $doc = new DOMDocument();
-        $doc->encoding = 'UTF-8'; // technically does nothing, but whatever
+        $doc->encoding = 'UTF-8'; // theoretically, the above has this covered
 
-        // DOM will toss errors if the HTML its parsing has really big
-        // problems, so we're going to mute them. This can cause problems
-        // if a custom error handler that doesn't implement error_reporting
-        // is set, as noted by a Drupal plugin of HTML Purifier. Consider
-        // making our own error reporter to temporarily load in
-        @$doc->loadHTML($string);
+        set_error_handler(array($this, 'muteErrorHandler'));
+        $doc->loadHTML($string);
+        restore_error_handler();
 
         $tokens = array();
         $this->tokenizeDOM(
-            $doc->getElementsByTagName('html')->item(0)-> // html
-                  getElementsByTagName('body')->item(0)-> // body
-                  getElementsByTagName('div')->item(0)    // div
+            $doc->getElementsByTagName('html')->item(0)-> // <html>
+                  getElementsByTagName('body')->item(0)-> //   <body>
+                  getElementsByTagName('div')->item(0)    //     <div>
         , $tokens);
         return $tokens;
     }
@@ -82,7 +79,6 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
      * @returns Tokens of node appended to previously passed tokens.
      */
     protected function tokenizeDOM($node, &$tokens, $collect = false) {
-        // recursive goodness!
 
         // intercept non element nodes. WE MUST catch all of them,
         // but we're not getting the character reference nodes because
@@ -147,6 +143,11 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
         return $array;
     }
 
+    /**
+     * An error handler that mutes all errors
+     */
+    public function muteErrorHandler($errno, $errstr) {}
+
 }
 
 ?>
\ No newline at end of file