From 8ae26044406e6ddfffeca525f9c739219db0a4fc Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 25 Jun 2007 00:48:26 +0000 Subject: [PATCH] [2.0.1] Start making more moves towards full error reporting. Revise message naming conventions. Fix variable assignment for error collecting. Revise Language interface to be as readable as possible (NOT compact). Add error reporting to DirectLex. Rewrite ErrorCollector. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1227 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier.php | 5 +- library/HTMLPurifier/ErrorCollector.php | 62 ++++++++++------ library/HTMLPurifier/Language.php | 9 +-- library/HTMLPurifier/Language/messages/en.php | 7 ++ library/HTMLPurifier/LanguageFactory.php | 2 +- library/HTMLPurifier/Lexer/DirectLex.php | 42 ++++++++++- tests/HTMLPurifier/ErrorCollectorTest.php | 73 +++++++++++++------ tests/HTMLPurifier/LanguageTest.php | 2 +- tests/HTMLPurifier/Lexer/DirectLexTest.php | 6 ++ 9 files changed, 149 insertions(+), 59 deletions(-) diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php index 731e1dfd..e9e8219b 100644 --- a/library/HTMLPurifier.php +++ b/library/HTMLPurifier.php @@ -51,6 +51,7 @@ require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Strategy/Core.php'; require_once 'HTMLPurifier/Encoder.php'; +require_once 'HTMLPurifier/ErrorCollector.php'; require_once 'HTMLPurifier/LanguageFactory.php'; HTMLPurifier_ConfigSchema::define( @@ -146,8 +147,8 @@ class HTMLPurifier $language = $language_factory->create($config->get('Core', 'Language')); $context->register('Locale', $language); - $error_collector = new HTMLPurifier_ErrorCollector(); - $context->register('ErrorCollector', $language); + $error_collector = new HTMLPurifier_ErrorCollector($language); + $context->register('ErrorCollector', $error_collector); } $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); diff --git a/library/HTMLPurifier/ErrorCollector.php b/library/HTMLPurifier/ErrorCollector.php index ddad3d8d..762c8dd6 100644 --- a/library/HTMLPurifier/ErrorCollector.php +++ b/library/HTMLPurifier/ErrorCollector.php @@ -10,16 +10,31 @@ class HTMLPurifier_ErrorCollector { var $errors = array(); + var $locale; + + function HTMLPurifier_ErrorCollector(&$locale) {$this->locale =& $locale;} /** * Sends an error message to the collector for later use - * @param string Error message text - * @param int Error severity, PHP error style (don't use E_USER_) - * @param HTMLPurifier_Token Token that caused error - * @param array Tokens surrounding the offending token above, use true as placeholder + * @param $line Integer line number, or HTMLPurifier_Token that caused error + * @param $severity int Error severity, PHP error style (don't use E_USER_) + * @param $msg string Error message text */ - function send($msg, $severity, $token, $context_tokens = array(true)) { - $this->errors[] = array($msg, $severity, $token, $context_tokens); + function send($line, $severity, $msg) { + $token = false; + if ($line && !is_int($line)) { + $token = $line; + $line = $token->line; + if (!$line) $line = false; + } + if (func_num_args() == 3) $msg = $this->locale->getMessage($msg); + else { + $args = func_get_args(); + array_splice($args, 0, 2); // one less to make 1-based + unset($args[0]); + $msg = $this->locale->formatMessage($msg, $args); + } + $this->errors[] = array($line, $severity, $msg); } /** @@ -35,41 +50,40 @@ class HTMLPurifier_ErrorCollector * Default HTML formatting implementation for error messages * @param $config Configuration array, vital for HTML output nature */ - function getHTMLFormatted($config, &$context) { + function getHTMLFormatted($config) { $generator = new HTMLPurifier_Generator(); - $generator->generateFromTokens(array(), $config, $context); // initialize + $generator->generateFromTokens(array(), $config, $this->context); // initialize $ret = array(); $errors = $this->errors; - $locale = $context->get('Locale'); // sort error array by line if ($config->get('Core', 'MaintainLineNumbers')) { $lines = array(); - foreach ($errors as $error) $lines[] = $error[2]->line; + foreach ($errors as $error) { + $lines[] = $error[0]; + } array_multisort($lines, SORT_ASC, $errors); } foreach ($errors as $error) { - list($msg, $severity, $token, $context_tokens) = $error; + list($line, $severity, $msg) = $error; $string = ''; - $string .= $locale->getErrorName($severity) . ': '; + $string .= $this->locale->getErrorName($severity) . ': '; $string .= $generator->escape($msg); - if (!empty($token->line)) { - $string .= ' at line ' . $token->line; + if ($line) { + $string .= $this->locale->formatMessage( + 'ErrorCollector: At line', array('line' => $line)); } - $string .= ' ('; - foreach ($context_tokens as $context_token) { - if ($context_token !== true) { - $string .= $generator->escape($generator->generateFromToken($context_token)); - } else { - $string .= '' . $generator->escape($generator->generateFromToken($token)) . ''; - } - } - $string .= ')'; $ret[] = $string; } - return $ret; + + if (empty($errors)) { + return '

' . $this->locale->getMessage('ErrorCollector: No errors') . '

'; + } else { + return ''; + } + } } diff --git a/library/HTMLPurifier/Language.php b/library/HTMLPurifier/Language.php index 51051c97..d7de2ab5 100644 --- a/library/HTMLPurifier/Language.php +++ b/library/HTMLPurifier/Language.php @@ -71,17 +71,16 @@ class HTMLPurifier_Language /** * Formats a localised message with passed parameters * @param $key string identifier of message - * @param $param Parameter to substitute in (arbitrary number) + * @param $args Parameters to substitute in * @return string localised message */ - function formatMessage($key) { + function formatMessage($key, $args = array()) { if (!$this->_loaded) $this->load(); if (!isset($this->messages[$key])) return "[$key]"; $raw = $this->messages[$key]; - $args = func_get_args(); $substitutions = array(); - for ($i = 1; $i < count($args); $i++) { - $substitutions['$' . $i] = $args[$i]; + foreach ($args as $i => $value) { + $substitutions['$' . $i] = $value; } return strtr($raw, $substitutions); } diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php index d62a30cb..11c1d842 100644 --- a/library/HTMLPurifier/Language/messages/en.php +++ b/library/HTMLPurifier/Language/messages/en.php @@ -7,7 +7,14 @@ $messages = array( 'htmlpurifier' => 'HTML Purifier', 'pizza' => 'Pizza', // for unit testing purposes +'ErrorCollector: No errors' => 'No errors', +'ErrorCollector: At line' => ' at line $line', +'Lexer: Unclosed comment' => 'Unclosed comment', +'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <', +'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', +'Lexer: Missing attribute key' => 'Attribute declaration has no key', +'Lexer: Missing end quote' => 'Attribute declaration has no end quote', ); diff --git a/library/HTMLPurifier/LanguageFactory.php b/library/HTMLPurifier/LanguageFactory.php index c8891ffd..4acbf0e4 100644 --- a/library/HTMLPurifier/LanguageFactory.php +++ b/library/HTMLPurifier/LanguageFactory.php @@ -24,7 +24,7 @@ class HTMLPurifier_LanguageFactory * variables to slurp out of a message file. * @value array list */ - var $keys = array('fallback', 'messages'); + var $keys = array('fallback', 'messages', 'errorNames'); /** * Instance of HTMLPurifier_AttrDef_Lang to validate language codes diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php index 892f7d64..1cdc1a64 100644 --- a/library/HTMLPurifier/Lexer/DirectLex.php +++ b/library/HTMLPurifier/Lexer/DirectLex.php @@ -44,12 +44,19 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $array = array(); // result array $maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers'); - $current_line = 1; + if ($maintain_line_numbers) $current_line = 1; + else $current_line = false; + $context->register('CurrentLine', $current_line); $nl = PHP_EOL; // how often to manually recalculate. This will ALWAYS be right, // but it's pretty wasteful. Set to 0 to turn off $synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval'); + $e = $l = false; + if ($config->get('Core', 'CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + // infinite loop protection // has to be pretty big, since html docs can be big // we're allow two hundred thousand tags... more than enough? @@ -131,6 +138,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // uh oh, we have a comment that extends to // infinity. Can't be helped: set comment // end position to end of string + if ($e) $e->send($current_line, E_WARNING, 'Lexer: Unclosed comment'); $position_comment_end = strlen($html); $end = true; } else { @@ -173,6 +181,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // have accidently grabbed an emoticon. Translate into // text and go our merry way if (!ctype_alnum($segment[0])) { + if ($e) $e->send($current_line, E_NOTICE, 'Lexer: Unescaped lt'); $token = new HTMLPurifier_Token_Text( '<' . @@ -251,6 +260,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $inside_tag = false; continue; } else { + // inside tag, but there's no ending > sign + if ($e) $e->send($current_line, E_WARNING, 'Lexer: Missing gt'); $token = new HTMLPurifier_Token_Text( '<' . @@ -265,6 +276,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } break; } + + $context->destroy('CurrentLine'); + return $array; } @@ -295,6 +309,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if ($string == '') return array(); // no attributes + $e = false; + if ($config->get('Core', 'CollectErrors')) { + $e =& $context->get('ErrorCollector'); + $current_line =& $context->get('CurrentLine'); + } + // let's see if we can abort as quickly as possible // one equal sign, no spaces => one attribute $num_equal = substr_count($string, '='); @@ -306,7 +326,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // only one attribute list($key, $quoted_value) = explode('=', $string); $quoted_value = trim($quoted_value); - if (!$key) return array(); + if (!$key) { + if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key'); + return array(); + } if (!$quoted_value) return array($key => ''); $first_char = @$quoted_value[0]; $last_char = @$quoted_value[strlen($quoted_value)-1]; @@ -320,6 +343,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } else { // not well behaved if ($open_quote) { + if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing end quote'); $value = substr($quoted_value, 1); } else { $value = $quoted_value; @@ -343,7 +367,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer while(true) { // infinite loop protection - if (++$loops > 1000) return array(); + if (++$loops > 1000) { + trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING); + return array(); + } if ($cursor >= $size) { break; @@ -362,7 +389,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $key = substr($string, $key_begin, $key_end - $key_begin); - if (!$key) continue; // empty key + if (!$key) { + if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key'); + $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop + continue; // empty key + } // scroll past all whitespace $cursor += strspn($string, $this->_whitespace, $cursor); @@ -407,6 +438,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // boolattr if ($key !== '') { $array[$key] = $key; + } else { + // purely theoretical + if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key'); } } diff --git a/tests/HTMLPurifier/ErrorCollectorTest.php b/tests/HTMLPurifier/ErrorCollectorTest.php index f25579be..95c0578e 100644 --- a/tests/HTMLPurifier/ErrorCollectorTest.php +++ b/tests/HTMLPurifier/ErrorCollectorTest.php @@ -5,45 +5,74 @@ require_once 'HTMLPurifier/ErrorCollector.php'; class HTMLPurifier_ErrorCollectorTest extends UnitTestCase { + function setup() { + generate_mock_once('HTMLPurifier_Language'); + } + function test() { - $tok1 = new HTMLPurifier_Token_Text('Token that caused error'); - $tok1->line = 23; - $tok2 = new HTMLPurifier_Token_Start('a'); // also caused error - $tok2->line = 3; - $tok3 = new HTMLPurifier_Token_Text('Context before'); // before $tok2 - $tok3->line = 3; - $tok4 = new HTMLPurifier_Token_Text('Context after'); // after $tok2 - $tok4->line = 3; + $tok = new HTMLPurifier_Token_Start('a'); // also caused error + $tok->line = 3; - $collector = new HTMLPurifier_ErrorCollector(); - $collector->send('Big fat error', E_ERROR, $tok1); - $collector->send('Another ', E_WARNING, $tok2, array($tok3, true, $tok4)); + $language = new HTMLPurifier_LanguageMock(); + $language->setReturnValue('getErrorName', 'Error', array(E_ERROR)); + $language->setReturnValue('getErrorName', 'Warning', array(E_WARNING)); + $language->setReturnValue('getMessage', 'Message 1', array('message-1')); + $language->setReturnValue('formatMessage', 'Message 2', array('message-2', array(1 => 'param'))); + $language->setReturnValue('formatMessage', ' at line 23', array('ErrorCollector: At line', array('line' => 23))); + $language->setReturnValue('formatMessage', ' at line 3', array('ErrorCollector: At line', array('line' => 3))); + + $collector = new HTMLPurifier_ErrorCollector($language); + $collector->send(23, E_ERROR, 'message-1'); + $collector->send($tok, E_WARNING, 'message-2', 'param'); $result = array( - 0 => array('Big fat error', E_ERROR, $tok1, array(true)), - 1 => array('Another ', E_WARNING, $tok2, array($tok3, true, $tok4)) + 0 => array(23, E_ERROR, 'Message 1'), + 1 => array(3, E_WARNING, 'Message 2') ); $this->assertIdentical($collector->getRaw(), $result); - $formatted_result = array( - 0 => 'Warning: Another <warning> at line 3 (Context before<a>Context after)', - 1 => 'Error: Big fat error at line 23 (Token that caused error)' - ); + $formatted_result = + '
  • Warning: Message 2 at line 3
  • '. + '
  • Error: Message 1 at line 23
'; $config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true)); - $context = new HTMLPurifier_Context(); + $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result); - generate_mock_once('HTMLPurifier_Language'); + } + + function testNoErrors() { $language = new HTMLPurifier_LanguageMock(); + $language->setReturnValue('getMessage', 'No errors', array('ErrorCollector: No errors')); + $collector = new HTMLPurifier_ErrorCollector($language); + $formatted_result = '

No errors

'; + $config = HTMLPurifier_Config::createDefault(); + $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result); + } + + function testNoLineNumbers() { + $token = new HTMLPurifier_Token_Start('a'); // no line number! + $language = new HTMLPurifier_LanguageMock(); + $language->setReturnValue('getMessage', 'Message 1', array('message-1')); + $language->setReturnValue('getMessage', 'Message 2', array('message-2')); $language->setReturnValue('getErrorName', 'Error', array(E_ERROR)); - $language->setReturnValue('getErrorName', 'Warning', array(E_WARNING)); - $context->register('Locale', $language); + $collector = new HTMLPurifier_ErrorCollector($language); + $collector->send(false, E_ERROR, 'message-1'); + $collector->send($token, E_ERROR, 'message-2'); - $this->assertIdentical($collector->getHTMLFormatted($config, $context), $formatted_result); + $result = array( + 0 => array(false, E_ERROR, 'Message 1'), + 1 => array(false, E_ERROR, 'Message 2') + ); + $this->assertIdentical($collector->getRaw(), $result); + $formatted_result = + '
  • Error: Message 1
  • '. + '
  • Error: Message 2
'; + $config = HTMLPurifier_Config::createDefault(); + $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result); } } diff --git a/tests/HTMLPurifier/LanguageTest.php b/tests/HTMLPurifier/LanguageTest.php index 21e55206..63f37fbd 100644 --- a/tests/HTMLPurifier/LanguageTest.php +++ b/tests/HTMLPurifier/LanguageTest.php @@ -19,7 +19,7 @@ class HTMLPurifier_LanguageTest extends UnitTestCase $lang = new HTMLPurifier_Language(); $lang->_loaded = true; $lang->messages['error'] = 'Error is $1 on line $2'; - $this->assertIdentical($lang->formatMessage('error', 'fatal', 32), 'Error is fatal on line 32'); + $this->assertIdentical($lang->formatMessage('error', array(1=>'fatal', 32)), 'Error is fatal on line 32'); } } diff --git a/tests/HTMLPurifier/Lexer/DirectLexTest.php b/tests/HTMLPurifier/Lexer/DirectLexTest.php index 37c516f3..92e230c3 100644 --- a/tests/HTMLPurifier/Lexer/DirectLexTest.php +++ b/tests/HTMLPurifier/Lexer/DirectLexTest.php @@ -53,6 +53,12 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase $input[10] = 'name="input" selected'; $expect[10] = array('name' => 'input', 'selected' => 'selected'); + $input[11] = '=""'; + $expect[11] = array(); + + $input[12] = '="" =""'; + $expect[12] = array('"' => ''); // tough to say, just don't throw a loop + $config = HTMLPurifier_Config::createDefault(); $context = new HTMLPurifier_Context(); $size = count($input);