0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-19 18:55:19 +00:00

[2.0.1] Start making more moves towards full error reporting. Revise message naming conventions. Fix variable assignment for error collecting. Revise Language interface to be as readable as possible (NOT compact). Add error reporting to DirectLex. Rewrite ErrorCollector.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1227 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-25 00:48:26 +00:00
parent 7b087c7bbe
commit 8ae2604440
9 changed files with 149 additions and 59 deletions

View File

@ -51,6 +51,7 @@ require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Strategy/Core.php';
require_once 'HTMLPurifier/Encoder.php';
require_once 'HTMLPurifier/ErrorCollector.php';
require_once 'HTMLPurifier/LanguageFactory.php';
HTMLPurifier_ConfigSchema::define(
@ -146,8 +147,8 @@ class HTMLPurifier
$language = $language_factory->create($config->get('Core', 'Language'));
$context->register('Locale', $language);
$error_collector = new HTMLPurifier_ErrorCollector();
$context->register('ErrorCollector', $language);
$error_collector = new HTMLPurifier_ErrorCollector($language);
$context->register('ErrorCollector', $error_collector);
}
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

View File

@ -10,16 +10,31 @@ class HTMLPurifier_ErrorCollector
{
var $errors = array();
var $locale;
function HTMLPurifier_ErrorCollector(&$locale) {$this->locale =& $locale;}
/**
* Sends an error message to the collector for later use
* @param string Error message text
* @param int Error severity, PHP error style (don't use E_USER_)
* @param HTMLPurifier_Token Token that caused error
* @param array Tokens surrounding the offending token above, use true as placeholder
* @param $line Integer line number, or HTMLPurifier_Token that caused error
* @param $severity int Error severity, PHP error style (don't use E_USER_)
* @param $msg string Error message text
*/
function send($msg, $severity, $token, $context_tokens = array(true)) {
$this->errors[] = array($msg, $severity, $token, $context_tokens);
function send($line, $severity, $msg) {
$token = false;
if ($line && !is_int($line)) {
$token = $line;
$line = $token->line;
if (!$line) $line = false;
}
if (func_num_args() == 3) $msg = $this->locale->getMessage($msg);
else {
$args = func_get_args();
array_splice($args, 0, 2); // one less to make 1-based
unset($args[0]);
$msg = $this->locale->formatMessage($msg, $args);
}
$this->errors[] = array($line, $severity, $msg);
}
/**
@ -35,41 +50,40 @@ class HTMLPurifier_ErrorCollector
* Default HTML formatting implementation for error messages
* @param $config Configuration array, vital for HTML output nature
*/
function getHTMLFormatted($config, &$context) {
function getHTMLFormatted($config) {
$generator = new HTMLPurifier_Generator();
$generator->generateFromTokens(array(), $config, $context); // initialize
$generator->generateFromTokens(array(), $config, $this->context); // initialize
$ret = array();
$errors = $this->errors;
$locale = $context->get('Locale');
// sort error array by line
if ($config->get('Core', 'MaintainLineNumbers')) {
$lines = array();
foreach ($errors as $error) $lines[] = $error[2]->line;
foreach ($errors as $error) {
$lines[] = $error[0];
}
array_multisort($lines, SORT_ASC, $errors);
}
foreach ($errors as $error) {
list($msg, $severity, $token, $context_tokens) = $error;
list($line, $severity, $msg) = $error;
$string = '';
$string .= $locale->getErrorName($severity) . ': ';
$string .= $this->locale->getErrorName($severity) . ': ';
$string .= $generator->escape($msg);
if (!empty($token->line)) {
$string .= ' at line ' . $token->line;
if ($line) {
$string .= $this->locale->formatMessage(
'ErrorCollector: At line', array('line' => $line));
}
$string .= ' (<code>';
foreach ($context_tokens as $context_token) {
if ($context_token !== true) {
$string .= $generator->escape($generator->generateFromToken($context_token));
} else {
$string .= '<strong>' . $generator->escape($generator->generateFromToken($token)) . '</strong>';
}
}
$string .= '</code>)';
$ret[] = $string;
}
return $ret;
if (empty($errors)) {
return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
} else {
return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
}
}
}

View File

@ -71,17 +71,16 @@ class HTMLPurifier_Language
/**
* Formats a localised message with passed parameters
* @param $key string identifier of message
* @param $param Parameter to substitute in (arbitrary number)
* @param $args Parameters to substitute in
* @return string localised message
*/
function formatMessage($key) {
function formatMessage($key, $args = array()) {
if (!$this->_loaded) $this->load();
if (!isset($this->messages[$key])) return "[$key]";
$raw = $this->messages[$key];
$args = func_get_args();
$substitutions = array();
for ($i = 1; $i < count($args); $i++) {
$substitutions['$' . $i] = $args[$i];
foreach ($args as $i => $value) {
$substitutions['$' . $i] = $value;
}
return strtr($raw, $substitutions);
}

View File

@ -7,7 +7,14 @@ $messages = array(
'htmlpurifier' => 'HTML Purifier',
'pizza' => 'Pizza', // for unit testing purposes
'ErrorCollector: No errors' => 'No errors',
'ErrorCollector: At line' => ' at line $line',
'Lexer: Unclosed comment' => 'Unclosed comment',
'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be &lt;',
'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
'Lexer: Missing attribute key' => 'Attribute declaration has no key',
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
);

View File

@ -24,7 +24,7 @@ class HTMLPurifier_LanguageFactory
* variables to slurp out of a message file.
* @value array list
*/
var $keys = array('fallback', 'messages');
var $keys = array('fallback', 'messages', 'errorNames');
/**
* Instance of HTMLPurifier_AttrDef_Lang to validate language codes

View File

@ -44,12 +44,19 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$array = array(); // result array
$maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers');
$current_line = 1;
if ($maintain_line_numbers) $current_line = 1;
else $current_line = false;
$context->register('CurrentLine', $current_line);
$nl = PHP_EOL;
// how often to manually recalculate. This will ALWAYS be right,
// but it's pretty wasteful. Set to 0 to turn off
$synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');
$e = $l = false;
if ($config->get('Core', 'CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
// infinite loop protection
// has to be pretty big, since html docs can be big
// we're allow two hundred thousand tags... more than enough?
@ -131,6 +138,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// uh oh, we have a comment that extends to
// infinity. Can't be helped: set comment
// end position to end of string
if ($e) $e->send($current_line, E_WARNING, 'Lexer: Unclosed comment');
$position_comment_end = strlen($html);
$end = true;
} else {
@ -173,6 +181,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// have accidently grabbed an emoticon. Translate into
// text and go our merry way
if (!ctype_alnum($segment[0])) {
if ($e) $e->send($current_line, E_NOTICE, 'Lexer: Unescaped lt');
$token = new
HTMLPurifier_Token_Text(
'<' .
@ -251,6 +260,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$inside_tag = false;
continue;
} else {
// inside tag, but there's no ending > sign
if ($e) $e->send($current_line, E_WARNING, 'Lexer: Missing gt');
$token = new
HTMLPurifier_Token_Text(
'<' .
@ -265,6 +276,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
break;
}
$context->destroy('CurrentLine');
return $array;
}
@ -295,6 +309,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
if ($string == '') return array(); // no attributes
$e = false;
if ($config->get('Core', 'CollectErrors')) {
$e =& $context->get('ErrorCollector');
$current_line =& $context->get('CurrentLine');
}
// let's see if we can abort as quickly as possible
// one equal sign, no spaces => one attribute
$num_equal = substr_count($string, '=');
@ -306,7 +326,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// only one attribute
list($key, $quoted_value) = explode('=', $string);
$quoted_value = trim($quoted_value);
if (!$key) return array();
if (!$key) {
if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
return array();
}
if (!$quoted_value) return array($key => '');
$first_char = @$quoted_value[0];
$last_char = @$quoted_value[strlen($quoted_value)-1];
@ -320,6 +343,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
} else {
// not well behaved
if ($open_quote) {
if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing end quote');
$value = substr($quoted_value, 1);
} else {
$value = $quoted_value;
@ -343,7 +367,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
while(true) {
// infinite loop protection
if (++$loops > 1000) return array();
if (++$loops > 1000) {
trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
return array();
}
if ($cursor >= $size) {
break;
@ -362,7 +389,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$key = substr($string, $key_begin, $key_end - $key_begin);
if (!$key) continue; // empty key
if (!$key) {
if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
$cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
continue; // empty key
}
// scroll past all whitespace
$cursor += strspn($string, $this->_whitespace, $cursor);
@ -407,6 +438,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// boolattr
if ($key !== '') {
$array[$key] = $key;
} else {
// purely theoretical
if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
}
}

View File

@ -5,45 +5,74 @@ require_once 'HTMLPurifier/ErrorCollector.php';
class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
{
function setup() {
generate_mock_once('HTMLPurifier_Language');
}
function test() {
$tok1 = new HTMLPurifier_Token_Text('Token that caused error');
$tok1->line = 23;
$tok2 = new HTMLPurifier_Token_Start('a'); // also caused error
$tok2->line = 3;
$tok3 = new HTMLPurifier_Token_Text('Context before'); // before $tok2
$tok3->line = 3;
$tok4 = new HTMLPurifier_Token_Text('Context after'); // after $tok2
$tok4->line = 3;
$tok = new HTMLPurifier_Token_Start('a'); // also caused error
$tok->line = 3;
$collector = new HTMLPurifier_ErrorCollector();
$collector->send('Big fat error', E_ERROR, $tok1);
$collector->send('Another <warning>', E_WARNING, $tok2, array($tok3, true, $tok4));
$language = new HTMLPurifier_LanguageMock();
$language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
$language->setReturnValue('getErrorName', 'Warning', array(E_WARNING));
$language->setReturnValue('getMessage', 'Message 1', array('message-1'));
$language->setReturnValue('formatMessage', 'Message 2', array('message-2', array(1 => 'param')));
$language->setReturnValue('formatMessage', ' at line 23', array('ErrorCollector: At line', array('line' => 23)));
$language->setReturnValue('formatMessage', ' at line 3', array('ErrorCollector: At line', array('line' => 3)));
$collector = new HTMLPurifier_ErrorCollector($language);
$collector->send(23, E_ERROR, 'message-1');
$collector->send($tok, E_WARNING, 'message-2', 'param');
$result = array(
0 => array('Big fat error', E_ERROR, $tok1, array(true)),
1 => array('Another <warning>', E_WARNING, $tok2, array($tok3, true, $tok4))
0 => array(23, E_ERROR, 'Message 1'),
1 => array(3, E_WARNING, 'Message 2')
);
$this->assertIdentical($collector->getRaw(), $result);
$formatted_result = array(
0 => 'Warning: Another &lt;warning&gt; at line 3 (<code>Context before<strong>&lt;a&gt;</strong>Context after</code>)',
1 => 'Error: Big fat error at line 23 (<code><strong>Token that caused error</strong></code>)'
);
$formatted_result =
'<ul><li>Warning: Message 2 at line 3</li>'.
'<li>Error: Message 1 at line 23</li></ul>';
$config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true));
$context = new HTMLPurifier_Context();
$this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
generate_mock_once('HTMLPurifier_Language');
}
function testNoErrors() {
$language = new HTMLPurifier_LanguageMock();
$language->setReturnValue('getMessage', 'No errors', array('ErrorCollector: No errors'));
$collector = new HTMLPurifier_ErrorCollector($language);
$formatted_result = '<p>No errors</p>';
$config = HTMLPurifier_Config::createDefault();
$this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
}
function testNoLineNumbers() {
$token = new HTMLPurifier_Token_Start('a'); // no line number!
$language = new HTMLPurifier_LanguageMock();
$language->setReturnValue('getMessage', 'Message 1', array('message-1'));
$language->setReturnValue('getMessage', 'Message 2', array('message-2'));
$language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
$language->setReturnValue('getErrorName', 'Warning', array(E_WARNING));
$context->register('Locale', $language);
$collector = new HTMLPurifier_ErrorCollector($language);
$collector->send(false, E_ERROR, 'message-1');
$collector->send($token, E_ERROR, 'message-2');
$this->assertIdentical($collector->getHTMLFormatted($config, $context), $formatted_result);
$result = array(
0 => array(false, E_ERROR, 'Message 1'),
1 => array(false, E_ERROR, 'Message 2')
);
$this->assertIdentical($collector->getRaw(), $result);
$formatted_result =
'<ul><li>Error: Message 1</li>'.
'<li>Error: Message 2</li></ul>';
$config = HTMLPurifier_Config::createDefault();
$this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
}
}

View File

@ -19,7 +19,7 @@ class HTMLPurifier_LanguageTest extends UnitTestCase
$lang = new HTMLPurifier_Language();
$lang->_loaded = true;
$lang->messages['error'] = 'Error is $1 on line $2';
$this->assertIdentical($lang->formatMessage('error', 'fatal', 32), 'Error is fatal on line 32');
$this->assertIdentical($lang->formatMessage('error', array(1=>'fatal', 32)), 'Error is fatal on line 32');
}
}

View File

@ -53,6 +53,12 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
$input[10] = 'name="input" selected';
$expect[10] = array('name' => 'input', 'selected' => 'selected');
$input[11] = '=""';
$expect[11] = array();
$input[12] = '="" =""';
$expect[12] = array('"' => ''); // tough to say, just don't throw a loop
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
$size = count($input);