mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-11-09 23:28:42 +00:00
[2.0.1] Start making more moves towards full error reporting. Revise message naming conventions. Fix variable assignment for error collecting. Revise Language interface to be as readable as possible (NOT compact). Add error reporting to DirectLex. Rewrite ErrorCollector.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1227 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
7b087c7bbe
commit
8ae2604440
@ -51,6 +51,7 @@ require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
||||
require_once 'HTMLPurifier/ErrorCollector.php';
|
||||
require_once 'HTMLPurifier/LanguageFactory.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@ -146,8 +147,8 @@ class HTMLPurifier
|
||||
$language = $language_factory->create($config->get('Core', 'Language'));
|
||||
$context->register('Locale', $language);
|
||||
|
||||
$error_collector = new HTMLPurifier_ErrorCollector();
|
||||
$context->register('ErrorCollector', $language);
|
||||
$error_collector = new HTMLPurifier_ErrorCollector($language);
|
||||
$context->register('ErrorCollector', $error_collector);
|
||||
}
|
||||
|
||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||
|
@ -10,16 +10,31 @@ class HTMLPurifier_ErrorCollector
|
||||
{
|
||||
|
||||
var $errors = array();
|
||||
var $locale;
|
||||
|
||||
function HTMLPurifier_ErrorCollector(&$locale) {$this->locale =& $locale;}
|
||||
|
||||
/**
|
||||
* Sends an error message to the collector for later use
|
||||
* @param string Error message text
|
||||
* @param int Error severity, PHP error style (don't use E_USER_)
|
||||
* @param HTMLPurifier_Token Token that caused error
|
||||
* @param array Tokens surrounding the offending token above, use true as placeholder
|
||||
* @param $line Integer line number, or HTMLPurifier_Token that caused error
|
||||
* @param $severity int Error severity, PHP error style (don't use E_USER_)
|
||||
* @param $msg string Error message text
|
||||
*/
|
||||
function send($msg, $severity, $token, $context_tokens = array(true)) {
|
||||
$this->errors[] = array($msg, $severity, $token, $context_tokens);
|
||||
function send($line, $severity, $msg) {
|
||||
$token = false;
|
||||
if ($line && !is_int($line)) {
|
||||
$token = $line;
|
||||
$line = $token->line;
|
||||
if (!$line) $line = false;
|
||||
}
|
||||
if (func_num_args() == 3) $msg = $this->locale->getMessage($msg);
|
||||
else {
|
||||
$args = func_get_args();
|
||||
array_splice($args, 0, 2); // one less to make 1-based
|
||||
unset($args[0]);
|
||||
$msg = $this->locale->formatMessage($msg, $args);
|
||||
}
|
||||
$this->errors[] = array($line, $severity, $msg);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -35,41 +50,40 @@ class HTMLPurifier_ErrorCollector
|
||||
* Default HTML formatting implementation for error messages
|
||||
* @param $config Configuration array, vital for HTML output nature
|
||||
*/
|
||||
function getHTMLFormatted($config, &$context) {
|
||||
function getHTMLFormatted($config) {
|
||||
$generator = new HTMLPurifier_Generator();
|
||||
$generator->generateFromTokens(array(), $config, $context); // initialize
|
||||
$generator->generateFromTokens(array(), $config, $this->context); // initialize
|
||||
$ret = array();
|
||||
|
||||
$errors = $this->errors;
|
||||
$locale = $context->get('Locale');
|
||||
|
||||
// sort error array by line
|
||||
if ($config->get('Core', 'MaintainLineNumbers')) {
|
||||
$lines = array();
|
||||
foreach ($errors as $error) $lines[] = $error[2]->line;
|
||||
foreach ($errors as $error) {
|
||||
$lines[] = $error[0];
|
||||
}
|
||||
array_multisort($lines, SORT_ASC, $errors);
|
||||
}
|
||||
|
||||
foreach ($errors as $error) {
|
||||
list($msg, $severity, $token, $context_tokens) = $error;
|
||||
list($line, $severity, $msg) = $error;
|
||||
$string = '';
|
||||
$string .= $locale->getErrorName($severity) . ': ';
|
||||
$string .= $this->locale->getErrorName($severity) . ': ';
|
||||
$string .= $generator->escape($msg);
|
||||
if (!empty($token->line)) {
|
||||
$string .= ' at line ' . $token->line;
|
||||
if ($line) {
|
||||
$string .= $this->locale->formatMessage(
|
||||
'ErrorCollector: At line', array('line' => $line));
|
||||
}
|
||||
$string .= ' (<code>';
|
||||
foreach ($context_tokens as $context_token) {
|
||||
if ($context_token !== true) {
|
||||
$string .= $generator->escape($generator->generateFromToken($context_token));
|
||||
} else {
|
||||
$string .= '<strong>' . $generator->escape($generator->generateFromToken($token)) . '</strong>';
|
||||
}
|
||||
}
|
||||
$string .= '</code>)';
|
||||
$ret[] = $string;
|
||||
}
|
||||
return $ret;
|
||||
|
||||
if (empty($errors)) {
|
||||
return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
|
||||
} else {
|
||||
return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -71,17 +71,16 @@ class HTMLPurifier_Language
|
||||
/**
|
||||
* Formats a localised message with passed parameters
|
||||
* @param $key string identifier of message
|
||||
* @param $param Parameter to substitute in (arbitrary number)
|
||||
* @param $args Parameters to substitute in
|
||||
* @return string localised message
|
||||
*/
|
||||
function formatMessage($key) {
|
||||
function formatMessage($key, $args = array()) {
|
||||
if (!$this->_loaded) $this->load();
|
||||
if (!isset($this->messages[$key])) return "[$key]";
|
||||
$raw = $this->messages[$key];
|
||||
$args = func_get_args();
|
||||
$substitutions = array();
|
||||
for ($i = 1; $i < count($args); $i++) {
|
||||
$substitutions['$' . $i] = $args[$i];
|
||||
foreach ($args as $i => $value) {
|
||||
$substitutions['$' . $i] = $value;
|
||||
}
|
||||
return strtr($raw, $substitutions);
|
||||
}
|
||||
|
@ -7,7 +7,14 @@ $messages = array(
|
||||
'htmlpurifier' => 'HTML Purifier',
|
||||
'pizza' => 'Pizza', // for unit testing purposes
|
||||
|
||||
'ErrorCollector: No errors' => 'No errors',
|
||||
'ErrorCollector: At line' => ' at line $line',
|
||||
|
||||
'Lexer: Unclosed comment' => 'Unclosed comment',
|
||||
'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <',
|
||||
'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
|
||||
'Lexer: Missing attribute key' => 'Attribute declaration has no key',
|
||||
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
|
||||
|
||||
);
|
||||
|
||||
|
@ -24,7 +24,7 @@ class HTMLPurifier_LanguageFactory
|
||||
* variables to slurp out of a message file.
|
||||
* @value array list
|
||||
*/
|
||||
var $keys = array('fallback', 'messages');
|
||||
var $keys = array('fallback', 'messages', 'errorNames');
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_Lang to validate language codes
|
||||
|
@ -44,12 +44,19 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$array = array(); // result array
|
||||
|
||||
$maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers');
|
||||
$current_line = 1;
|
||||
if ($maintain_line_numbers) $current_line = 1;
|
||||
else $current_line = false;
|
||||
$context->register('CurrentLine', $current_line);
|
||||
$nl = PHP_EOL;
|
||||
// how often to manually recalculate. This will ALWAYS be right,
|
||||
// but it's pretty wasteful. Set to 0 to turn off
|
||||
$synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');
|
||||
|
||||
$e = $l = false;
|
||||
if ($config->get('Core', 'CollectErrors')) {
|
||||
$e =& $context->get('ErrorCollector');
|
||||
}
|
||||
|
||||
// infinite loop protection
|
||||
// has to be pretty big, since html docs can be big
|
||||
// we're allow two hundred thousand tags... more than enough?
|
||||
@ -131,6 +138,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
// uh oh, we have a comment that extends to
|
||||
// infinity. Can't be helped: set comment
|
||||
// end position to end of string
|
||||
if ($e) $e->send($current_line, E_WARNING, 'Lexer: Unclosed comment');
|
||||
$position_comment_end = strlen($html);
|
||||
$end = true;
|
||||
} else {
|
||||
@ -173,6 +181,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
// have accidently grabbed an emoticon. Translate into
|
||||
// text and go our merry way
|
||||
if (!ctype_alnum($segment[0])) {
|
||||
if ($e) $e->send($current_line, E_NOTICE, 'Lexer: Unescaped lt');
|
||||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
'<' .
|
||||
@ -251,6 +260,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$inside_tag = false;
|
||||
continue;
|
||||
} else {
|
||||
// inside tag, but there's no ending > sign
|
||||
if ($e) $e->send($current_line, E_WARNING, 'Lexer: Missing gt');
|
||||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
'<' .
|
||||
@ -265,6 +276,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
$context->destroy('CurrentLine');
|
||||
|
||||
return $array;
|
||||
}
|
||||
|
||||
@ -295,6 +309,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
|
||||
if ($string == '') return array(); // no attributes
|
||||
|
||||
$e = false;
|
||||
if ($config->get('Core', 'CollectErrors')) {
|
||||
$e =& $context->get('ErrorCollector');
|
||||
$current_line =& $context->get('CurrentLine');
|
||||
}
|
||||
|
||||
// let's see if we can abort as quickly as possible
|
||||
// one equal sign, no spaces => one attribute
|
||||
$num_equal = substr_count($string, '=');
|
||||
@ -306,7 +326,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
// only one attribute
|
||||
list($key, $quoted_value) = explode('=', $string);
|
||||
$quoted_value = trim($quoted_value);
|
||||
if (!$key) return array();
|
||||
if (!$key) {
|
||||
if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
|
||||
return array();
|
||||
}
|
||||
if (!$quoted_value) return array($key => '');
|
||||
$first_char = @$quoted_value[0];
|
||||
$last_char = @$quoted_value[strlen($quoted_value)-1];
|
||||
@ -320,6 +343,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
} else {
|
||||
// not well behaved
|
||||
if ($open_quote) {
|
||||
if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing end quote');
|
||||
$value = substr($quoted_value, 1);
|
||||
} else {
|
||||
$value = $quoted_value;
|
||||
@ -343,7 +367,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
while(true) {
|
||||
|
||||
// infinite loop protection
|
||||
if (++$loops > 1000) return array();
|
||||
if (++$loops > 1000) {
|
||||
trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
|
||||
return array();
|
||||
}
|
||||
|
||||
if ($cursor >= $size) {
|
||||
break;
|
||||
@ -362,7 +389,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
|
||||
$key = substr($string, $key_begin, $key_end - $key_begin);
|
||||
|
||||
if (!$key) continue; // empty key
|
||||
if (!$key) {
|
||||
if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
|
||||
$cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
|
||||
continue; // empty key
|
||||
}
|
||||
|
||||
// scroll past all whitespace
|
||||
$cursor += strspn($string, $this->_whitespace, $cursor);
|
||||
@ -407,6 +438,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
// boolattr
|
||||
if ($key !== '') {
|
||||
$array[$key] = $key;
|
||||
} else {
|
||||
// purely theoretical
|
||||
if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -5,45 +5,74 @@ require_once 'HTMLPurifier/ErrorCollector.php';
|
||||
class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
|
||||
{
|
||||
|
||||
function setup() {
|
||||
generate_mock_once('HTMLPurifier_Language');
|
||||
}
|
||||
|
||||
function test() {
|
||||
|
||||
$tok1 = new HTMLPurifier_Token_Text('Token that caused error');
|
||||
$tok1->line = 23;
|
||||
$tok2 = new HTMLPurifier_Token_Start('a'); // also caused error
|
||||
$tok2->line = 3;
|
||||
$tok3 = new HTMLPurifier_Token_Text('Context before'); // before $tok2
|
||||
$tok3->line = 3;
|
||||
$tok4 = new HTMLPurifier_Token_Text('Context after'); // after $tok2
|
||||
$tok4->line = 3;
|
||||
$tok = new HTMLPurifier_Token_Start('a'); // also caused error
|
||||
$tok->line = 3;
|
||||
|
||||
$collector = new HTMLPurifier_ErrorCollector();
|
||||
$collector->send('Big fat error', E_ERROR, $tok1);
|
||||
$collector->send('Another <warning>', E_WARNING, $tok2, array($tok3, true, $tok4));
|
||||
$language = new HTMLPurifier_LanguageMock();
|
||||
$language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
|
||||
$language->setReturnValue('getErrorName', 'Warning', array(E_WARNING));
|
||||
$language->setReturnValue('getMessage', 'Message 1', array('message-1'));
|
||||
$language->setReturnValue('formatMessage', 'Message 2', array('message-2', array(1 => 'param')));
|
||||
$language->setReturnValue('formatMessage', ' at line 23', array('ErrorCollector: At line', array('line' => 23)));
|
||||
$language->setReturnValue('formatMessage', ' at line 3', array('ErrorCollector: At line', array('line' => 3)));
|
||||
|
||||
$collector = new HTMLPurifier_ErrorCollector($language);
|
||||
$collector->send(23, E_ERROR, 'message-1');
|
||||
$collector->send($tok, E_WARNING, 'message-2', 'param');
|
||||
|
||||
$result = array(
|
||||
0 => array('Big fat error', E_ERROR, $tok1, array(true)),
|
||||
1 => array('Another <warning>', E_WARNING, $tok2, array($tok3, true, $tok4))
|
||||
0 => array(23, E_ERROR, 'Message 1'),
|
||||
1 => array(3, E_WARNING, 'Message 2')
|
||||
);
|
||||
|
||||
$this->assertIdentical($collector->getRaw(), $result);
|
||||
|
||||
$formatted_result = array(
|
||||
0 => 'Warning: Another <warning> at line 3 (<code>Context before<strong><a></strong>Context after</code>)',
|
||||
1 => 'Error: Big fat error at line 23 (<code><strong>Token that caused error</strong></code>)'
|
||||
);
|
||||
$formatted_result =
|
||||
'<ul><li>Warning: Message 2 at line 3</li>'.
|
||||
'<li>Error: Message 1 at line 23</li></ul>';
|
||||
|
||||
$config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true));
|
||||
|
||||
$context = new HTMLPurifier_Context();
|
||||
$this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
|
||||
|
||||
generate_mock_once('HTMLPurifier_Language');
|
||||
}
|
||||
|
||||
function testNoErrors() {
|
||||
$language = new HTMLPurifier_LanguageMock();
|
||||
$language->setReturnValue('getMessage', 'No errors', array('ErrorCollector: No errors'));
|
||||
$collector = new HTMLPurifier_ErrorCollector($language);
|
||||
$formatted_result = '<p>No errors</p>';
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
|
||||
}
|
||||
|
||||
function testNoLineNumbers() {
|
||||
$token = new HTMLPurifier_Token_Start('a'); // no line number!
|
||||
$language = new HTMLPurifier_LanguageMock();
|
||||
$language->setReturnValue('getMessage', 'Message 1', array('message-1'));
|
||||
$language->setReturnValue('getMessage', 'Message 2', array('message-2'));
|
||||
$language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
|
||||
$language->setReturnValue('getErrorName', 'Warning', array(E_WARNING));
|
||||
$context->register('Locale', $language);
|
||||
$collector = new HTMLPurifier_ErrorCollector($language);
|
||||
$collector->send(false, E_ERROR, 'message-1');
|
||||
$collector->send($token, E_ERROR, 'message-2');
|
||||
|
||||
$this->assertIdentical($collector->getHTMLFormatted($config, $context), $formatted_result);
|
||||
$result = array(
|
||||
0 => array(false, E_ERROR, 'Message 1'),
|
||||
1 => array(false, E_ERROR, 'Message 2')
|
||||
);
|
||||
$this->assertIdentical($collector->getRaw(), $result);
|
||||
|
||||
$formatted_result =
|
||||
'<ul><li>Error: Message 1</li>'.
|
||||
'<li>Error: Message 2</li></ul>';
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ class HTMLPurifier_LanguageTest extends UnitTestCase
|
||||
$lang = new HTMLPurifier_Language();
|
||||
$lang->_loaded = true;
|
||||
$lang->messages['error'] = 'Error is $1 on line $2';
|
||||
$this->assertIdentical($lang->formatMessage('error', 'fatal', 32), 'Error is fatal on line 32');
|
||||
$this->assertIdentical($lang->formatMessage('error', array(1=>'fatal', 32)), 'Error is fatal on line 32');
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -53,6 +53,12 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
|
||||
$input[10] = 'name="input" selected';
|
||||
$expect[10] = array('name' => 'input', 'selected' => 'selected');
|
||||
|
||||
$input[11] = '=""';
|
||||
$expect[11] = array();
|
||||
|
||||
$input[12] = '="" =""';
|
||||
$expect[12] = array('"' => ''); // tough to say, just don't throw a loop
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
$size = count($input);
|
||||
|
Loading…
Reference in New Issue
Block a user