From 6a06b92f0c3c1ce57a1978ad64aa196660cd840f Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 15 Sep 2008 19:08:58 -0400 Subject: [PATCH] Setup ErrorCollector to maintain new error format, and output that HTML. Also changed: - DirectLex keeps track of column numbers in context - New class HTMLPurifier_ErrorStruct Signed-off-by: Edward Z. Yang --- configdoc/usage.xml | 43 ++++---- library/HTMLPurifier.includes.php | 1 + library/HTMLPurifier.safe-includes.php | 1 + library/HTMLPurifier/AttrValidator.php | 4 +- library/HTMLPurifier/ErrorCollector.php | 125 +++++++++++++++++----- library/HTMLPurifier/ErrorStruct.php | 58 ++++++++++ library/HTMLPurifier/Lexer/DirectLex.php | 2 + tests/HTMLPurifier/ErrorCollectorTest.php | 7 ++ 8 files changed, 187 insertions(+), 54 deletions(-) create mode 100644 library/HTMLPurifier/ErrorStruct.php diff --git a/configdoc/usage.xml b/configdoc/usage.xml index e10560d6..800f9418 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -5,12 +5,12 @@ 131 - 85 + 81 - 50 - 62 - 319 + 53 + 73 + 348 47 @@ -83,17 +83,6 @@ 304 - - - 148 - - - 82 - - - 45 - - 45 @@ -154,13 +143,13 @@ 202 - 233 + 252 27 - 34 + 36 23 @@ -178,17 +167,17 @@ - 224 + 221 - 229 + 226 - 232 + 229 @@ -203,12 +192,20 @@ - 70 + 76 + + + + + 80 + + + 48 - 242 + 261 @@ -361,7 +358,7 @@ - 59 + 70 diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index 4d787e8b..2700b8df 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -41,6 +41,7 @@ require 'HTMLPurifier/Encoder.php'; require 'HTMLPurifier/EntityLookup.php'; require 'HTMLPurifier/EntityParser.php'; require 'HTMLPurifier/ErrorCollector.php'; +require 'HTMLPurifier/ErrorStruct.php'; require 'HTMLPurifier/Exception.php'; require 'HTMLPurifier/Filter.php'; require 'HTMLPurifier/Generator.php'; diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php index 13d3e6d2..f91cdbd8 100644 --- a/library/HTMLPurifier.safe-includes.php +++ b/library/HTMLPurifier.safe-includes.php @@ -35,6 +35,7 @@ require_once $__dir . '/HTMLPurifier/Encoder.php'; require_once $__dir . '/HTMLPurifier/EntityLookup.php'; require_once $__dir . '/HTMLPurifier/EntityParser.php'; require_once $__dir . '/HTMLPurifier/ErrorCollector.php'; +require_once $__dir . '/HTMLPurifier/ErrorStruct.php'; require_once $__dir . '/HTMLPurifier/Exception.php'; require_once $__dir . '/HTMLPurifier/Filter.php'; require_once $__dir . '/HTMLPurifier/Generator.php'; diff --git a/library/HTMLPurifier/AttrValidator.php b/library/HTMLPurifier/AttrValidator.php index 9e1e8521..fb913fc0 100644 --- a/library/HTMLPurifier/AttrValidator.php +++ b/library/HTMLPurifier/AttrValidator.php @@ -35,8 +35,8 @@ class HTMLPurifier_AttrValidator if (!$current_token) $context->register('CurrentToken', $token); if ( - !$token instanceof HTMLPurifier_Token_Start && - !$token instanceof HTMLPurifier_Token_Empty + !$token instanceof HTMLPurifier_Token_Start && + !$token instanceof HTMLPurifier_Token_Empty ) return $token; // create alias to global definition array, see also $defs diff --git a/library/HTMLPurifier/ErrorCollector.php b/library/HTMLPurifier/ErrorCollector.php index 3e7a6eed..16cc89c6 100644 --- a/library/HTMLPurifier/ErrorCollector.php +++ b/library/HTMLPurifier/ErrorCollector.php @@ -23,6 +23,8 @@ class HTMLPurifier_ErrorCollector protected $generator; protected $context; + protected $lines = array(); + public function __construct($context) { $this->locale =& $context->get('Locale'); $this->context = $context; @@ -48,6 +50,7 @@ class HTMLPurifier_ErrorCollector $token = $this->context->get('CurrentToken', true); $line = $token ? $token->line : $this->context->get('CurrentLine', true); + $col = $token ? $token->col : $this->context->get('CurrentCol', true); $attr = $this->context->get('CurrentAttr', true); // perform special substitutions, also add custom parameters @@ -69,12 +72,58 @@ class HTMLPurifier_ErrorCollector if (!empty($subst)) $msg = strtr($msg, $subst); // (numerically indexed) - $this->_current[] = array( + $error = array( self::LINENO => $line, self::SEVERITY => $severity, self::MESSAGE => $msg, self::CHILDREN => array() ); + $this->_current[] = $error; + + + // NEW CODE BELOW ... + + $struct = null; + // Top-level errors are either: + // TOKEN type, if $value is set appropriately, or + // "syntax" type, if $value is null + $new_struct = new HTMLPurifier_ErrorStruct(); + $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; + if ($token) $new_struct->value = clone $token; + if (is_int($line) && is_int($col)) { + if (isset($this->lines[$line][$col])) { + $struct = $this->lines[$line][$col]; + } else { + $struct = $this->lines[$line][$col] = $new_struct; + } + // These ksorts may present a performance problem + ksort($this->lines[$line], SORT_NUMERIC); + } else { + if (isset($this->lines[-1])) { + $struct = $this->lines[-1]; + } else { + $struct = $this->lines[-1] = $new_struct; + } + } + ksort($this->lines, SORT_NUMERIC); + + // Now, check if we need to operate on a lower structure + if (!empty($attr)) { + $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr); + if (!$struct->value) { + $struct->value = array($attr, 'PUT VALUE HERE'); + } + } + if (!empty($cssprop)) { + $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop); + if (!$struct->value) { + // if we tokenize CSS this might be a little more difficult to do + $struct->value = array($cssprop, 'PUT VALUE HERE'); + } + } + + // Ok, structs are all setup, now time to register the error + $struct->addError($severity, $msg); } /** @@ -95,38 +144,20 @@ class HTMLPurifier_ErrorCollector public function getHTMLFormatted($config, $errors = null) { $ret = array(); - $generator = new HTMLPurifier_Generator($config, $this->context); + $this->generator = new HTMLPurifier_Generator($config, $this->context); if ($errors === null) $errors = $this->errors; - // sort error array by line - // line numbers are enabled if they aren't explicitly disabled - if ($config->get('Core', 'MaintainLineNumbers') !== false) { - $has_line = array(); - $lines = array(); - $original_order = array(); - foreach ($errors as $i => $error) { - $has_line[] = (int) (bool) $error[self::LINENO]; - $lines[] = $error[self::LINENO]; - $original_order[] = $i; - } - array_multisort($has_line, SORT_DESC, $lines, SORT_ASC, $original_order, SORT_ASC, $errors); - } + // 'At line' message needs to be removed - foreach ($errors as $error) { - list($line, $severity, $msg, $children) = $error; - $string = ''; - $string .= '' . $this->locale->getErrorName($severity) . ': '; - $string .= $generator->escape($msg); - if ($line) { - // have javascript link generation that causes - // textarea to skip to the specified line - $string .= $this->locale->formatMessage( - 'ErrorCollector: At line', array('line' => $line)); + // generation code for new structure goes here. It needs to be recursive. + foreach ($this->lines as $line => $col_array) { + if ($line == -1) continue; + foreach ($col_array as $col => $struct) { + $this->_renderStruct($ret, $struct, $line, $col); } - if ($children) { - $string .= $this->getHTMLFormatted($config, $children); - } - $ret[] = $string; + } + if (isset($this->lines[-1])) { + $this->_renderStruct($ret, $this->lines[-1]); } if (empty($errors)) { @@ -137,5 +168,41 @@ class HTMLPurifier_ErrorCollector } + private function _renderStruct(&$ret, $struct, $line = null, $col = null) { + $stack = array($struct); + $context_stack = array(array()); + while ($current = array_pop($stack)) { + $context = array_pop($context_stack); + foreach ($current->errors as $error) { + list($severity, $msg) = $error; + $string = ''; + $string .= '
'; + // W3C uses an icon to indicate the severity of the error. + $error = $this->locale->getErrorName($severity); + $string .= "$error "; + if (!is_null($line) && !is_null($col)) { + $string .= "Line $line, Column $col: "; + } else { + $string .= 'End of Document: '; + } + $string .= '' . $this->generator->escape($msg) . ' '; + $string .= '
'; + // Here, have a marker for the character on the column appropriate. + // Be sure to clip extremely long lines. + //$string .= '
';
+                //$string .= '';
+                //$string .= '
'; + $ret[] = $string; + } + foreach ($current->children as $type => $array) { + $context[] = $current; + $stack = array_merge($stack, array_reverse($array, true)); + for ($i = count($array); $i > 0; $i--) { + $context_stack[] = $context; + } + } + } + } + } diff --git a/library/HTMLPurifier/ErrorStruct.php b/library/HTMLPurifier/ErrorStruct.php new file mode 100644 index 00000000..9da712b7 --- /dev/null +++ b/library/HTMLPurifier/ErrorStruct.php @@ -0,0 +1,58 @@ +children[$type][$id])) { + $this->children[$type][$id] = new HTMLPurifier_ErrorStruct(); + $this->children[$type][$id]->type = $type; + } + return $this->children[$type][$id]; + } + + public function addError($severity, $message) { + $this->errors[] = array($severity, $message); + } + +} diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php index 1f92b0cb..45ddc876 100644 --- a/library/HTMLPurifier/Lexer/DirectLex.php +++ b/library/HTMLPurifier/Lexer/DirectLex.php @@ -63,6 +63,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $length = false; } $context->register('CurrentLine', $current_line); + $context->register('CurrentCol', $current_col); $nl = "\n"; // how often to manually recalculate. This will ALWAYS be right, // but it's pretty wasteful. Set to 0 to turn off @@ -312,6 +313,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } $context->destroy('CurrentLine'); + $context->destroy('CurrentCol'); return $array; } diff --git a/tests/HTMLPurifier/ErrorCollectorTest.php b/tests/HTMLPurifier/ErrorCollectorTest.php index 977a9758..0fbd2e34 100644 --- a/tests/HTMLPurifier/ErrorCollectorTest.php +++ b/tests/HTMLPurifier/ErrorCollectorTest.php @@ -1,5 +1,8 @@ assertIdentical($this->collector->getRaw(), $result); + /* $formatted_result = '
  • Warning: Message 2 at line 3
  • '. '
  • Error: Message 1 at line 23
'; $this->assertIdentical($this->collector->getHTMLFormatted($this->config), $formatted_result); + */ } @@ -75,10 +80,12 @@ class HTMLPurifier_ErrorCollectorTest extends HTMLPurifier_Harness ); $this->assertIdentical($this->collector->getRaw(), $result); + /* $formatted_result = '
  • Error: Message 1
  • '. '
  • Error: Message 2
'; $this->assertIdentical($this->collector->getHTMLFormatted($this->config), $formatted_result); + */ } function testContextSubstitutions() {