0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-23 00:41:52 +00:00

Set up ErrorCollector to maintain the new error format, and output it as HTML.

Also changed:
    - DirectLex keeps track of column numbers in context
    - New class HTMLPurifier_ErrorStruct

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2008-09-15 19:08:58 -04:00
parent 3184fee468
commit 6a06b92f0c
8 changed files with 187 additions and 54 deletions

View File

@ -5,12 +5,12 @@
<line>131</line> <line>131</line>
</file> </file>
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>85</line> <line>81</line>
</file> </file>
<file name="HTMLPurifier/Lexer/DirectLex.php"> <file name="HTMLPurifier/Lexer/DirectLex.php">
<line>50</line> <line>53</line>
<line>62</line> <line>73</line>
<line>319</line> <line>348</line>
</file> </file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php"> <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>47</line> <line>47</line>
@ -83,17 +83,6 @@
<line>304</line> <line>304</line>
</file> </file>
</directive> </directive>
<directive id="Core.MaintainLineNumbers">
<file name="HTMLPurifier/ErrorCollector.php">
<line>148</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>82</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>45</line>
</file>
</directive>
<directive id="Output.CommentScriptContents"> <directive id="Output.CommentScriptContents">
<file name="HTMLPurifier/Generator.php"> <file name="HTMLPurifier/Generator.php">
<line>45</line> <line>45</line>
@ -154,13 +143,13 @@
<line>202</line> <line>202</line>
</file> </file>
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>233</line> <line>252</line>
</file> </file>
<file name="HTMLPurifier/HTMLModule/Image.php"> <file name="HTMLPurifier/HTMLModule/Image.php">
<line>27</line> <line>27</line>
</file> </file>
<file name="HTMLPurifier/Lexer/DirectLex.php"> <file name="HTMLPurifier/Lexer/DirectLex.php">
<line>34</line> <line>36</line>
</file> </file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php"> <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>23</line> <line>23</line>
@ -178,17 +167,17 @@
</directive> </directive>
<directive id="HTML.Proprietary"> <directive id="HTML.Proprietary">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>224</line> <line>221</line>
</file> </file>
</directive> </directive>
<directive id="HTML.SafeObject"> <directive id="HTML.SafeObject">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>229</line> <line>226</line>
</file> </file>
</directive> </directive>
<directive id="HTML.SafeEmbed"> <directive id="HTML.SafeEmbed">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>232</line> <line>229</line>
</file> </file>
</directive> </directive>
<directive id="Attr.IDBlacklist"> <directive id="Attr.IDBlacklist">
@ -203,12 +192,20 @@
</directive> </directive>
<directive id="Core.LexerImpl"> <directive id="Core.LexerImpl">
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>70</line> <line>76</line>
</file>
</directive>
<directive id="Core.MaintainLineNumbers">
<file name="HTMLPurifier/Lexer.php">
<line>80</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>48</line>
</file> </file>
</directive> </directive>
<directive id="Core.ConvertDocumentToFragment"> <directive id="Core.ConvertDocumentToFragment">
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>242</line> <line>261</line>
</file> </file>
</directive> </directive>
<directive id="URI.Host"> <directive id="URI.Host">
@ -361,7 +358,7 @@
</directive> </directive>
<directive id="Core.DirectLexLineNumberSyncInterval"> <directive id="Core.DirectLexLineNumberSyncInterval">
<file name="HTMLPurifier/Lexer/DirectLex.php"> <file name="HTMLPurifier/Lexer/DirectLex.php">
<line>59</line> <line>70</line>
</file> </file>
</directive> </directive>
<directive id="Core.EscapeInvalidTags"> <directive id="Core.EscapeInvalidTags">

View File

@ -41,6 +41,7 @@ require 'HTMLPurifier/Encoder.php';
require 'HTMLPurifier/EntityLookup.php'; require 'HTMLPurifier/EntityLookup.php';
require 'HTMLPurifier/EntityParser.php'; require 'HTMLPurifier/EntityParser.php';
require 'HTMLPurifier/ErrorCollector.php'; require 'HTMLPurifier/ErrorCollector.php';
require 'HTMLPurifier/ErrorStruct.php';
require 'HTMLPurifier/Exception.php'; require 'HTMLPurifier/Exception.php';
require 'HTMLPurifier/Filter.php'; require 'HTMLPurifier/Filter.php';
require 'HTMLPurifier/Generator.php'; require 'HTMLPurifier/Generator.php';

View File

@ -35,6 +35,7 @@ require_once $__dir . '/HTMLPurifier/Encoder.php';
require_once $__dir . '/HTMLPurifier/EntityLookup.php'; require_once $__dir . '/HTMLPurifier/EntityLookup.php';
require_once $__dir . '/HTMLPurifier/EntityParser.php'; require_once $__dir . '/HTMLPurifier/EntityParser.php';
require_once $__dir . '/HTMLPurifier/ErrorCollector.php'; require_once $__dir . '/HTMLPurifier/ErrorCollector.php';
require_once $__dir . '/HTMLPurifier/ErrorStruct.php';
require_once $__dir . '/HTMLPurifier/Exception.php'; require_once $__dir . '/HTMLPurifier/Exception.php';
require_once $__dir . '/HTMLPurifier/Filter.php'; require_once $__dir . '/HTMLPurifier/Filter.php';
require_once $__dir . '/HTMLPurifier/Generator.php'; require_once $__dir . '/HTMLPurifier/Generator.php';

View File

@ -23,6 +23,8 @@ class HTMLPurifier_ErrorCollector
protected $generator; protected $generator;
protected $context; protected $context;
protected $lines = array();
public function __construct($context) { public function __construct($context) {
$this->locale =& $context->get('Locale'); $this->locale =& $context->get('Locale');
$this->context = $context; $this->context = $context;
@ -48,6 +50,7 @@ class HTMLPurifier_ErrorCollector
$token = $this->context->get('CurrentToken', true); $token = $this->context->get('CurrentToken', true);
$line = $token ? $token->line : $this->context->get('CurrentLine', true); $line = $token ? $token->line : $this->context->get('CurrentLine', true);
$col = $token ? $token->col : $this->context->get('CurrentCol', true);
$attr = $this->context->get('CurrentAttr', true); $attr = $this->context->get('CurrentAttr', true);
// perform special substitutions, also add custom parameters // perform special substitutions, also add custom parameters
@ -69,12 +72,58 @@ class HTMLPurifier_ErrorCollector
if (!empty($subst)) $msg = strtr($msg, $subst); if (!empty($subst)) $msg = strtr($msg, $subst);
// (numerically indexed) // (numerically indexed)
$this->_current[] = array( $error = array(
self::LINENO => $line, self::LINENO => $line,
self::SEVERITY => $severity, self::SEVERITY => $severity,
self::MESSAGE => $msg, self::MESSAGE => $msg,
self::CHILDREN => array() self::CHILDREN => array()
); );
$this->_current[] = $error;
// NEW CODE BELOW ...
$struct = null;
// Top-level errors are either:
// TOKEN type, if $value is set appropriately, or
// "syntax" type, if $value is null
$new_struct = new HTMLPurifier_ErrorStruct();
$new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
if ($token) $new_struct->value = clone $token;
if (is_int($line) && is_int($col)) {
if (isset($this->lines[$line][$col])) {
$struct = $this->lines[$line][$col];
} else {
$struct = $this->lines[$line][$col] = $new_struct;
}
// These ksorts may present a performance problem
ksort($this->lines[$line], SORT_NUMERIC);
} else {
if (isset($this->lines[-1])) {
$struct = $this->lines[-1];
} else {
$struct = $this->lines[-1] = $new_struct;
}
}
ksort($this->lines, SORT_NUMERIC);
// Now, check if we need to operate on a lower structure
if (!empty($attr)) {
$struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
if (!$struct->value) {
$struct->value = array($attr, 'PUT VALUE HERE');
}
}
if (!empty($cssprop)) {
$struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
if (!$struct->value) {
// if we tokenize CSS this might be a little more difficult to do
$struct->value = array($cssprop, 'PUT VALUE HERE');
}
}
// Ok, structs are all setup, now time to register the error
$struct->addError($severity, $msg);
} }
/** /**
@ -95,38 +144,20 @@ class HTMLPurifier_ErrorCollector
public function getHTMLFormatted($config, $errors = null) { public function getHTMLFormatted($config, $errors = null) {
$ret = array(); $ret = array();
$generator = new HTMLPurifier_Generator($config, $this->context); $this->generator = new HTMLPurifier_Generator($config, $this->context);
if ($errors === null) $errors = $this->errors; if ($errors === null) $errors = $this->errors;
// sort error array by line // 'At line' message needs to be removed
// line numbers are enabled if they aren't explicitly disabled
if ($config->get('Core', 'MaintainLineNumbers') !== false) {
$has_line = array();
$lines = array();
$original_order = array();
foreach ($errors as $i => $error) {
$has_line[] = (int) (bool) $error[self::LINENO];
$lines[] = $error[self::LINENO];
$original_order[] = $i;
}
array_multisort($has_line, SORT_DESC, $lines, SORT_ASC, $original_order, SORT_ASC, $errors);
}
foreach ($errors as $error) { // generation code for new structure goes here. It needs to be recursive.
list($line, $severity, $msg, $children) = $error; foreach ($this->lines as $line => $col_array) {
$string = ''; if ($line == -1) continue;
$string .= '<strong>' . $this->locale->getErrorName($severity) . '</strong>: '; foreach ($col_array as $col => $struct) {
$string .= $generator->escape($msg); $this->_renderStruct($ret, $struct, $line, $col);
if ($line) {
// have javascript link generation that causes
// textarea to skip to the specified line
$string .= $this->locale->formatMessage(
'ErrorCollector: At line', array('line' => $line));
} }
if ($children) {
$string .= $this->getHTMLFormatted($config, $children);
} }
$ret[] = $string; if (isset($this->lines[-1])) {
$this->_renderStruct($ret, $this->lines[-1]);
} }
if (empty($errors)) { if (empty($errors)) {
@ -137,5 +168,41 @@ class HTMLPurifier_ErrorCollector
} }
/**
 * Renders every error held by an error struct, and by all of its descendant
 * structs, as HTML snippets appended to $ret.
 *
 * The struct tree is walked iteratively with an explicit stack. Each struct's
 * own errors are emitted before the errors of its children.
 *
 * @param array $ret Output list, passed by reference; one HTML string is
 *                   appended per error.
 * @param object $struct Root struct to render; must expose the $errors and
 *                   $children properties the way HTMLPurifier_ErrorStruct does.
 * @param mixed $line Line number shown in the location label.
 * @param mixed $col Column number shown in the location label. If either
 *                   $line or $col is null the error is labelled
 *                   "End of Document" instead.
 */
private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
    $stack = array($struct);
    // Parallel to $stack: the chain of ancestor structs for each pending struct.
    $context_stack = array(array());
    while ($current = array_pop($stack)) {
        $context = array_pop($context_stack);
        foreach ($current->errors as $error) {
            list($severity, $msg) = $error;
            // W3C uses an icon to indicate the severity of the error.
            $name = $this->locale->getErrorName($severity);
            $string = '<div>';
            $string .= "<span class=\"error e$severity\"><strong>$name</strong></span> ";
            if (!is_null($line) && !is_null($col)) {
                $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
            } else {
                $string .= '<em class="location">End of Document: </em> ';
            }
            $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
            $string .= '</div>';
            // Here, have a marker for the character on the column appropriate.
            // Be sure to clip extremely long lines.
            //$string .= '<pre>';
            //$string .= '';
            //$string .= '</pre>';
            $ret[] = $string;
        }
        if (empty($current->children)) {
            continue;
        }
        // The parent joins the ancestor chain exactly once, no matter how
        // many child types it has.
        $context[] = $current;
        foreach ($current->children as $array) {
            // Push children one by one instead of array_merge()-ing them:
            // children are keyed by their string id, and array_merge() would
            // overwrite a pending struct that happens to share an id with a
            // new one (e.g. an attribute and a CSS property both named
            // "color"), silently dropping its errors.
            foreach (array_reverse($array) as $child) {
                $stack[] = $child;
                $context_stack[] = $context;
            }
        }
    }
}
} }

View File

@ -0,0 +1,58 @@
<?php
/**
 * Holds the errors recorded against one segment of an HTML document: a token,
 * an attribute or a CSS property. A struct may also own child structs for the
 * components of the segment it represents, but its primary job is to collect
 * the errors that apply to the segment itself.
 */
class HTMLPurifier_ErrorStruct
{

    /**
     * Struct types; these are also the first-level keys of $children.
     * Top-level structures are automatically token-level.
     */
    const TOKEN   = 0;
    const ATTR    = 1;
    const CSSPROP = 2;

    /**
     * One of the type constants above.
     */
    public $type;

    /**
     * The thing errors are being recorded for. Its form depends on $type:
     *  - TOKEN:   instance of HTMLPurifier_Token
     *  - ATTR:    array('attr-name', 'value')
     *  - CSSPROP: array('prop-name', 'value')
     */
    public $value;

    /**
     * List of errors registered on this struct, each stored as
     * array(severity, message).
     */
    public $errors = array();

    /**
     * Child structs belonging to this one (a TOKEN struct would hold ATTR
     * structs, for example), indexed as [TYPE]['identifier'].
     */
    public $children = array();

    /**
     * Fetches the child struct of the given type and identifier, creating
     * and registering an empty one of that type if none exists yet.
     *
     * @param int $type One of the type constants.
     * @param string $id Identifier of the child within that type.
     * @return HTMLPurifier_ErrorStruct
     */
    public function getChild($type, $id) {
        if (isset($this->children[$type][$id])) {
            return $this->children[$type][$id];
        }
        $child = new self();
        $child->type = $type;
        $this->children[$type][$id] = $child;
        return $child;
    }

    /**
     * Appends an error to this struct.
     *
     * @param mixed $severity Severity of the error.
     * @param mixed $message Error message.
     */
    public function addError($severity, $message) {
        $entry = array($severity, $message);
        $this->errors[] = $entry;
    }

}

View File

@ -63,6 +63,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$length = false; $length = false;
} }
$context->register('CurrentLine', $current_line); $context->register('CurrentLine', $current_line);
$context->register('CurrentCol', $current_col);
$nl = "\n"; $nl = "\n";
// how often to manually recalculate. This will ALWAYS be right, // how often to manually recalculate. This will ALWAYS be right,
// but it's pretty wasteful. Set to 0 to turn off // but it's pretty wasteful. Set to 0 to turn off
@ -312,6 +313,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
} }
$context->destroy('CurrentLine'); $context->destroy('CurrentLine');
$context->destroy('CurrentCol');
return $array; return $array;
} }

View File

@ -1,5 +1,8 @@
<?php <?php
/**
* @warning HTML output is in flux, but eventually needs to be stabilized.
*/
class HTMLPurifier_ErrorCollectorTest extends HTMLPurifier_Harness class HTMLPurifier_ErrorCollectorTest extends HTMLPurifier_Harness
{ {
@ -44,11 +47,13 @@ class HTMLPurifier_ErrorCollectorTest extends HTMLPurifier_Harness
$this->assertIdentical($this->collector->getRaw(), $result); $this->assertIdentical($this->collector->getRaw(), $result);
/*
$formatted_result = $formatted_result =
'<ul><li><strong>Warning</strong>: Message 2 at line 3</li>'. '<ul><li><strong>Warning</strong>: Message 2 at line 3</li>'.
'<li><strong>Error</strong>: Message 1 at line 23</li></ul>'; '<li><strong>Error</strong>: Message 1 at line 23</li></ul>';
$this->assertIdentical($this->collector->getHTMLFormatted($this->config), $formatted_result); $this->assertIdentical($this->collector->getHTMLFormatted($this->config), $formatted_result);
*/
} }
@ -75,10 +80,12 @@ class HTMLPurifier_ErrorCollectorTest extends HTMLPurifier_Harness
); );
$this->assertIdentical($this->collector->getRaw(), $result); $this->assertIdentical($this->collector->getRaw(), $result);
/*
$formatted_result = $formatted_result =
'<ul><li><strong>Error</strong>: Message 1</li>'. '<ul><li><strong>Error</strong>: Message 1</li>'.
'<li><strong>Error</strong>: Message 2</li></ul>'; '<li><strong>Error</strong>: Message 2</li></ul>';
$this->assertIdentical($this->collector->getHTMLFormatted($this->config), $formatted_result); $this->assertIdentical($this->collector->getHTMLFormatted($this->config), $formatted_result);
*/
} }
function testContextSubstitutions() { function testContextSubstitutions() {