From 0e5491b20c04aa3aeb52a9a27c7db372745f63bd Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 18 Jun 2007 03:05:18 +0000 Subject: [PATCH] [1.7.0] Wire in Language and ErrorCollector to main class, now, the only thing to do is actually implement the stuff git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1157 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 2 + library/HTMLPurifier.php | 29 ++++++++ library/HTMLPurifier/ErrorCollector.php | 73 +++++++++++++++++++ library/HTMLPurifier/Language.php | 2 - library/HTMLPurifier/Language/messages/en.php | 2 + library/HTMLPurifier/LanguageFactory.php | 2 - tests/HTMLPurifier/ErrorCollectorTest.php | 42 +++++++++++ tests/test_files.php | 1 + 8 files changed, 149 insertions(+), 4 deletions(-) create mode 100644 library/HTMLPurifier/ErrorCollector.php create mode 100644 tests/HTMLPurifier/ErrorCollectorTest.php diff --git a/NEWS b/NEWS index def75d62..645ce91f 100644 --- a/NEWS +++ b/NEWS @@ -54,6 +54,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier PHP5 only version. . HTMLDefinition and CSSDefinition have a common parent class: Definition. . DirectLex can now track line-numbers +. Preliminary error collector is in place, although no code actually reports + errors yet 1.6.1, released 2007-05-05 ! Support for more deprecated attributes via transformations: diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php index f21f199c..9ba6929b 100644 --- a/library/HTMLPurifier.php +++ b/library/HTMLPurifier.php @@ -51,6 +51,23 @@ require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Strategy/Core.php'; require_once 'HTMLPurifier/Encoder.php'; +require_once 'HTMLPurifier/LanguageFactory.php'; + +HTMLPurifier_ConfigSchema::define( + 'Core', 'Language', 'en', 'string', ' +ISO 639 language code for localizable things in HTML Purifier to use, +which is mainly error reporting. There is currently only an English (en) +translation, so this directive is currently useless. 
+This directive has been available since 1.7.0. +'); + +HTMLPurifier_ConfigSchema::define( + 'Core', 'CollectErrors', false, 'bool', ' +Whether or not to collect errors found while filtering the document. This +is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED. +This directive has been available since 1.7.0. +'); + /** * Main library execution class. * @@ -121,6 +138,18 @@ class HTMLPurifier $lexer = HTMLPurifier_Lexer::create($config); $context = new HTMLPurifier_Context(); + + // set up global context variables + if ($config->get('Core', 'CollectErrors')) { + // may get moved out if other facilities use it + $language_factory = HTMLPurifier_LanguageFactory::instance(); + $language = $language_factory->create($config->get('Core', 'Language')); + $context->register('Locale', $language); + + $error_collector = new HTMLPurifier_ErrorCollector(); + $context->register('ErrorCollector', $error_collector); + } + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); for ($i = 0, $size = count($this->filters); $i < $size; $i++) { diff --git a/library/HTMLPurifier/ErrorCollector.php b/library/HTMLPurifier/ErrorCollector.php new file mode 100644 index 00000000..db0c091e --- /dev/null +++ b/library/HTMLPurifier/ErrorCollector.php @@ -0,0 +1,73 @@ +errors[] = array($msg, $token, $context_tokens); + } + + /** + * Retrieves raw error data for custom formatter to use + * @return List of arrays in format of array(Error message text, + * token that caused error, tokens surrounding token) + */ + function getRaw() { + return $this->errors; + } + + /** + * Default HTML formatting implementation for error messages + * @param $config Configuration object, vital for HTML output nature + */ + function getHTMLFormatted($config) { + $generator = new HTMLPurifier_Generator(); + $context = new HTMLPurifier_Context(); + $generator->generateFromTokens(array(), $config, $context); // initialize + $ret = array(); + + $errors = $this->errors; + + // sort error array 
by line + if ($config->get('Core', 'MaintainLineNumbers')) { + $lines = array(); + foreach ($errors as $error) $lines[] = $error[1]->line; + array_multisort($lines, SORT_ASC, $errors); + } + + foreach ($errors as $error) { + $string = $generator->escape($error[0]); // message + if (!empty($error[1]->line)) { + $string .= ' at line ' . $error[1]->line; + } + $string .= ' ('; + foreach ($error[2] as $token) { + if ($token !== true) { + $string .= $generator->escape($generator->generateFromToken($token)); + } else { + $string .= '' . $generator->escape($generator->generateFromToken($error[1])) . ''; + } + } + $string .= ')'; + $ret[] = $string; + } + return $ret; + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Language.php b/library/HTMLPurifier/Language.php index 722a9264..6b0845fb 100644 --- a/library/HTMLPurifier/Language.php +++ b/library/HTMLPurifier/Language.php @@ -2,8 +2,6 @@ require_once 'HTMLPurifier/LanguageFactory.php'; -// UNUSED - class HTMLPurifier_Language { diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php index 7650b818..3327460c 100644 --- a/library/HTMLPurifier/Language/messages/en.php +++ b/library/HTMLPurifier/Language/messages/en.php @@ -7,6 +7,8 @@ $messages = array( 'htmlpurifier' => 'HTML Purifier', 'pizza' => 'Pizza', // for unit testing purposes + + ); ?> \ No newline at end of file diff --git a/library/HTMLPurifier/LanguageFactory.php b/library/HTMLPurifier/LanguageFactory.php index 3a3bf08b..bf79efea 100644 --- a/library/HTMLPurifier/LanguageFactory.php +++ b/library/HTMLPurifier/LanguageFactory.php @@ -3,8 +3,6 @@ require_once 'HTMLPurifier/Language.php'; require_once 'HTMLPurifier/AttrDef/Lang.php'; -// UNUSED - /** * Class responsible for generating HTMLPurifier_Language objects, managing * caching and fallbacks. 
diff --git a/tests/HTMLPurifier/ErrorCollectorTest.php b/tests/HTMLPurifier/ErrorCollectorTest.php new file mode 100644 index 00000000..31ac3052 --- /dev/null +++ b/tests/HTMLPurifier/ErrorCollectorTest.php @@ -0,0 +1,42 @@ +line = 23; + $tok2 = new HTMLPurifier_Token_Start('a'); // also caused error + $tok2->line = 3; + $tok3 = new HTMLPurifier_Token_Text('Context before'); // before $tok2 + $tok3->line = 3; + $tok4 = new HTMLPurifier_Token_Text('Context after'); // after $tok2 + $tok4->line = 3; + + $collector = new HTMLPurifier_ErrorCollector(); + $collector->send('Big fat error', $tok1); + $collector->send('Another ', $tok2, array($tok3, true, $tok4)); + + $result = array( + 0 => array('Big fat error', $tok1, array(true)), + 1 => array('Another ', $tok2, array($tok3, true, $tok4)) + ); + + $this->assertIdentical($collector->getRaw(), $result); + + $formatted_result = array( + 0 => 'Another <error> at line 3 (Context before<a>Context after)', + 1 => 'Big fat error at line 23 (Token that caused error)' + ); + + $config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true)); + $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result); + + } + +} + +?> \ No newline at end of file diff --git a/tests/test_files.php b/tests/test_files.php index 58a1d127..c2c8bada 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -70,6 +70,7 @@ $test_files[] = 'HTMLPurifier/DefinitionCache/SerializerTest.php'; $test_files[] = 'HTMLPurifier/DefinitionTest.php'; $test_files[] = 'HTMLPurifier/DoctypeRegistryTest.php'; $test_files[] = 'HTMLPurifier/ElementDefTest.php'; +$test_files[] = 'HTMLPurifier/ErrorCollectorTest.php'; $test_files[] = 'HTMLPurifier/EncoderTest.php'; $test_files[] = 'HTMLPurifier/EntityLookupTest.php'; $test_files[] = 'HTMLPurifier/EntityParserTest.php';