From 5b3431d889aa249a448f253e604955bbfa320d0e Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Wed, 12 Dec 2007 21:46:30 +0000 Subject: [PATCH] [3.0.0] Fully implement CSS extraction and cleaning. See NEWS for more information, it is now a Filter. - Some Lexer things were moved around git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1469 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 6 +- .../Filter/ExtractStyleBlocks.php | 104 ++++++++++++++++ library/HTMLPurifier/Filter/YouTube.php | 2 +- library/HTMLPurifier/Lexer.php | 36 ------ test-settings.sample.php | 3 + .../Filter/ExtractStyleBlocksTest.php | 112 ++++++++++++++++++ tests/HTMLPurifier/LexerTest.php | 53 --------- tests/index.php | 6 + tests/test_files.php | 1 + 9 files changed, 231 insertions(+), 92 deletions(-) create mode 100644 library/HTMLPurifier/Filter/ExtractStyleBlocks.php create mode 100644 tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php diff --git a/NEWS b/NEWS index c78f781f..bc632c29 100644 --- a/NEWS +++ b/NEWS @@ -18,8 +18,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier + PHP4 reference/foreach cruft removed (in progress) ! CSS properties are no case-insensitive ! DefinitionCacheFactory now can register new implementations -! #isU', array($this, 'styleCallback'), $html); + $style_blocks = $this->_styleMatches; + $this->_styleMatches = array(); // reset + $context->register('StyleBlocks', $style_blocks); // $context must not be reused + if ($this->_tidy) { + foreach ($style_blocks as &$style) { + $style = $this->cleanCSS($style, $config, $context); + } + } + return $html; + } + + /** + * Takes CSS (the stuff found in in a font-family prop). + if (!$this->_disableCharacterEscaping) { + $css = str_replace( + array('<', '>', '&'), + array('\3C ', '\3E ', '\26 '), + $css + ); + } + return $css; + } + +} + diff --git a/library/HTMLPurifier/Filter/YouTube.php b/library/HTMLPurifier/Filter/YouTube.php index 94924637..efd8b06b 100644 --- a/library/HTMLPurifier/Filter/YouTube.php +++ b/library/HTMLPurifier/Filter/YouTube.php @@ -5,7 +5,7 @@ require_once 'HTMLPurifier/Filter.php'; class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter { - public $name = 'YouTube preservation'; + public $name = 'YouTube'; public function preFilter($html, $config, &$context) { $pre_regex = '#]+>.+?'. diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index 2f8601bb..8307a54f 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -79,14 +79,6 @@ It is not necessary and will have no effect for PHP 4. This directive has been available since 2.1.0. '); -HTMLPurifier_ConfigSchema::define( - 'HTML', 'ExtractStyleBlocks', false, 'bool', ' -This directive enables extraction of style tags contents so -that they can be incorporated in the head of the document, -after sufficient validation. -This directive has been available since 3.0.0. -'); - /** * Forgivingly lexes HTML (SGML-style) markup into tokens. * @@ -346,34 +338,6 @@ class HTMLPurifier_Lexer // represent non-SGML characters (horror, horror!) $html = HTMLPurifier_Encoder::cleanUTF8($html); - if ($config->get('HTML', 'ExtractStyleBlocks')) { - // extract #isU', array($this, 'styleCallback'), $html); - $style_blocks = $this->_styleMatches; - $this->_styleMatches = array(); // reset - // this is a persistent context, so we have to overwrite it with every call - if ($context->exists('StyleBlocks')) $context->destroy('StyleBlocks'); - $context->register('StyleBlocks', $style_blocks); return $html; } diff --git a/test-settings.sample.php b/test-settings.sample.php index 74e2de54..30e985ea 100644 --- a/test-settings.sample.php +++ b/test-settings.sample.php @@ -14,6 +14,9 @@ set_time_limit(0); // Where is SimpleTest located? $simpletest_location = '/path/to/simpletest/'; +// Where is CSSTidy located? +$csstidy_location = '/path/to/csstidy/'; + // How many times should profiling scripts iterate over the function? More runs // means more accurate results, but they'll take longer to perform. $GLOBALS['HTMLPurifierTest']['Runs'] = 2; diff --git a/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php b/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php new file mode 100644 index 00000000..7ba464e3 --- /dev/null +++ b/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php @@ -0,0 +1,112 @@ +config); + $purifier->addFilter(new HTMLPurifier_Filter_ExtractStyleBlocks()); + $result = $purifier->purify('Test'); + $this->assertIdentical($result, 'Test'); + $this->assertIdentical($purifier->context->get('StyleBlocks'), + array( + ".foo {\ntext-align:center;\n}", + "* {\nfont-size:12pt;\n}" + ) + ); + } + + function assertExtractStyleBlocks($html, $expect = true, $styles = array()) { + $filter = new HTMLPurifier_Filter_ExtractStyleBlocks(false); // disable cleaning + if ($expect === true) $expect = $html; + $result = $filter->preFilter($html, $this->config, $this->context); + $this->assertIdentical($result, $expect); + $this->assertIdentical($this->context->get('StyleBlocks'), $styles); + } + + function test_extractStyleBlocks_preserve() { + $this->assertExtractStyleBlocks('Foobar'); + } + + function test_extractStyleBlocks_allStyle() { + $this->assertExtractStyleBlocks('', '', array('foo')); + } + + function test_extractStyleBlocks_multipleBlocks() { + $this->assertExtractStyleBlocks( + "NOP", + "NOP", + array('1', '2', '4') + ); + } + + function test_extractStyleBlocks_blockWithAttributes() { + $this->assertExtractStyleBlocks( + '', + '', + array('css') + ); + } + + function test_extractStyleBlocks_styleWithPadding() { + $this->assertExtractStyleBlocks( + "AlasAwesome\n Trendy!", + "AlasAwesome\n Trendy!", + array('foo') + ); + } + + function assertCleanCSS($input, $expect = true) { + $filter = new HTMLPurifier_Filter_ExtractStyleBlocks(); + if ($expect === true) $expect = $input; + $result = $filter->cleanCSS($input, $this->config, $this->context); + $this->assertIdentical($result, $expect); + } + + function test_cleanCSS_malformed() { + $this->assertCleanCSS('', ''); + } + + function test_cleanCSS_selector() { + $this->assertCleanCSS("a .foo #id div.cl#foo {\nfont-weight:700;\n}"); + } + + function test_cleanCSS_angledBrackets() { + $this->assertCleanCSS( + ".class {\nfont-family:'';\n}", + ".class {\nfont-family:'\\3C /style\\3E ';\n}" + ); + } + + function test_cleanCSS_angledBrackets2() { + // CSSTidy's behavior in this case is wrong, and should be fixed + //$this->assertCleanCSS( + // "span[title=\"\"] {\nfont-size:12pt;\n}", + // "span[title=\"\\3C /style\\3E \"] {\nfont-size:12pt;\n}" + //); + } + + function test_cleanCSS_bogus() { + $this->assertCleanCSS("div {bogus:tree;}", "div {\n}"); + } + + function test_cleanCSS_escapeCodes() { + $this->assertCleanCSS( + ".class {\nfont-family:'\\3C /style\\3E ';\n}" + ); + } + + function test_cleanCSS_noEscapeCodes() { + $filter = new HTMLPurifier_Filter_ExtractStyleBlocks(null, true); + $input = ".class {\nfont-family:'';\n}"; + $result = $filter->cleanCSS($input, $this->config, $this->context); + $this->assertIdentical($result, $input); + } + +} \ No newline at end of file diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index df89d137..ec24b431 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -31,48 +31,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex'); } - // HTMLPurifier_Lexer->extractStyleBlocks() -------------------------------- - - function assertExtractStyleBlocks($html, $expect = true, $styles = array()) { - $lexer = HTMLPurifier_Lexer::create($this->config); - if ($expect === true) $expect = $html; - $result = $lexer->extractStyleBlocks($html, $this->config, $this->context); - $this->assertIdentical($result, $expect); - $this->assertIdentical($this->context->get('StyleBlocks'), $styles); - } - - function test_extractStyleBlocks_preserve() { - $this->assertExtractStyleBlocks('Foobar'); - } - - function test_extractStyleBlocks_allStyle() { - $this->assertExtractStyleBlocks('', '', array('foo')); - } - - function test_extractStyleBlocks_multipleBlocks() { - $this->assertExtractStyleBlocks( - "NOP", - "NOP", - array('1', '2', '4') - ); - } - - function test_extractStyleBlocks_blockWithAttributes() { - $this->assertExtractStyleBlocks( - '', - '', - array('css') - ); - } - - function test_extractStyleBlocks_styleWithPadding() { - $this->assertExtractStyleBlocks( - "AlasAwesome\n Trendy!", - "AlasAwesome\n Trendy!", - array('foo') - ); - } - // HTMLPurifier_Lexer->parseData() ----------------------------------------- function assertParseData($input, $expect = true) { @@ -553,17 +511,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness ); } - function test_tokenizeHTML_extractStyleBlocks() { - $this->config->set('HTML', 'ExtractStyleBlocks', true); - $this->assertTokenization( - 'Test', - array( - new HTMLPurifier_Token_Text('Test') - ) - ); - $this->assertIdentical($this->context->get('StyleBlocks'), array('.foo {text-align:center;}')); - } - /* function test_tokenizeHTML_() { diff --git a/tests/index.php b/tests/index.php index 04ed8852..c7bf7852 100755 --- a/tests/index.php +++ b/tests/index.php @@ -16,6 +16,7 @@ $GLOBALS['HTMLPurifierTest'] = array(); $GLOBALS['HTMLPurifierTest']['PEAR'] = false; // do PEAR tests $GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") && class_exists('DOMDocument'); $simpletest_location = 'simpletest/'; // reasonable guess +$csstidy_location = false; // load SimpleTest if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php'; @@ -24,6 +25,11 @@ require_once $simpletest_location . 'unit_tester.php'; require_once $simpletest_location . 'reporter.php'; require_once $simpletest_location . 'mock_objects.php'; +if ($csstidy_location !== false) { + require_once $csstidy_location . 'class.csstidy.php'; + require_once $csstidy_location . 'class.csstidy_print.php'; +} + error_reporting(E_ALL | E_STRICT); // after SimpleTest is loaded, turn on compile time errors // load Debugger diff --git a/tests/test_files.php b/tests/test_files.php index adb0df4c..3e7126f8 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -75,6 +75,7 @@ $test_files[] = 'HTMLPurifier/ErrorCollectorTest.php'; $test_files[] = 'HTMLPurifier/EncoderTest.php'; $test_files[] = 'HTMLPurifier/EntityLookupTest.php'; $test_files[] = 'HTMLPurifier/EntityParserTest.php'; +$test_files[] = 'HTMLPurifier/Filter/ExtractStyleBlocksTest.php'; $test_files[] = 'HTMLPurifier/GeneratorTest.php'; $test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php'; $test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';