[3.1.0] [BACKPORT] Fix bug with comments in styles, and some associated issues

- Restore printTokens()

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1570 48356398-32a2-884e-a903-53898d9a118a
2025-01-03 05:11:52 +00:00 · 2008-02-20 00:15:44 +00:00 · 2008-02-20 00:15:44 +00:00 · 6c9c8f2380
commit 6c9c8f2380
parent fbc595ebed
11 changed files with 98 additions and 42 deletions
--- a/6
+++ b/6
@ -32,6 +32,8 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
  $schema which defines what HTMLPurifier_ConfigSchema to use besides the
  global default.
 - Fix bug with trusted script handling in libxml versions later than 2.6.28.
 - Fix bug in ExtractStyleBlocks with comments in style tags
 - Fix bug in comment parsing for DirectLex
 . Plugins now get their own changelogs according to project conventions.
 . Convert tokens to use instanceof, reducing memory footprint and
  improving comparison speed.
@ -53,6 +55,10 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
 . Debugger class is deprecated and will be removed soon.
 . Command line argument parsing for testing scripts revamped, now --opt value
  format is supported.
 . Smoketests now clean up after magic quotes
 . Generator now can output comments (however, comments are still stripped
  from HTML Purifier output)
 . substr_count PHP4 compatibility kludge removed
 3.0.0, released 2008-01-06
 # HTML Purifier is PHP 5 only! The 2.1.x branch will be maintained
--- a/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
+++ b/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
@ -72,6 +72,15 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
        } else {
            $scopes = array();
        }
        // remove comments from CSS
        $css = trim($css);
        if (strncmp('<!--', $css, 4) === 0) {
            $css = substr($css, 4);
        }
        if (strlen($css) > 3 && substr($css, -3) == '-->') {
            $css = substr($css, 0, -3);
        }
        $css = trim($css);
        $this->_tidy->parse($css);
        $css_definition = $config->getDefinition('CSS');
        foreach ($this->_tidy->css as $k => $decls) {
--- a/library/HTMLPurifier/Generator.php
+++ b/library/HTMLPurifier/Generator.php
@ -112,6 +112,8 @@ class HTMLPurifier_Generator
        } elseif ($token instanceof HTMLPurifier_Token_Text) {
            return $this->escape($token->data);
        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
            return '<!--' . $token->data . '-->';
        } else {
            return '';
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@ -112,7 +112,6 @@ class HTMLPurifier_Lexer
            case 'DirectLex':
                return new HTMLPurifier_Lexer_DirectLex();
            case 'PH5P':
                // experimental Lexer that must be manually included
                return new HTMLPurifier_Lexer_PH5P();
            default:
                trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@ -91,7 +91,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
            $last = end($tokens);
            $data = $node->data;
            // (note $node->tagname is already normalized)
-            if ($last instanceof HTMLPurifier_Token_Start && $last->name == 'script') {
+            if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) {
                $new_data = trim($data);
                if (substr($new_data, 0, 4) === '<!--') {
                    $data = substr($new_data, 4);
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@ -81,7 +81,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                $cursor > 0 &&            // cursor is further than zero
                $loops % $synchronize_interval === 0 // time to synchronize!
            ) {
-                $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
+                $current_line = 1 + substr_count($html, $nl, 0, $cursor);
            }
            $position_next_lt = strpos($html, '<', $cursor);
@ -106,7 +106,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                    );
                if ($maintain_line_numbers) {
                    $token->line = $current_line;
-                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
+                    $current_line += substr_count($html, $nl, $cursor, $position_next_lt - $cursor);
                }
                $array[] = $token;
                $cursor  = $position_next_lt + 1;
@ -150,7 +150,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                // Check if it's a comment
                if (
-                    substr($segment, 0, 3) === '!--'
+                    strncmp('!--', $segment, 3) === 0
                ) {
                    // re-determine segment length, looking for -->
                    $position_comment_end = strpos($html, '-->', $cursor);
@ -168,13 +168,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                    $segment = substr($html, $cursor, $strlen_segment);
                    $token = new
                        HTMLPurifier_Token_Comment(
-                            substr(
+                            substr($segment, 3)
                                $segment, 3, $strlen_segment - 3
                            )
                        );
                    if ($maintain_line_numbers) {
                        $token->line = $current_line;
-                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
+                        $current_line += substr_count($html, $nl, $cursor, $strlen_segment);
                    }
                    $array[] = $token;
                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
@ -189,7 +187,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                    $token = new HTMLPurifier_Token_End($type);
                    if ($maintain_line_numbers) {
                        $token->line = $current_line;
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                        $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
@ -213,7 +211,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                        );
                    if ($maintain_line_numbers) {
                        $token->line = $current_line;
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                        $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $cursor = $position_next_gt + 1;
@ -242,7 +240,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                    }
                    if ($maintain_line_numbers) {
                        $token->line = $current_line;
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                        $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
                    }
                    $array[] = $token;
                    $inside_tag = false;
@ -274,7 +272,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                }
                if ($maintain_line_numbers) {
                    $token->line = $current_line;
-                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                    $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
                }
                $array[] = $token;
                $cursor = $position_next_gt + 1;
@ -302,22 +300,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
        return $array;
    }
    /**
     * Counts how often $needle occurs inside the portion of $haystack that
     * begins at $offset and spans $length.
     *
     * When the running PHP version is older than 5.1, the portion is first
     * cut out with substr() and the two-argument form of substr_count() is
     * used; otherwise $offset and $length are passed directly to
     * substr_count().
     */
    protected function substrCount($haystack, $needle, $offset, $length) {
        // The version comparison is done on the first call only and then
        // remembered in a static variable.
        static $isLegacyPhp;
        if ($isLegacyPhp === null) {
            $isLegacyPhp = version_compare(PHP_VERSION, '5.1', '<');
        }
        if (!$isLegacyPhp) {
            return substr_count($haystack, $needle, $offset, $length);
        }
        $window = substr($haystack, $offset, $length);
        return substr_count($window, $needle);
    }
    /**
     * Takes the inside of an HTML tag and makes an assoc array of attributes.
     * 
--- a/smoketests/common.php
+++ b/smoketests/common.php
@ -15,3 +15,21 @@ function escapeHTML($string) {
    return $string;
 }
 if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
    /**
     * Recursively removes backslash escaping from an array, in place.
     *
     * String values are passed through stripslashes(); nested arrays are
     * walked recursively. Values that are neither strings nor arrays
     * (integers, floats, booleans, null) are left untouched, so their type
     * is not silently changed to string.
     *
     * @param array $array Array to clean; modified by reference.
     */
    function fix_magic_quotes(&$array) {
        foreach ($array as $k => $val) {
            if (is_array($val)) {
                // descend via the original element so the change sticks
                fix_magic_quotes($array[$k]);
            } elseif (is_string($val)) {
                $array[$k] = stripslashes($val);
            }
        }
    }
    fix_magic_quotes($_GET);
    fix_magic_quotes($_POST);
    fix_magic_quotes($_COOKIE);
    fix_magic_quotes($_REQUEST);
    fix_magic_quotes($_ENV);
    fix_magic_quotes($_SERVER);
 }
--- a/tests/Debugger.php
+++ b/tests/Debugger.php
@ -55,18 +55,6 @@ function isInScopes($array = array()) {
 }
 /**#@-*/
 /**
  * Echoes a list of tokens as a single <pre> block for debugging.
  *
  * Each token is prefixed with its array key in a <sup> element and
  * rendered through HTMLPurifier_Generator, then escaped. The token whose
  * key is identical (===) to $index is wrapped in [<strong>...</strong>].
  */
 function printTokens($tokens, $index = null) {
    $generator = new HTMLPurifier_Generator();
    $pieces = array('<pre>');
    foreach ($tokens as $position => $token) {
        $rendered = "<sup>$position</sup>"
            . $generator->escape($generator->generateFromToken($token));
        if ($index === $position) {
            // highlight the token the caller asked for
            $rendered = '[<strong>' . $rendered . '</strong>]';
        }
        $pieces[] = $rendered;
    }
    $pieces[] = '</pre>';
    echo implode('', $pieces);
 }
 /**
 * The debugging singleton. Most interesting stuff happens here.
--- a/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
+++ b/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
@ -168,6 +168,19 @@ text-align:right;
 p p div {
 text-align:left;
 }"
        );
    }
    /**
     * Passes CSS wrapped in HTML comment delimiters (<!-- ... -->) to
     * assertCleanCSS() together with the same rule set without the
     * delimiters.
     */
    function test_removeComments() {
        // presumably (input CSS, expected cleaned CSS) — confirm against
        // the assertCleanCSS() helper, which is not visible here
        $this->assertCleanCSS(
 "<!--
 div {
 text-align:right;
 }
 -->",
 "div {
 text-align:right;
 }"
        );
    }
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@ -509,6 +509,29 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
        );
    }
    /**
     * Tokenization check for a <style> element whose content is wrapped in
     * an HTML comment (<!-- ... -->).
     */
    function test_tokenizeHTML_() {
        $this->assertTokenization(
 '<style type="text/css"><!--
 div {}
 --></style>',
            // expected tokens: the comment delimiters are gone and the
            // content is a Text token
            array(
                new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
                new HTMLPurifier_Token_Text("\ndiv {}\n"),
                new HTMLPurifier_Token_End('style'),
            ),
            // presumably per-lexer overrides of the expectation, keyed by
            // lexer name — confirm against assertTokenization()
            array(
                // PH5P doesn't seem to like style tags
                'PH5P' => false,
                // DirectLex defers to RemoveForeignElements for textification
                'DirectLex' => array(
                    new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
                    new HTMLPurifier_Token_Comment("\ndiv {}\n"),
                    new HTMLPurifier_Token_End('style'),
                ),
            )
        );
    }
    /*
    function test_tokenizeHTML_() {
--- a/tests/common.php
+++ b/tests/common.php
@ -159,4 +159,20 @@ function htmlpurifier_add_test($test, $test_file, $only_phpt = false) {
        default:
            trigger_error("$test_file is an invalid file for testing", E_USER_ERROR);
    }
-}
+}
 /**
  * Debugging helper that echoes a token list as one <pre> block.
  *
  * Every token is preceded by its array key inside a <sup> element and is
  * rendered via HTMLPurifier_Generator, then escaped. The token whose key
  * is identical (===) to $index is wrapped in [<strong>...</strong>].
  */
 function printTokens($tokens, $index = null) {
    $generator = new HTMLPurifier_Generator();
    $pieces = array('<pre>');
    foreach ($tokens as $position => $token) {
        $rendered = "<sup>$position</sup>"
            . $generator->escape($generator->generateFromToken($token));
        if ($index === $position) {
            // highlight the token the caller asked for
            $rendered = '[<strong>' . $rendered . '</strong>]';
        }
        $pieces[] = $rendered;
    }
    $pieces[] = '</pre>';
    echo implode('', $pieces);
 }