Fix Internet Explorer innerHTML bug.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2024-11-09 15:28:40 +00:00 · 2011-03-27 11:50:52 +01:00 · 2011-03-27 11:50:52 +01:00 · 0dd9e4faf4
commit 0dd9e4faf4
parent 94ed3b1231
7 changed files with 164 additions and 1 deletions
--- a/5
+++ b/5
@ -14,6 +14,11 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
  API change.  The old API still works but will emit a warning,
  see http://htmlpurifier.org/docs/enduser-customize.html#optimized
  for how to upgrade your code.
 # Protect against Internet Explorer innerHTML behavior by specially
  treating attributes with backticks but no angled brackets, quotes or
  spaces.  This constitutes a slight semantic change, which can be
  reverted using %Output.FixInnerHTML.  Reported by Neike Taika-Tessaro
  and Mario Heiderich.
 ! Added %HTML.Nofollow to add rel="nofollow" to external links.
 ! More types of SPL autoloaders allowed on later versions of PHP.
 ! Implementations for position, top, left, right, bottom, z-index
--- a/library/HTMLPurifier/ConfigSchema/schema.ser
+++ b/library/HTMLPurifier/ConfigSchema/schema.ser
--- a/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt
+++ b/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt
@ -0,0 +1,15 @@
 Output.FixInnerHTML
 TYPE: bool
 VERSION: 4.3.0
 DEFAULT: true
 --DESCRIPTION--
 <p>
  If true, HTML Purifier will protect against Internet Explorer's
  mishandling of the <code>innerHTML</code> attribute by appending
  a space to any attribute that does not contain angled brackets, spaces
  or quotes, but contains a backtick.  This slightly changes the
  semantics of any given attribute, so if this is unacceptable and
  you do not use <code>innerHTML</code> on any of your pages, you can
  turn this directive off.
 </p>
 --# vim: et sw=4 sts=4
--- a/library/HTMLPurifier/Generator.php
+++ b/library/HTMLPurifier/Generator.php
@ -36,6 +36,11 @@ class HTMLPurifier_Generator
     */
    private $_flashCompat;
    /**
     * Cache of %Output.FixInnerHTML
     */
    private $_innerHTMLFix;
    /**
     * Stack for keeping track of object information when outputting IE
     * compatibility code.
@ -54,6 +59,7 @@ class HTMLPurifier_Generator
    public function __construct($config, $context) {
        $this->config = $config;
        $this->_scriptFix = $config->get('Output.CommentScriptContents');
        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
        $this->_sortAttr = $config->get('Output.SortAttr');
        $this->_flashCompat = $config->get('Output.FlashCompat');
        $this->_def = $config->getHTMLDefinition();
@ -190,6 +196,37 @@ class HTMLPurifier_Generator
                    continue;
                }
            }
            // Workaround for Internet Explorer innerHTML bug.
            // Essentially, Internet Explorer, when calculating
            // innerHTML, omits quotes if there are no instances of
            // angled brackets, quotes or spaces.  However, when parsing
            // HTML (for example, when you assign to innerHTML), it
            // treats backticks as quotes.  Thus,
            //      <img alt="``" />
            // becomes
            //      <img alt=`` />
            // becomes
            //      <img alt='' />
            // Fortunately, all we need to do is trigger an appropriate
            // quoting style, which we do by adding an extra space.
            // This also is consistent with the W3C spec, which states
            // that user agents may ignore leading or trailing
            // whitespace (in fact, most don't, at least for attributes
            // like alt, but an extra space at the end is barely
            // noticeable).  Still, we have a configuration knob for
            // this, since this transformation is not necesary if you
            // don't process user input with innerHTML or you don't plan
            // on supporting Internet Explorer.
            if ($this->_innerHTMLFix) {
                if (strpos($value, '`') !== false) {
                    // check if correct quoting style would not already be
                    // triggered
                    if (strcspn($value, '"\' <>') === strlen($value)) {
                        // protect!
                        $value .= ' ';
                    }
                }
            }
            $html .= $key.'="'.$this->escape($value).'" ';
        }
        return rtrim($html);
--- a/smoketests/innerHTML.html
+++ b/smoketests/innerHTML.html
@ -0,0 +1,33 @@
 <html>
 <head>
    <title>innerHTML smoketest</title>
 </head>
 <body>
 <!--
 What we're going to do is use JavaScript to calculate
 fixpoints of innerHTML parse and reparsing.  We start with
 an input value, encoded in a JavaScript string.
 x.innerHTML = input
 We then snapshot the DOM state of x, and then perform the
 iteration:
 intermediate = x.innerHTML
 x.innerHTML = intermediate
 What inputs are we going to test?
 We will generate using the following alphabet:
    a01~!@#$%^&*()_+`-=[]\{}|;':",./<>? (and <space>)
 -->
 <textarea id="out" style="width:100%;height:100%;"></textarea>
 <div id="testContainer" style="display:none"></div>
 <script src="innerHTML.js" type="text/javascript"></script>
 </body>
 </html>
--- a/smoketests/innerHTML.js
+++ b/smoketests/innerHTML.js
@ -0,0 +1,51 @@
 var alphabet = 'a!`=[]\\;\':"/<> &';
 var out             = document.getElementById('out');
 var testContainer   = document.getElementById('testContainer');
 function print(s) {
    out.value += s + "\n";
 }
 function testImage() {
    return testContainer.firstChild;
 }
 function test(input) {
    var count = 0;
    var oldInput, newInput;
    testContainer.innerHTML = "<img />";
    testImage().setAttribute("alt", input);
    print("------");
    print("Test input: " + input);
    do {
        oldInput = testImage().getAttribute("alt");
        var intermediate = testContainer.innerHTML;
        print("Render: " + intermediate);
        testContainer.innerHTML = intermediate;
        if (testImage() == null) {
            print("Image disappeared...");
            break;
        }
        newInput = testImage().getAttribute("alt");
        print("New value: " + newInput);
        count++;
    } while (count < 5 && newInput != oldInput);
    if (count == 5) {
        print("Failed to achieve fixpoint");
    }
    testContainer.innerHTML = "";
 }
 print("Go!");
 test("`` ");
 test("'' ");
 for (var i = 0; i < alphabet.length; i++) {
    for (var j = 0; j < alphabet.length; j++) {
        test(alphabet.charAt(i) + alphabet.charAt(j));
    }
 }
 // document.getElementById('out').textContent = alphabet;
--- a/tests/HTMLPurifier/GeneratorTest.php
+++ b/tests/HTMLPurifier/GeneratorTest.php
@ -82,7 +82,7 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
        $this->assertGenerateFromToken( null, '' );
    }
-    function test_generateFromToken_() {
+    function test_generateFromToken_unicode() {
        $theta_char = $this->_entity_lookup->table['theta'];
        $this->assertGenerateFromToken(
            new HTMLPurifier_Token_Text($theta_char),
@ -90,6 +90,28 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
        );
    }
    function test_generateFromToken_backtick() {
        $this->assertGenerateFromToken(
            new HTMLPurifier_Token_Start('img', array('alt' => '`foo')),
            '<img alt="`foo ">'
        );
    }
    function test_generateFromToken_backtickDisabled() {
        $this->config->set('Output.FixInnerHTML', false);
        $this->assertGenerateFromToken(
            new HTMLPurifier_Token_Start('img', array('alt' => '`')),
            '<img alt="`">'
        );
    }
    function test_generateFromToken_backtickNoChange() {
        $this->assertGenerateFromToken(
            new HTMLPurifier_Token_Start('img', array('alt' => '`foo` bar')),
            '<img alt="`foo` bar">'
        );
    }
    function assertGenerateAttributes($attr, $expect, $element = false) {
        $generator = $this->createGenerator();
        $result = $generator->generateAttributes($attr, $element);