From 0dd9e4faf43e69ef05d30902317b09597eb14291 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 27 Mar 2011 11:50:52 +0100 Subject: [PATCH] Fix Internet Explorer innerHTML bug. Signed-off-by: Edward Z. Yang --- NEWS | 5 ++ library/HTMLPurifier/ConfigSchema/schema.ser | Bin 14047 -> 14140 bytes .../schema/Output.FixInnerHTML.txt | 15 ++++++ library/HTMLPurifier/Generator.php | 37 +++++++++++++ smoketests/innerHTML.html | 33 ++++++++++++ smoketests/innerHTML.js | 51 ++++++++++++++++++ tests/HTMLPurifier/GeneratorTest.php | 24 ++++++++- 7 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt create mode 100644 smoketests/innerHTML.html create mode 100644 smoketests/innerHTML.js diff --git a/NEWS b/NEWS index 9ec10c5c..7cb1117c 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier API change. The old API still works but will emit a warning, see http://htmlpurifier.org/docs/enduser-customize.html#optimized for how to upgrade your code. +# Protect against Internet Explorer innerHTML behavior by specially + treating attributes with backticks but no angled brackets, quotes or + spaces. This constitutes a slight semantic change, which can be + reverted using %Output.FixInnerHTML. Reported by Neike Taika-Tessaro + and Mario Heiderich. ! Added %HTML.Nofollow to add rel="nofollow" to external links. ! More types of SPL autoloaders allowed on later versions of PHP. ! Implementations for position, top, left, right, bottom, z-index diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser index 0e220c8ed8ec444e65e78a554be34331f807542e..245ba5d2d09c71529c7e1357160a3393ff280a4d 100644 GIT binary patch delta 128 zcmcbgyC-jgIiuxfEx}*BVwP4){-q@ar6qc9nH8RSd8tJnA-+DFrR8@pO@1igK~ULC iCPt&lT7t5hm6X;9N@F)o$vV@@e6oVP^ya6=znB0?5i9Qi delta 53 zcmdm!cRzQ6Iitm9Pr+Zjo0H_%GffioKyV*0F&a+x6qMbZptMGCa+19GW+#(hOaLP| B6FdL_ diff --git a/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt b/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt new file mode 100644 index 00000000..d6f0d9f2 --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt @@ -0,0 +1,15 @@ +Output.FixInnerHTML +TYPE: bool +VERSION: 4.3.0 +DEFAULT: true +--DESCRIPTION-- +

+ If true, HTML Purifier will protect against Internet Explorer's + mishandling of the innerHTML attribute by appending + a space to any attribute that does not contain angled brackets, spaces + or quotes, but contains a backtick. This slightly changes the + semantics of any given attribute, so if this is unacceptable and + you do not use innerHTML on any of your pages, you can + turn this directive off. +

+--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php index 5e589315..fee1a5f8 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/HTMLPurifier/Generator.php @@ -36,6 +36,11 @@ class HTMLPurifier_Generator */ private $_flashCompat; + /** + * Cache of %Output.FixInnerHTML + */ + private $_innerHTMLFix; + /** * Stack for keeping track of object information when outputting IE * compatibility code. @@ -54,6 +59,7 @@ class HTMLPurifier_Generator public function __construct($config, $context) { $this->config = $config; $this->_scriptFix = $config->get('Output.CommentScriptContents'); + $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); $this->_sortAttr = $config->get('Output.SortAttr'); $this->_flashCompat = $config->get('Output.FlashCompat'); $this->_def = $config->getHTMLDefinition(); @@ -190,6 +196,37 @@ class HTMLPurifier_Generator continue; } } + // Workaround for Internet Explorer innerHTML bug. + // Essentially, Internet Explorer, when calculating + // innerHTML, omits quotes if there are no instances of + // angled brackets, quotes or spaces. However, when parsing + // HTML (for example, when you assign to innerHTML), it + // treats backticks as quotes. Thus, + // `` + // becomes + // `` + // becomes + // + // Fortunately, all we need to do is trigger an appropriate + // quoting style, which we do by adding an extra space. + // This also is consistent with the W3C spec, which states + // that user agents may ignore leading or trailing + // whitespace (in fact, most don't, at least for attributes + // like alt, but an extra space at the end is barely + // noticeable). Still, we have a configuration knob for + // this, since this transformation is not necesary if you + // don't process user input with innerHTML or you don't plan + // on supporting Internet Explorer. + if ($this->_innerHTMLFix) { + if (strpos($value, '`') !== false) { + // check if correct quoting style would not already be + // triggered + if (strcspn($value, '"\' <>') === strlen($value)) { + // protect! + $value .= ' '; + } + } + } $html .= $key.'="'.$this->escape($value).'" '; } return rtrim($html); diff --git a/smoketests/innerHTML.html b/smoketests/innerHTML.html new file mode 100644 index 00000000..0b840f18 --- /dev/null +++ b/smoketests/innerHTML.html @@ -0,0 +1,33 @@ + + + innerHTML smoketest + + + + + + + + diff --git a/smoketests/innerHTML.js b/smoketests/innerHTML.js new file mode 100644 index 00000000..74ccbb68 --- /dev/null +++ b/smoketests/innerHTML.js @@ -0,0 +1,51 @@ +var alphabet = 'a!`=[]\\;\':"/<> &'; + +var out = document.getElementById('out'); +var testContainer = document.getElementById('testContainer'); + +function print(s) { + out.value += s + "\n"; +} + +function testImage() { + return testContainer.firstChild; +} + +function test(input) { + var count = 0; + var oldInput, newInput; + testContainer.innerHTML = ""; + testImage().setAttribute("alt", input); + print("------"); + print("Test input: " + input); + do { + oldInput = testImage().getAttribute("alt"); + var intermediate = testContainer.innerHTML; + print("Render: " + intermediate); + testContainer.innerHTML = intermediate; + if (testImage() == null) { + print("Image disappeared..."); + break; + } + newInput = testImage().getAttribute("alt"); + print("New value: " + newInput); + count++; + } while (count < 5 && newInput != oldInput); + if (count == 5) { + print("Failed to achieve fixpoint"); + } + testContainer.innerHTML = ""; +} + +print("Go!"); + +test("`` "); +test("'' "); + +for (var i = 0; i < alphabet.length; i++) { + for (var j = 0; j < alphabet.length; j++) { + test(alphabet.charAt(i) + alphabet.charAt(j)); + } +} + +// document.getElementById('out').textContent = alphabet; diff --git a/tests/HTMLPurifier/GeneratorTest.php b/tests/HTMLPurifier/GeneratorTest.php index 67016e40..46b1dcf6 100644 --- a/tests/HTMLPurifier/GeneratorTest.php +++ b/tests/HTMLPurifier/GeneratorTest.php @@ -82,7 +82,7 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness $this->assertGenerateFromToken( null, '' ); } - function test_generateFromToken_() { + function test_generateFromToken_unicode() { $theta_char = $this->_entity_lookup->table['theta']; $this->assertGenerateFromToken( new HTMLPurifier_Token_Text($theta_char), @@ -90,6 +90,28 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness ); } + function test_generateFromToken_backtick() { + $this->assertGenerateFromToken( + new HTMLPurifier_Token_Start('img', array('alt' => '`foo')), + '`foo ' + ); + } + + function test_generateFromToken_backtickDisabled() { + $this->config->set('Output.FixInnerHTML', false); + $this->assertGenerateFromToken( + new HTMLPurifier_Token_Start('img', array('alt' => '`')), + '`' + ); + } + + function test_generateFromToken_backtickNoChange() { + $this->assertGenerateFromToken( + new HTMLPurifier_Token_Start('img', array('alt' => '`foo` bar')), + '`foo` bar' + ); + } + function assertGenerateAttributes($attr, $expect, $element = false) { $generator = $this->createGenerator(); $result = $generator->generateAttributes($attr, $element);