From 15d1a3003a516f46b3f253747c116ffc830e1d9c Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 31 Aug 2014 08:50:33 +0100 Subject: [PATCH] Don't truncate in DOMLex when seeing closing div Signed-off-by: Edward Z. Yang --- NEWS | 4 ++++ library/HTMLPurifier/Lexer/DOMLex.php | 5 ++--- library/HTMLPurifier/Lexer/PH5P.php | 3 +-- tests/HTMLPurifier/LexerTest.php | 18 +++++++++++++++++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index 90a05462..e6e0b9b0 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Internal change ========================== +4.7.0, unknown release date +- Don't truncate upon encountering </div> when using DOMLex. Thanks + Myrto Christina for finally convincing me to fix this. + 4.6.0, released 2013-11-30 # Secure URI munge hashing algorithm has changed to hash_hmac("sha256", $url, $secret). Please update any verification scripts you may have. diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php index 72075445..b8181929 100644 --- a/library/HTMLPurifier/Lexer/DOMLex.php +++ b/library/HTMLPurifier/Lexer/DOMLex.php @@ -75,8 +75,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer $tokens = array(); $this->tokenizeDOM( $doc->getElementsByTagName('html')->item(0)-> // <html> - getElementsByTagName('body')->item(0)-> // <body> - getElementsByTagName('div')->item(0), // <div>
+ getElementsByTagName('body')->item(0), // <body> $tokens ); return $tokens; @@ -272,7 +271,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer $ret .= '<html><head>'; $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'; // No protection if $html contains a stray
</div>! - $ret .= '</head><body><div>' . $html . '</div></body></html>'; + $ret .= '</head><body>' . $html . '</body></html>'; return $ret; } } diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php index a4587e4c..ff4fa218 100644 --- a/library/HTMLPurifier/Lexer/PH5P.php +++ b/library/HTMLPurifier/Lexer/PH5P.php @@ -34,8 +34,7 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex $tokens = array(); $this->tokenizeDOM( $doc->getElementsByTagName('html')->item(0)-> // <html> - getElementsByTagName('body')->item(0)-> // <body> - getElementsByTagName('div')->item(0) // <div>
+ getElementsByTagName('body')->item(0) // <body> , $tokens ); diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index a2438a31..ecdbe1b8 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -264,7 +264,8 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness new HTMLPurifier_Token_End('poolasdf'), new HTMLPurifier_Token_End('pooloka'), ), - 'PH5P' => $alt, + // 20140831: Weird, but whatever... + 'PH5P' => array(new HTMLPurifier_Token_Empty('asdf')), ) ); } @@ -800,6 +801,21 @@ div {} ); } + public function test_tokenizeHTML_prematureDivClose() + { + $this->assertTokenization( + '</div>dontdie', + array( + new HTMLPurifier_Token_End('div'), + new HTMLPurifier_Token_Text('dontdie') + ), + array( + 'DOMLex' => $alt = array(new HTMLPurifier_Token_Text('dontdie')), + 'PH5P' => $alt + ) + ); + } + /*