mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-03-24 06:47:02 +00:00
parent
353c96f156
commit
5662efc936
6
NEWS
6
NEWS
@ -22,6 +22,12 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
- We accidentally dropped certain Unicode characters if there was
|
- We accidentally dropped certain Unicode characters if there was
|
||||||
one or more invalid characters. This has been fixed, thanks
|
one or more invalid characters. This has been fixed, thanks
|
||||||
to mpyw <ryosuke_i_628@yahoo.co.jp>
|
to mpyw <ryosuke_i_628@yahoo.co.jp>
|
||||||
|
- Fix for "Don't truncate upon encountering </div> when using DOMLex"
|
||||||
|
caused a regression with HTML 4.01 Strict parsing with libxml 2.9.1
|
||||||
|
(and maybe later versions, but known OK with libxml 2.9.4). The
|
||||||
|
fix is to go about handling truncation a bit more cleverly so that
|
||||||
|
we can wrap with divs (sidestepping the bug) while still slurping out the
|
||||||
|
rest of the text in case it ran off the end. (#78)
|
||||||
# By default, when a link has a target attribute associated
|
# By default, when a link has a target attribute associated
|
||||||
with it, we now also add rel="noopener" in order to
|
with it, we now also add rel="noopener" in order to
|
||||||
prevent the new window from being able to overwrite
|
prevent the new window from being able to overwrite
|
||||||
|
@ -72,12 +72,20 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
$doc->loadHTML($html);
|
$doc->loadHTML($html);
|
||||||
restore_error_handler();
|
restore_error_handler();
|
||||||
|
|
||||||
|
$body = $doc->getElementsByTagName('html')->item(0)-> // <html>
|
||||||
|
getElementsByTagName('body')->item(0); // <body>
|
||||||
|
|
||||||
|
$div = $body->getElementsByTagName('div')->item(0); // <div>
|
||||||
$tokens = array();
|
$tokens = array();
|
||||||
$this->tokenizeDOM(
|
$this->tokenizeDOM($div, $tokens);
|
||||||
$doc->getElementsByTagName('html')->item(0)-> // <html>
|
// If the div has a sibling, that means we tripped across
|
||||||
getElementsByTagName('body')->item(0), // <body>
|
// a premature </div> tag. So remove the div we parsed,
|
||||||
$tokens
|
// and then tokenize the rest of body. We can't tokenize
|
||||||
);
|
// the sibling directly as we'll lose the tags in that case.
|
||||||
|
if ($div->nextSibling) {
|
||||||
|
$body->removeChild($div);
|
||||||
|
$this->tokenizeDOM($body, $tokens);
|
||||||
|
}
|
||||||
return $tokens;
|
return $tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -252,7 +260,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
* @param HTMLPurifier_Context $context
|
* @param HTMLPurifier_Context $context
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
protected function wrapHTML($html, $config, $context)
|
protected function wrapHTML($html, $config, $context, $use_div = true)
|
||||||
{
|
{
|
||||||
$def = $config->getDefinition('HTML');
|
$def = $config->getDefinition('HTML');
|
||||||
$ret = '';
|
$ret = '';
|
||||||
@ -271,7 +279,11 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
$ret .= '<html><head>';
|
$ret .= '<html><head>';
|
||||||
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
|
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
|
||||||
// No protection if $html contains a stray </div>!
|
// No protection if $html contains a stray </div>!
|
||||||
$ret .= '</head><body>' . $html . '</body></html>';
|
$ret .= '</head><body>';
|
||||||
|
if ($use_div) $ret .= '<div>';
|
||||||
|
$ret .= $html;
|
||||||
|
if ($use_div) $ret .= '</div>';
|
||||||
|
$ret .= '</body></html>';
|
||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -21,7 +21,7 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
|
|||||||
public function tokenizeHTML($html, $config, $context)
|
public function tokenizeHTML($html, $config, $context)
|
||||||
{
|
{
|
||||||
$new_html = $this->normalize($html, $config, $context);
|
$new_html = $this->normalize($html, $config, $context);
|
||||||
$new_html = $this->wrapHTML($new_html, $config, $context);
|
$new_html = $this->wrapHTML($new_html, $config, $context, false /* no div */);
|
||||||
try {
|
try {
|
||||||
$parser = new HTML5($new_html);
|
$parser = new HTML5($new_html);
|
||||||
$doc = $parser->save();
|
$doc = $parser->save();
|
||||||
|
7
tests/HTMLPurifier/HTMLT/t78.htmlt
Normal file
7
tests/HTMLPurifier/HTMLT/t78.htmlt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
--INI--
|
||||||
|
HTML.Doctype = HTML 4.01 Strict
|
||||||
|
--HTML--
|
||||||
|
<b>Vetgedrukt</b> <i>Schuingedrukt</i> <span>Hou</span><iframe></iframe><script></script> jij ook zo van vakjesdenken?
|
||||||
|
--EXPECT--
|
||||||
|
<b>Vetgedrukt</b> <i>Schuingedrukt</i> <span>Hou</span> jij ook zo van vakjesdenken?
|
||||||
|
--# vim: et sw=4 sts=4
|
@ -814,13 +814,21 @@ div {}
|
|||||||
public function test_tokenizeHTML_prematureDivClose()
|
public function test_tokenizeHTML_prematureDivClose()
|
||||||
{
|
{
|
||||||
$this->assertTokenization(
|
$this->assertTokenization(
|
||||||
'</div>dontdie',
|
'</div>dont<b>die</b>',
|
||||||
array(
|
array(
|
||||||
new HTMLPurifier_Token_End('div'),
|
new HTMLPurifier_Token_End('div'),
|
||||||
new HTMLPurifier_Token_Text('dontdie')
|
new HTMLPurifier_Token_Text('dont'),
|
||||||
|
new HTMLPurifier_Token_Start('b'),
|
||||||
|
new HTMLPurifier_Token_Text('die'),
|
||||||
|
new HTMLPurifier_Token_End('b'),
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
'DOMLex' => $alt = array(new HTMLPurifier_Token_Text('dontdie')),
|
'DOMLex' => $alt = array(
|
||||||
|
new HTMLPurifier_Token_Text('dont'),
|
||||||
|
new HTMLPurifier_Token_Start('b'),
|
||||||
|
new HTMLPurifier_Token_Text('die'),
|
||||||
|
new HTMLPurifier_Token_End('b')
|
||||||
|
),
|
||||||
'PH5P' => $alt
|
'PH5P' => $alt
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user