mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-18 11:41:52 +00:00
[2.1.0] True emoticon < fix.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1260 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
4476745003
commit
a6ede3804e
4
NEWS
4
NEWS
@ -10,7 +10,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
==========================
|
==========================
|
||||||
|
|
||||||
2.1.0, unknown release date
|
2.1.0, unknown release date
|
||||||
(none)
|
! With %Core.AggressivelyFixLt, <3 and similar emoticons no longer
|
||||||
|
trigger HTML removal in PHP5 (DOMLex). This directive is not necessary
|
||||||
|
for PHP4 (DirectLex).
|
||||||
|
|
||||||
2.0.2, unknown release date
|
2.0.2, unknown release date
|
||||||
(none)
|
(none)
|
||||||
|
@ -3,6 +3,16 @@
|
|||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
require_once 'HTMLPurifier/TokenFactory.php';
|
require_once 'HTMLPurifier/TokenFactory.php';
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'AggressivelyFixLt', false, 'bool', '
|
||||||
|
This directive enables aggressive pre-filter fixes HTML Purifier can
|
||||||
|
perform in order to ensure that open angled-brackets do not get killed
|
||||||
|
during parsing stage. Enabling this will result in two preg_replace_callback
|
||||||
|
calls and one preg_replace call for every bit of HTML passed through here.
|
||||||
|
It is not necessary and will have no effect for PHP 4.
|
||||||
|
This directive has been available since 2.1.0.
|
||||||
|
');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parser that uses PHP 5's DOM extension (part of the core).
|
* Parser that uses PHP 5's DOM extension (part of the core).
|
||||||
*
|
*
|
||||||
@ -42,6 +52,16 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
|
|
||||||
$html = $this->normalize($html, $config, $context);
|
$html = $this->normalize($html, $config, $context);
|
||||||
|
|
||||||
|
// attempt to armor stray angled brackets that cannot possibly
|
||||||
|
// form tags and thus are probably being used as emoticons
|
||||||
|
if ($config->get('Core', 'AggressivelyFixLt')) {
|
||||||
|
$char = '[^a-z!\/]';
|
||||||
|
$comment = "/<!--(.*?)(-->|\z)/is";
|
||||||
|
$html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackArmorCommentEntities'), $html);
|
||||||
|
$html = preg_replace("/<($char)/i", '&lt;\\1', $html);
|
||||||
|
$html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackUndoCommentSubst'), $html); // fix comments
|
||||||
|
}
|
||||||
|
|
||||||
// preprocess html, essential for UTF-8
|
// preprocess html, essential for UTF-8
|
||||||
$html =
|
$html =
|
||||||
'<!DOCTYPE html '.
|
'<!DOCTYPE html '.
|
||||||
@ -151,5 +171,21 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
*/
|
*/
|
||||||
public function muteErrorHandler($errno, $errstr) {}
|
public function muteErrorHandler($errno, $errstr) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback function for undoing escaping of stray angled brackets
|
||||||
|
* in comments
|
||||||
|
*/
|
||||||
|
function callbackUndoCommentSubst($matches) {
|
||||||
|
return '<!--' . strtr($matches[1], array('&amp;'=>'&','&lt;'=>'<')) . $matches[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback function that entity-izes ampersands in comments so that
|
||||||
|
* callbackUndoCommentSubst doesn't clobber them
|
||||||
|
*/
|
||||||
|
function callbackArmorCommentEntities($matches) {
|
||||||
|
return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,7 +204,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
// Check leading character is alnum, if not, we may
|
// Check leading character is alnum, if not, we may
|
||||||
// have accidently grabbed an emoticon. Translate into
|
// have accidently grabbed an emoticon. Translate into
|
||||||
// text and go our merry way
|
// text and go our merry way
|
||||||
if (!ctype_alnum($segment[0])) {
|
if (!ctype_alpha($segment[0])) {
|
||||||
|
// XML: $segment[0] !== '_' && $segment[0] !== ':'
|
||||||
if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
|
if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
|
||||||
$token = new
|
$token = new
|
||||||
HTMLPurifier_Token_Text(
|
HTMLPurifier_Token_Text(
|
||||||
|
@ -288,16 +288,21 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
|||||||
$expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
|
$expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
|
||||||
|
|
||||||
// test emoticon protection
|
// test emoticon protection
|
||||||
$input[19] = '<b>Whoa! >.< That\'s not good >.></b>';
|
$input[19] = '<b>Whoa! <3 That\'s not good >.></b>';
|
||||||
$expect[19] = array(
|
$expect[19] = array(
|
||||||
new HTMLPurifier_Token_Start('b'),
|
new HTMLPurifier_Token_Start('b'),
|
||||||
new HTMLPurifier_Token_Text('Whoa! >.'),
|
new HTMLPurifier_Token_Text('Whoa! '),
|
||||||
new HTMLPurifier_Token_Text('< That\'s not good >'),
|
new HTMLPurifier_Token_Text('<3 That\'s not good >'),
|
||||||
new HTMLPurifier_Token_Text('.>'),
|
new HTMLPurifier_Token_Text('.>'),
|
||||||
new HTMLPurifier_Token_End('b'),
|
new HTMLPurifier_Token_End('b'),
|
||||||
);
|
);
|
||||||
|
$dom_expect[19] = array(
|
||||||
|
new HTMLPurifier_Token_Start('b'),
|
||||||
|
new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
|
||||||
|
new HTMLPurifier_Token_End('b'),
|
||||||
|
);
|
||||||
$sax_expect[19] = false; // SAX drops the < character
|
$sax_expect[19] = false; // SAX drops the < character
|
||||||
$dom_expect[19] = false; // DOM drops the entire pseudo-tag
|
$config[19] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
|
||||||
|
|
||||||
// test comment parsing with funky characters inside
|
// test comment parsing with funky characters inside
|
||||||
$input[20] = '<!-- This >< comment --><br />';
|
$input[20] = '<!-- This >< comment --><br />';
|
||||||
@ -306,6 +311,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
|||||||
new HTMLPurifier_Token_Empty('br')
|
new HTMLPurifier_Token_Empty('br')
|
||||||
);
|
);
|
||||||
$sax_expect[20] = false;
|
$sax_expect[20] = false;
|
||||||
|
$config[20] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
|
||||||
|
|
||||||
// test comment parsing of missing end
|
// test comment parsing of missing end
|
||||||
$input[21] = '<!-- This >< comment';
|
$input[21] = '<!-- This >< comment';
|
||||||
@ -314,6 +320,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
|||||||
);
|
);
|
||||||
$sax_expect[21] = false;
|
$sax_expect[21] = false;
|
||||||
$dom_expect[21] = false;
|
$dom_expect[21] = false;
|
||||||
|
$config[21] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
|
||||||
|
|
||||||
// test CDATA tags
|
// test CDATA tags
|
||||||
$input[22] = '<script>alert("<foo>");</script>';
|
$input[22] = '<script>alert("<foo>");</script>';
|
||||||
@ -324,7 +331,14 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
|||||||
);
|
);
|
||||||
$config[22] = HTMLPurifier_Config::create(array('HTML.Trusted' => true));
|
$config[22] = HTMLPurifier_Config::create(array('HTML.Trusted' => true));
|
||||||
$sax_expect[22] = false;
|
$sax_expect[22] = false;
|
||||||
//$dom_expect[22] = false;
|
|
||||||
|
// test escaping
|
||||||
|
$input[23] = '<!-- This comment < &lt; & -->';
|
||||||
|
$expect[23] = array(
|
||||||
|
new HTMLPurifier_Token_Comment(' This comment < &lt; & ')
|
||||||
|
);
|
||||||
|
$sax_expect[23] = false;
|
||||||
|
$config[21] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
|
||||||
|
|
||||||
$default_config = HTMLPurifier_Config::createDefault();
|
$default_config = HTMLPurifier_Config::createDefault();
|
||||||
$default_context = new HTMLPurifier_Context();
|
$default_context = new HTMLPurifier_Context();
|
||||||
|
Loading…
Reference in New Issue
Block a user