mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-18 11:41:52 +00:00
[2.1.0] True emoticon < fix.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1260 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
4476745003
commit
a6ede3804e
4
NEWS
4
NEWS
@ -10,7 +10,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
==========================
|
||||
|
||||
2.1.0, unknown release date
|
||||
(none)
|
||||
! With %Core.AggressivelyFixLt, <3 and similar emoticons no longer
|
||||
trigger HTML removal in PHP5 (DOMLex). This directive is not necessary
|
||||
for PHP4 (DirectLex).
|
||||
|
||||
2.0.2, unknown release date
|
||||
(none)
|
||||
|
@ -3,6 +3,16 @@
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/TokenFactory.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'AggressivelyFixLt', false, 'bool', '
|
||||
This directive enables aggressive pre-filter fixes HTML Purifier can
|
||||
perform in order to ensure that open angled-brackets do not get killed
|
||||
during parsing stage. Enabling this will result in two preg_replace_callback
|
||||
calls and one preg_replace call for every bit of HTML passed through here.
|
||||
It is not necessary and will have no effect for PHP 4.
|
||||
This directive has been available since 2.1.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Parser that uses PHP 5's DOM extension (part of the core).
|
||||
*
|
||||
@ -42,6 +52,16 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
|
||||
$html = $this->normalize($html, $config, $context);
|
||||
|
||||
// attempt to armor stray angled brackets that cannot possibly
|
||||
// form tags and thus are probably being used as emoticons
|
||||
if ($config->get('Core', 'AggressivelyFixLt')) {
|
||||
$char = '[^a-z!\/]';
|
||||
$comment = "/<!--(.*?)(-->|\z)/is";
|
||||
$html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackArmorCommentEntities'), $html);
|
||||
$html = preg_replace("/<($char)/i", '<\\1', $html);
|
||||
$html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackUndoCommentSubst'), $html); // fix comments
|
||||
}
|
||||
|
||||
// preprocess html, essential for UTF-8
|
||||
$html =
|
||||
'<!DOCTYPE html '.
|
||||
@ -151,5 +171,21 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
*/
|
||||
public function muteErrorHandler($errno, $errstr) {}
|
||||
|
||||
/**
|
||||
* Callback function for undoing escaping of stray angled brackets
|
||||
* in comments
|
||||
*/
|
||||
function callbackUndoCommentSubst($matches) {
|
||||
return '<!--' . strtr($matches[1], array('&'=>'&','<'=>'<')) . $matches[2];
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function that entity-izes ampersands in comments so that
|
||||
* callbackUndoCommentSubst doesn't clobber them
|
||||
*/
|
||||
function callbackArmorCommentEntities($matches) {
|
||||
return '<!--' . str_replace('&', '&', $matches[1]) . $matches[2];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -204,7 +204,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
// Check leading character is alnum, if not, we may
|
||||
// have accidently grabbed an emoticon. Translate into
|
||||
// text and go our merry way
|
||||
if (!ctype_alnum($segment[0])) {
|
||||
if (!ctype_alpha($segment[0])) {
|
||||
// XML: $segment[0] !== '_' && $segment[0] !== ':'
|
||||
if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
|
||||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
|
@ -288,16 +288,21 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
$expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
|
||||
|
||||
// test emoticon protection
|
||||
$input[19] = '<b>Whoa! >.< That\'s not good >.></b>';
|
||||
$input[19] = '<b>Whoa! <3 That\'s not good >.></b>';
|
||||
$expect[19] = array(
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('Whoa! >.'),
|
||||
new HTMLPurifier_Token_Text('< That\'s not good >'),
|
||||
new HTMLPurifier_Token_Text('Whoa! '),
|
||||
new HTMLPurifier_Token_Text('<3 That\'s not good >'),
|
||||
new HTMLPurifier_Token_Text('.>'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
);
|
||||
$dom_expect[19] = array(
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
);
|
||||
$sax_expect[19] = false; // SAX drops the < character
|
||||
$dom_expect[19] = false; // DOM drops the entire pseudo-tag
|
||||
$config[19] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
|
||||
|
||||
// test comment parsing with funky characters inside
|
||||
$input[20] = '<!-- This >< comment --><br />';
|
||||
@ -306,6 +311,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
new HTMLPurifier_Token_Empty('br')
|
||||
);
|
||||
$sax_expect[20] = false;
|
||||
$config[20] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
|
||||
|
||||
// test comment parsing of missing end
|
||||
$input[21] = '<!-- This >< comment';
|
||||
@ -314,6 +320,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
);
|
||||
$sax_expect[21] = false;
|
||||
$dom_expect[21] = false;
|
||||
$config[21] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
|
||||
|
||||
// test CDATA tags
|
||||
$input[22] = '<script>alert("<foo>");</script>';
|
||||
@ -324,7 +331,14 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
);
|
||||
$config[22] = HTMLPurifier_Config::create(array('HTML.Trusted' => true));
|
||||
$sax_expect[22] = false;
|
||||
//$dom_expect[22] = false;
|
||||
|
||||
// test escaping
|
||||
$input[23] = '<!-- This comment < < & -->';
|
||||
$expect[23] = array(
|
||||
new HTMLPurifier_Token_Comment(' This comment < < & ')
|
||||
);
|
||||
$sax_expect[23] = false;
|
||||
$config[21] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
$default_context = new HTMLPurifier_Context();
|
||||
|
Loading…
Reference in New Issue
Block a user