0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-23 00:41:52 +00:00

[1.6.1] DirectLex now preserves text in which a < bracket is followed by a non-alphanumeric character. This means that certain emoticons are now preserved.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@939 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-04-04 02:22:27 +00:00
parent 2c330cac73
commit ac3ab2a556
3 changed files with 34 additions and 0 deletions

5
NEWS
View File

@ -11,6 +11,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
1.7.0, unknown release date 1.7.0, unknown release date
1.6.1, unknown release date
! DirectLex now preserves text in which a < bracket is followed by
a non-alphanumeric character. This means that certain emoticons
are now preserved.
1.6.0, released 2007-04-01 1.6.0, released 2007-04-01
! Support for most common deprecated attributes via transformations: ! Support for most common deprecated attributes via transformations:
+ bgcolor in td, th, tr and table + bgcolor in td, th, tr and table

View File

@ -110,6 +110,23 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
continue; continue;
} }
// Check leading character is alnum, if not, we may
// have accidently grabbed an emoticon. Translate into
// text and go our merry way
if (!ctype_alnum($segment[0])) {
$array[] = new
HTMLPurifier_Token_Text(
'<' .
$this->parseData(
$segment
) .
'>'
);
$cursor = $position_next_gt + 1;
$inside_tag = false;
continue;
}
// Check if it is explicitly self closing, if so, remove // Check if it is explicitly self closing, if so, remove
// trailing slash. Remember, we could have a tag like <br>, so // trailing slash. Remember, we could have a tag like <br>, so
// any later token processing scripts must convert improperly // any later token processing scripts must convert improperly

View File

@ -281,6 +281,18 @@ class HTMLPurifier_LexerTest extends UnitTestCase
$input[18] = '<br test="x &lt; 6" />'; $input[18] = '<br test="x &lt; 6" />';
$expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) ); $expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
// test emoticon protection
$input[19] = '<b>Whoa! >.< That\'s not good >.></b>';
$expect[19] = array(
new HTMLPurifier_Token_Start('b'),
new HTMLPurifier_Token_Text('Whoa! >.'),
new HTMLPurifier_Token_Text('< That\'s not good >'),
new HTMLPurifier_Token_Text('.>'),
new HTMLPurifier_Token_End('b'),
);
$sax_expect[19] = false; // SAX drops the < character
$dom_expect[19] = false; // DOM drops the entire pseudo-tag
$default_config = HTMLPurifier_Config::createDefault(); $default_config = HTMLPurifier_Config::createDefault();
$default_context = new HTMLPurifier_Context(); $default_context = new HTMLPurifier_Context();
foreach($input as $i => $discard) { foreach($input as $i => $discard) {