0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-05 06:01:52 +00:00

[2.0.1] Normalize newlines to \n for internal processing.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1235 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-25 19:18:55 +00:00
parent 9f996b125a
commit 6f5592ae60
8 changed files with 35 additions and 22 deletions

3
NEWS
View File

@ -15,6 +15,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Experimental AutoFormat functionality added: auto-paragraph and ! Experimental AutoFormat functionality added: auto-paragraph and
linkify your HTML input by setting %AutoFormat.AutoParagraph and linkify your HTML input by setting %AutoFormat.AutoParagraph and
%AutoFormat.Linkify to true %AutoFormat.Linkify to true
! Newlines are normalized to \n internally, and then converted to the
value of PHP_EOL on output. If this is not desired, set your newline
format using %Output.Newline.
- Clean up special case code for <script> tags - Clean up special case code for <script> tags
- Reorder includes for DefinitionCache decorators, fixes a possible - Reorder includes for DefinitionCache decorators, fixes a possible
missing class error missing class error

View File

@ -35,6 +35,15 @@ HTML
); );
HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat'); HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat');
HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', '
<p>
Newline string to format final output with. If left null, HTML Purifier
will auto-detect the default newline type of the system and use that;
you can manually override it here. Remember, \r\n is Windows, \r
is Mac, and \n is Unix. This directive was available since 2.0.1.
</p>
');
/** /**
* Generates HTML from tokens. * Generates HTML from tokens.
* @todo Create a configuration-wide instance that all objects retrieve * @todo Create a configuration-wide instance that all objects retrieve
@ -114,6 +123,10 @@ class HTMLPurifier_Generator
$html = (string) $tidy; $html = (string) $tidy;
} }
} }
// convert internal \n newlines to %Output.Newline (system PHP_EOL if unset)
$nl = $config->get('Output', 'Newline');
if ($nl === null) $nl = PHP_EOL;
$html = str_replace("\n", $nl, $html);
return $html; return $html;
} }
@ -153,11 +166,11 @@ class HTMLPurifier_Generator
*/ */
function generateScriptFromToken($token) { function generateScriptFromToken($token) {
if ($token->type != 'text') return $this->generateFromToken($token); if ($token->type != 'text') return $this->generateFromToken($token);
// return '<!--' . PHP_EOL . trim($token->data) . PHP_EOL . '// -->'; // return '<!--' . "\n" . trim($token->data) . "\n" . '// -->';
// more advanced version: // more advanced version:
// thanks <http://lachy.id.au/log/2005/05/script-comments> // thanks <http://lachy.id.au/log/2005/05/script-comments>
$data = preg_replace('#//\s*$#', '', $token->data); $data = preg_replace('#//\s*$#', '', $token->data);
return '<!--//--><![CDATA[//><!--' . PHP_EOL . trim($data) . PHP_EOL . '//--><!]]>'; return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
} }
/** /**

View File

@ -40,7 +40,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
$this->_splitText($text, $token); $this->_splitText($text, $token);
} elseif ($this->allowsElement('p')) { } elseif ($this->allowsElement('p')) {
// case 3: we're in an element that allows paragraphs // case 3: we're in an element that allows paragraphs
if (strpos($text, PHP_EOL . PHP_EOL) !== false) { if (strpos($text, "\n\n") !== false) {
// case 3.1: this text node has a double-newline // case 3.1: this text node has a double-newline
$token = array(new HTMLPurifier_Token_Start('p')); $token = array(new HTMLPurifier_Token_Start('p'));
$this->_splitText($text, $token); $this->_splitText($text, $token);
@ -57,7 +57,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
} }
if ($this->inputTokens[$i]->type == 'end') break; if ($this->inputTokens[$i]->type == 'end') break;
if ($this->inputTokens[$i]->type == 'text') { if ($this->inputTokens[$i]->type == 'text') {
if (strpos($this->inputTokens[$i]->data, PHP_EOL . PHP_EOL) !== false) { if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true; $ok = true;
} }
if (!$this->inputTokens[$i]->is_whitespace) break; if (!$this->inputTokens[$i]->is_whitespace) break;
@ -104,7 +104,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
if ($this->inputTokens[$i]->type == 'start') $j++; if ($this->inputTokens[$i]->type == 'start') $j++;
if ($this->inputTokens[$i]->type == 'end') $j--; if ($this->inputTokens[$i]->type == 'end') $j--;
if ($this->inputTokens[$i]->type == 'text') { if ($this->inputTokens[$i]->type == 'text') {
if (strpos($this->inputTokens[$i]->data, PHP_EOL . PHP_EOL) !== false) { if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true; $ok = true;
break; break;
} }
@ -137,7 +137,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
* @private * @private
*/ */
function _splitText($data, &$result) { function _splitText($data, &$result) {
$raw_paragraphs = explode(PHP_EOL . PHP_EOL, $data); $raw_paragraphs = explode("\n\n", $data);
// remove empty paragraphs // remove empty paragraphs
$paragraphs = array(); $paragraphs = array();

View File

@ -303,6 +303,10 @@ class HTMLPurifier_Lexer
$html = $this->extractBody($html); $html = $this->extractBody($html);
} }
// normalize newlines to \n
$html = str_replace("\r\n", "\n", $html);
$html = str_replace("\r", "\n", $html);
if ($config->get('HTML', 'Trusted')) { if ($config->get('HTML', 'Trusted')) {
// escape convoluted CDATA // escape convoluted CDATA
$html = $this->escapeCommentedCDATA($html); $html = $this->escapeCommentedCDATA($html);

View File

@ -47,7 +47,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
if ($maintain_line_numbers) $current_line = 1; if ($maintain_line_numbers) $current_line = 1;
else $current_line = false; else $current_line = false;
$context->register('CurrentLine', $current_line); $context->register('CurrentLine', $current_line);
$nl = PHP_EOL; $nl = "\n";
// how often to manually recalculate. This will ALWAYS be right, // how often to manually recalculate. This will ALWAYS be right,
// but it's pretty wasteful. Set to 0 to turn off // but it's pretty wasteful. Set to 0 to turn off
$synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval'); $synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');

View File

@ -38,10 +38,11 @@ class HTMLPurifier_ChildDef_TableTest extends HTMLPurifier_ChildDefHarness
// whitespace sticks to the previous element, last whitespace is // whitespace sticks to the previous element, last whitespace is
// stationary // stationary
$this->assertResult("\n <tr />\n <tr />\n "); $this->assertResult("\n <tr />\n <tr />\n ", true, array('Output.Newline' => "\n"));
$this->assertResult( $this->assertResult(
"\n\t<tbody />\n\t\t<tfoot />\n\t\t\t", "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
"\n\t\t<tfoot />\n\t<tbody />\n\t\t\t" "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t",
array('Output.Newline' => "\n")
); );
} }

View File

@ -207,6 +207,7 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
$this->config = HTMLPurifier_Config::createDefault(); $this->config = HTMLPurifier_Config::createDefault();
$this->config->set('Core', 'TidyFormat', true); $this->config->set('Core', 'TidyFormat', true);
$this->config->set('Output', 'Newline', "\n");
// nice wrapping please // nice wrapping please
$this->assertGeneration( $this->assertGeneration(

View File

@ -72,32 +72,23 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
function testLineNumbers() { function testLineNumbers() {
$html = '<b>Line 1</b> $html = "<b>Line 1</b>\n<i>Line 2</i>\nStill Line 2<br\n/>Now Line 4\n\n<br />";
<i>Line 2</i>
Still Line 2<br
/>Now Line 4
<br />';
$expect = array( $expect = array(
// line 1 // line 1
0 => new HTMLPurifier_Token_Start('b') 0 => new HTMLPurifier_Token_Start('b')
,1 => new HTMLPurifier_Token_Text('Line 1') ,1 => new HTMLPurifier_Token_Text('Line 1')
,2 => new HTMLPurifier_Token_End('b') ,2 => new HTMLPurifier_Token_End('b')
,3 => new HTMLPurifier_Token_Text(' ,3 => new HTMLPurifier_Token_Text("\n")
')
// line 2 // line 2
,4 => new HTMLPurifier_Token_Start('i') ,4 => new HTMLPurifier_Token_Start('i')
,5 => new HTMLPurifier_Token_Text('Line 2') ,5 => new HTMLPurifier_Token_Text('Line 2')
,6 => new HTMLPurifier_Token_End('i') ,6 => new HTMLPurifier_Token_End('i')
,7 => new HTMLPurifier_Token_Text(' ,7 => new HTMLPurifier_Token_Text("\nStill Line 2")
Still Line 2')
// line 3 // line 3
,8 => new HTMLPurifier_Token_Empty('br') ,8 => new HTMLPurifier_Token_Empty('br')
// line 4 // line 4
,9 => new HTMLPurifier_Token_Text('Now Line 4 ,9 => new HTMLPurifier_Token_Text("Now Line 4\n\n")
')
// line SIX // line SIX
,10 => new HTMLPurifier_Token_Empty('br') ,10 => new HTMLPurifier_Token_Empty('br')
); );