0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

[2.0.1] Normalize newlines to \n for internal processing.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1235 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-25 19:18:55 +00:00
parent 9f996b125a
commit 6f5592ae60
8 changed files with 35 additions and 22 deletions

3
NEWS
View File

@ -15,6 +15,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Experimental AutoFormat functionality added: auto-paragraph and
linkify your HTML input by setting %AutoFormat.AutoParagraph and
%AutoFormat.Linkify to true
! Newlines are normalized to \n internally, and then converted back to
the value of PHP_EOL on output. If this is not desired, set your
newline format using %Output.Newline.
- Clean up special case code for <script> tags
- Reorder includes for DefinitionCache decorators, fixes a possible
missing class error

View File

@ -35,6 +35,15 @@ HTML
);
HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat');
HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', '
<p>
Newline string to format final output with. If left null, HTML Purifier
will auto-detect the default newline type of the system and use that;
you can manually override it here. Remember, \r\n is Windows, \r
is Mac, and \n is Unix. This directive was available since 2.0.1.
</p>
');
/**
* Generates HTML from tokens.
* @todo Create a configuration-wide instance that all objects retrieve
@ -114,6 +123,10 @@ class HTMLPurifier_Generator
$html = (string) $tidy;
}
}
// normalize newlines to system
$nl = $config->get('Output', 'Newline');
if ($nl === null) $nl = PHP_EOL;
$html = str_replace("\n", $nl, $html);
return $html;
}
@ -153,11 +166,11 @@ class HTMLPurifier_Generator
*/
function generateScriptFromToken($token) {
if ($token->type != 'text') return $this->generateFromToken($token);
// return '<!--' . PHP_EOL . trim($token->data) . PHP_EOL . '// -->';
// return '<!--' . "\n" . trim($token->data) . "\n" . '// -->';
// more advanced version:
// thanks <http://lachy.id.au/log/2005/05/script-comments>
$data = preg_replace('#//\s*$#', '', $token->data);
return '<!--//--><![CDATA[//><!--' . PHP_EOL . trim($data) . PHP_EOL . '//--><!]]>';
return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
}
/**

View File

@ -40,7 +40,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
$this->_splitText($text, $token);
} elseif ($this->allowsElement('p')) {
// case 3: we're in an element that allows paragraphs
if (strpos($text, PHP_EOL . PHP_EOL) !== false) {
if (strpos($text, "\n\n") !== false) {
// case 3.1: this text node has a double-newline
$token = array(new HTMLPurifier_Token_Start('p'));
$this->_splitText($text, $token);
@ -57,7 +57,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
}
if ($this->inputTokens[$i]->type == 'end') break;
if ($this->inputTokens[$i]->type == 'text') {
if (strpos($this->inputTokens[$i]->data, PHP_EOL . PHP_EOL) !== false) {
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
}
if (!$this->inputTokens[$i]->is_whitespace) break;
@ -104,7 +104,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
if ($this->inputTokens[$i]->type == 'start') $j++;
if ($this->inputTokens[$i]->type == 'end') $j--;
if ($this->inputTokens[$i]->type == 'text') {
if (strpos($this->inputTokens[$i]->data, PHP_EOL . PHP_EOL) !== false) {
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
break;
}
@ -137,7 +137,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
* @private
*/
function _splitText($data, &$result) {
$raw_paragraphs = explode(PHP_EOL . PHP_EOL, $data);
$raw_paragraphs = explode("\n\n", $data);
// remove empty paragraphs
$paragraphs = array();

View File

@ -303,6 +303,10 @@ class HTMLPurifier_Lexer
$html = $this->extractBody($html);
}
// normalize newlines to \n
$html = str_replace("\r\n", "\n", $html);
$html = str_replace("\r", "\n", $html);
if ($config->get('HTML', 'Trusted')) {
// escape convoluted CDATA
$html = $this->escapeCommentedCDATA($html);

View File

@ -47,7 +47,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
if ($maintain_line_numbers) $current_line = 1;
else $current_line = false;
$context->register('CurrentLine', $current_line);
$nl = PHP_EOL;
$nl = "\n";
// how often to manually recalculate. This will ALWAYS be right,
// but it's pretty wasteful. Set to 0 to turn off
$synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');

View File

@ -38,10 +38,11 @@ class HTMLPurifier_ChildDef_TableTest extends HTMLPurifier_ChildDefHarness
// whitespace sticks to the previous element, last whitespace is
// stationary
$this->assertResult("\n <tr />\n <tr />\n ");
$this->assertResult("\n <tr />\n <tr />\n ", true, array('Output.Newline' => "\n"));
$this->assertResult(
"\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
"\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
"\n\t\t<tfoot />\n\t<tbody />\n\t\t\t",
array('Output.Newline' => "\n")
);
}

View File

@ -207,6 +207,7 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
$this->config = HTMLPurifier_Config::createDefault();
$this->config->set('Core', 'TidyFormat', true);
$this->config->set('Output', 'Newline', "\n");
// nice wrapping please
$this->assertGeneration(

View File

@ -72,32 +72,23 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
function testLineNumbers() {
$html = '<b>Line 1</b>
<i>Line 2</i>
Still Line 2<br
/>Now Line 4
<br />';
$html = "<b>Line 1</b>\n<i>Line 2</i>\nStill Line 2<br\n/>Now Line 4\n\n<br />";
$expect = array(
// line 1
0 => new HTMLPurifier_Token_Start('b')
,1 => new HTMLPurifier_Token_Text('Line 1')
,2 => new HTMLPurifier_Token_End('b')
,3 => new HTMLPurifier_Token_Text('
')
,3 => new HTMLPurifier_Token_Text("\n")
// line 2
,4 => new HTMLPurifier_Token_Start('i')
,5 => new HTMLPurifier_Token_Text('Line 2')
,6 => new HTMLPurifier_Token_End('i')
,7 => new HTMLPurifier_Token_Text('
Still Line 2')
,7 => new HTMLPurifier_Token_Text("\nStill Line 2")
// line 3
,8 => new HTMLPurifier_Token_Empty('br')
// line 4
,9 => new HTMLPurifier_Token_Text('Now Line 4
')
,9 => new HTMLPurifier_Token_Text("Now Line 4\n\n")
// line SIX
,10 => new HTMLPurifier_Token_Empty('br')
);