0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-03 05:11:52 +00:00

[1.1.1] To make up for DOMLex's tendency to drop tags, we've added a configuration option to let Tidy clean up the HTML afterwards. Good for hand-editors. Also, Tidy is a smart solution for pretty-printed HTML, so we're marking the related TODO wontfix.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@454 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-09-24 21:23:54 +00:00
parent 6adbaf0e5c
commit 24663d65ed
4 changed files with 65 additions and 2 deletions

2
TODO
View File

@ -43,9 +43,9 @@ Unknown release (on a scratch-an-itch basis)
- Fixes for Firefox's inability to handle COL alignment props (Bug 915) - Fixes for Firefox's inability to handle COL alignment props (Bug 915)
- Automatically add non-breaking spaces to empty table cells when - Automatically add non-breaking spaces to empty table cells when
empty-cells:show is applied to have compatibility with Internet Explorer empty-cells:show is applied to have compatibility with Internet Explorer
- Pretty-printing HTML (adds dependency of Generator to HTMLDefinition)
- Non-lossy dumb alternate character encoding transformations, achieved by - Non-lossy dumb alternate character encoding transformations, achieved by
numerically encoding all non-ASCII characters numerically encoding all non-ASCII characters
Wontfix Wontfix
- Non-lossy smart alternate character encoding transformations - Non-lossy smart alternate character encoding transformations
- Pretty-printing HTML, users can use Tidy on the output on entire page

View File

@ -21,7 +21,9 @@ if (!empty($_POST['html'])) {
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html']; $html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
$purifier = new HTMLPurifier(); $config = HTMLPurifier_Config::createDefault();
$config->set('Core', 'TidyFormat', !empty($_POST['tidy']));
$purifier = new HTMLPurifier($config);
$pure_html = $purifier->purify($html); $pure_html = $purifier->purify($html);
?> ?>
@ -65,6 +67,8 @@ if (isset($html)) {
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8'); HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
} }
?></textarea> ?></textarea>
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
name="tidy"<?php if (!empty($_POST['tidy'])) echo ' checked="checked"'; ?> /></div>
<div> <div>
<input type="submit" value="Submit" name="submit" class="button" /> <input type="submit" value="Submit" name="submit" class="button" />
</div> </div>

View File

@ -23,6 +23,19 @@ HTMLPurifier_ConfigSchema::define(
'This directive was available since 1.1.' 'This directive was available since 1.1.'
); );
// Declares the Core.TidyFormat directive, which the generator reads via
// $config->get('Core', 'TidyFormat') before post-processing its output with Tidy.
// The arguments appear to be: namespace, directive name, default value (false),
// type ('bool') and the description text -- confirm against
// HTMLPurifier_ConfigSchema::define().
// extension constraints could be factored into ConfigSchema
HTMLPurifier_ConfigSchema::define(
'Core', 'TidyFormat', false, 'bool',
'Determines whether or not to run Tidy on the final output for pretty '.
'formatting reasons, such as indentation and wrap. This can greatly '.
'improve readability for editors who are hand-editing the HTML, but is '.
'by no means necessary as HTML Purifier has already fixed all major '.
'errors the HTML may have had and could potentially result in data loss '.
'due to bugs in Tidy. Tidy is a non-default extension, and this directive '.
'will silently fail if Tidy is not available. This '.
'directive was available since 1.1.1.'
);
/** /**
* Generates HTML from tokens. * Generates HTML from tokens.
*/ */
@ -56,6 +69,30 @@ class HTMLPurifier_Generator
foreach ($tokens as $token) { foreach ($tokens as $token) {
$html .= $this->generateFromToken($token); $html .= $this->generateFromToken($token);
} }
if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
$tidy_options = array(
'indent'=> true,
'output-xhtml' => $this->_xhtml,
'show-body-only' => true,
'indent-spaces' => 2,
'wrap' => 68,
);
if (version_compare(PHP_VERSION, '5', '<')) {
tidy_set_encoding('utf8');
foreach ($tidy_options as $key => $value) {
tidy_setopt($key, $value);
}
tidy_parse_string($html);
tidy_clean_repair();
$html = tidy_get_output();
} else {
$tidy = new Tidy;
$tidy->parseString($html, $tidy_options, 'utf8');
$tidy->cleanRepair();
$html = (string) $tidy;
}
}
return $html; return $html;
} }

View File

@ -123,6 +123,9 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
var $config; var $config;
function assertGeneration($tokens, $expect) { function assertGeneration($tokens, $expect) {
$result = $this->gen->generateFromTokens($tokens, $this->config); $result = $this->gen->generateFromTokens($tokens, $this->config);
// normalized newlines, this probably should be put somewhere else
$result = str_replace("\r\n", "\n", $result);
$result = str_replace("\r", "\n", $result);
$this->assertEqual($expect, $result); $this->assertEqual($expect, $result);
} }
@ -148,6 +151,25 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
} }
/**
 * Checks the generator's output when Core.TidyFormat is switched on:
 * a div start token, a text token and a div end token are expected to
 * come back as markup spread over several lines.
 *
 * The test returns early, without asserting anything, when the tidy
 * extension is not loaded.
 *
 * NOTE(review): the generator passes 'indent-spaces' => 2 to Tidy, while the
 * expected string below shows a one-space indent before "Text" -- check
 * whether whitespace was collapsed in this copy of the test.
 */
function test_generateFromTokens_TidyFormat() {
// abort test if tidy isn't loaded
if (!extension_loaded('tidy')) return;
// start from default settings, then enable only the directive under test
$this->config = HTMLPurifier_Config::createDefault();
$this->config->set('Core', 'TidyFormat', true);
// nice wrapping please
$this->assertGeneration(
array(
new HTMLPurifier_Token_Start('div'),
new HTMLPurifier_Token_Text('Text'),
new HTMLPurifier_Token_End('div')
),
"<div>\n Text\n</div>\n"
);
}
} }
?> ?>