mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-08 07:01:53 +00:00
Merged 438:439, 440:441, and 442:457 from trunk/ to branches/1.1/, mostly major work done for 1.1.1 release.
- Various documentation updates - Fixed fatal error in benchmark scripts, slightly augmented - As far as possible, whitespace is preserved in-between table children - Configuration option to optionally Tidy up output for indentation to make up for dropped whitespace by DOMLex (pretty-printing for the entire application should be done by a page-wide Tidy) - Sample test-settings.php file included Unrelated unmerged edit: removed irrelevant 1.2.0 release notes, those only exist in the trunk. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/1.1@458 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
30d75c999d
commit
bc5871f389
8
INSTALL
8
INSTALL
@ -46,7 +46,9 @@ Then, it's a simple matter of including the base file:
|
|||||||
|
|
||||||
require_once 'HTMLPurifier.php';
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
...and you're good to go.
|
...and you're good to go. The library/ folder contains all the files you need,
|
||||||
|
so you can get rid of most of everything else when using the library in a
|
||||||
|
production environment.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -98,7 +100,8 @@ XHTML output like this:
|
|||||||
|
|
||||||
However, I strongly recommend that you use XHTML. Currently, we can only
|
However, I strongly recommend that you use XHTML. Currently, we can only
|
||||||
guarantee transitional-compliant output, future versions will also allow strict
|
guarantee transitional-compliant output, future versions will also allow strict
|
||||||
output.
|
output. There are more configuration directives which can be read about
|
||||||
|
here: http://hp.jpsband.org/live/configdoc/plain.html
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -130,6 +133,7 @@ If your website is in UTF-8 and XHTML Transitional, use this code:
|
|||||||
$purifier = new HTMLPurifier();
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
?>
|
||||||
|
|
||||||
If your website is in a different encoding or doctype, use this code:
|
If your website is in a different encoding or doctype, use this code:
|
||||||
|
|
||||||
|
12
NEWS
12
NEWS
@ -1,11 +1,15 @@
|
|||||||
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||
|
|
||||||
1.2.0, unknown projected release date
|
|
||||||
(major feature release)
|
|
||||||
|
|
||||||
1.1.1, unknown projected release date
|
1.1.1, unknown projected release date
|
||||||
(bugfix release)
|
- Various documentation updates
|
||||||
|
- Fixed parse error in configuration documentation script
|
||||||
|
- Fixed fatal error in benchmark scripts, slightly augmented
|
||||||
|
- As far as possible, whitespace is preserved in-between table children
|
||||||
|
- Configuration option to optionally Tidy up output for indentation to make up
|
||||||
|
for dropped whitespace by DOMLex (pretty-printing for the entire application
|
||||||
|
should be done by a page-wide Tidy)
|
||||||
|
- Sample test-settings.php file included
|
||||||
|
|
||||||
1.1.0, released 2006-09-16
|
1.1.0, released 2006-09-16
|
||||||
- Made URI validator more forgiving: will ignore leading and trailing
|
- Made URI validator more forgiving: will ignore leading and trailing
|
||||||
|
13
SLOW
13
SLOW
@ -17,18 +17,23 @@ second tacked on to the load time probably isn't going to be that huge of
|
|||||||
a problem. Then, displaying the content is simply a matter of outputting
|
a problem. Then, displaying the content is simply a matter of outputting
|
||||||
it directly from your database/filesystem. The trouble with this method is
|
it directly from your database/filesystem. The trouble with this method is
|
||||||
that your user loses the original text, and when doing edits, will be
|
that your user loses the original text, and when doing edits, will be
|
||||||
handling the filtered text. Of course, maybe that's a good thing. If you
|
handling the filtered text. While this may be a good thing, especially if
|
||||||
don't mind a little extra complexity, you can try...
|
you're using a WYSIWYG editor, it can also result in data-loss if a user
|
||||||
|
expects a certain element to be available but it isn't.
|
||||||
|
|
||||||
2. Caching the filtered output - accept the submitted text and put it
|
2. Caching the filtered output - accept the submitted text and put it
|
||||||
unaltered into the database, but then also generate a filtered version and
|
unaltered into the database, but then also generate a filtered version and
|
||||||
stash that in the database. Serve the filtered version to readers, and the
|
stash that in the database. Serve the filtered version to readers, and the
|
||||||
unaltered version to editors. If need be, you can invalidate the cache and
|
unaltered version to editors. If need be, you can invalidate the cache and
|
||||||
have the cached filtered version be regenerated on the first page view. Pros?
|
have the cached filtered version be regenerated on the first page view. Pros?
|
||||||
Full data retention. Cons? It's more complicated.
|
Full data retention. Cons? It's more complicated, and opens other editors
|
||||||
|
up to XSS if they are using a WYSIWYG editor (to fix that, they'd have to
|
||||||
|
be able to get their hands on the *really* original text served in plaintext
|
||||||
|
mode).
|
||||||
|
|
||||||
In short, inbound filtering is almost as simple as outbound filtering, but
|
In short, inbound filtering is almost as simple as outbound filtering, but
|
||||||
it has some drawbacks which cannot be fixed unless you save both the original
|
it has some drawbacks which cannot be fixed unless you save both the original
|
||||||
and the filtered versions.
|
and the filtered versions.
|
||||||
|
|
||||||
There is a third option: profile and optimize HTMLPurifier yourself. ;-)
|
There is a third option: profile and optimize HTMLPurifier yourself. Be sure
|
||||||
|
to tell me if you decide to do that! ;-)
|
||||||
|
28
TODO
28
TODO
@ -6,25 +6,29 @@ Ongoing
|
|||||||
- Plugins for major CMSes (very tricky issue)
|
- Plugins for major CMSes (very tricky issue)
|
||||||
|
|
||||||
1.2 release
|
1.2 release
|
||||||
- Additional support for poorly written HTML
|
|
||||||
- Implement all non-essential attribute transforms
|
|
||||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
|
||||||
- Error logging for filtering and cleanup procedures
|
|
||||||
|
|
||||||
1.3 release
|
|
||||||
- Formatters for plaintext
|
|
||||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
|
||||||
shouldn't be paragraphed, such as lists and tables).
|
|
||||||
- Make URI validation routines tighter (especially mailto)
|
- Make URI validation routines tighter (especially mailto)
|
||||||
- More extensive URI filtering schemes
|
- More extensive URI filtering schemes
|
||||||
- Allow for background-image and list-style-image (see above)
|
- Allow for background-image and list-style-image (see above)
|
||||||
- Distinguish between different types of URIs, for instance, a mailto URI
|
- Distinguish between different types of URIs, for instance, a mailto URI
|
||||||
in IMG SRC is nonsensical
|
in IMG SRC is nonsensical
|
||||||
|
- Error logging for filtering/cleanup procedures
|
||||||
|
|
||||||
2.0 release
|
1.3 release
|
||||||
- Add various "levels" of cleaning
|
- Add various "levels" of cleaning
|
||||||
- Related: Allow strict (X)HTML
|
- Related: Allow strict (X)HTML
|
||||||
|
|
||||||
|
1.4 release
|
||||||
|
- Additional support for poorly written HTML
|
||||||
|
- Implement all non-essential attribute transforms
|
||||||
|
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
||||||
|
|
||||||
|
2.0 release
|
||||||
|
- Formatters for plaintext
|
||||||
|
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||||
|
shouldn't be paragraphed, such as lists and tables).
|
||||||
|
- Linkify URLs
|
||||||
|
- Smileys
|
||||||
|
|
||||||
3.0 release
|
3.0 release
|
||||||
- Extended HTML capabilities based on namespacing and tag transforms
|
- Extended HTML capabilities based on namespacing and tag transforms
|
||||||
- Hooks for adding custom processors to custom namespaced tags and
|
- Hooks for adding custom processors to custom namespaced tags and
|
||||||
@ -39,11 +43,9 @@ Unknown release (on a scratch-an-itch basis)
|
|||||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||||
- Automatically add non-breaking spaces to empty table cells when
|
- Automatically add non-breaking spaces to empty table cells when
|
||||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||||
- Pretty-printing HTML (adds dependency of Generator to HTMLDefinition)
|
|
||||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||||
numerically encoding all non-ASCII characters
|
numerically encoding all non-ASCII characters
|
||||||
- Preservation of indentation in tables (tricky since the contents can be
|
|
||||||
shuffled around)
|
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations
|
- Non-lossy smart alternate character encoding transformations
|
||||||
|
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||||
|
@ -3,15 +3,24 @@
|
|||||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
@include_once '../test-settings.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
|
||||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
|
||||||
|
|
||||||
$LEXERS = array(
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
'DirectLex' => new HTMLPurifier_Lexer_DirectLex(),
|
require_once 'HTMLPurifier/Config.php';
|
||||||
'PEARSax3' => new HTMLPurifier_Lexer_PEARSax3()
|
|
||||||
);
|
$LEXERS = array();
|
||||||
|
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||||
|
? $GLOBALS['HTMLPurifierTest']['Runs'] : 2;
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
|
||||||
|
if (!empty($GLOBALS['HTMLPurifierTest']['PEAR'])) {
|
||||||
|
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||||
|
$LEXERS['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
|
||||||
|
} else {
|
||||||
|
exit('PEAR required to perform benchmark.');
|
||||||
|
}
|
||||||
|
|
||||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||||
@ -56,9 +65,12 @@ class RowTimer extends Benchmark_Timer
|
|||||||
if ($standard == false) $standard = $v['diff'];
|
if ($standard == false) $standard = $v['diff'];
|
||||||
|
|
||||||
$perc = $v['diff'] * 100 / $standard;
|
$perc = $v['diff'] * 100 / $standard;
|
||||||
|
$bad_run = ($v['diff'] < 0);
|
||||||
|
|
||||||
$out .= '<td align="right">' . number_format($perc, 2, '.', '') .
|
$out .= '<td align="right"'.
|
||||||
'%</td>';
|
($bad_run ? ' style="color:#AAA;"' : '').
|
||||||
|
'>' . number_format($perc, 2, '.', '') .
|
||||||
|
'%</td><td>'.number_format($v['diff'],4,'.','').'</td>';
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -79,13 +91,13 @@ function print_lexers() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function do_benchmark($name, $document) {
|
function do_benchmark($name, $document) {
|
||||||
global $LEXERS;
|
global $LEXERS, $RUNS;
|
||||||
|
|
||||||
$timer = new RowTimer($name);
|
$timer = new RowTimer($name);
|
||||||
$timer->start();
|
$timer->start();
|
||||||
|
|
||||||
foreach($LEXERS as $key => $lexer) {
|
foreach($LEXERS as $key => $lexer) {
|
||||||
$tokens = $lexer->tokenizeHTML($document);
|
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
|
||||||
$timer->setMarker($key);
|
$timer->setMarker($key);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,7 +115,7 @@ function do_benchmark($name, $document) {
|
|||||||
<table border="1">
|
<table border="1">
|
||||||
<tr><th>Case</th><?php
|
<tr><th>Case</th><?php
|
||||||
foreach ($LEXERS as $key => $value) {
|
foreach ($LEXERS as $key => $value) {
|
||||||
echo '<th>' . htmlspecialchars($key) . '</th>';
|
echo '<th colspan="2">' . htmlspecialchars($key) . '</th>';
|
||||||
}
|
}
|
||||||
?></tr>
|
?></tr>
|
||||||
<?php
|
<?php
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ function appendHTMLDiv($document, $node, $html) {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Load copies of HTMLPurifier_ConfigDef and HTMLPurifier
|
// Load copies of HTMLPurifier_ConfigSchema and HTMLPurifier
|
||||||
|
|
||||||
$definition = HTMLPurifier_ConfigDef::instance();
|
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||||
$purifier = new HTMLPurifier();
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
|
||||||
@ -61,7 +61,7 @@ $types_document = new DOMDocument('1.0', 'UTF-8');
|
|||||||
$types_root = $types_document->createElement('types');
|
$types_root = $types_document->createElement('types');
|
||||||
$types_document->appendChild($types_root);
|
$types_document->appendChild($types_root);
|
||||||
$types_document->formatOutput = true;
|
$types_document->formatOutput = true;
|
||||||
foreach ($definition->types as $name => $expanded_name) {
|
foreach ($schema->types as $name => $expanded_name) {
|
||||||
$types_type = $types_document->createElement('type', $expanded_name);
|
$types_type = $types_document->createElement('type', $expanded_name);
|
||||||
$types_type->setAttribute('id', $name);
|
$types_type->setAttribute('id', $name);
|
||||||
$types_root->appendChild($types_type);
|
$types_root->appendChild($types_type);
|
||||||
@ -88,7 +88,7 @@ TODO for XML format:
|
|||||||
- create a definition (DTD or other) once interface stabilizes
|
- create a definition (DTD or other) once interface stabilizes
|
||||||
*/
|
*/
|
||||||
|
|
||||||
foreach($definition->info as $namespace_name => $namespace_info) {
|
foreach($schema->info as $namespace_name => $namespace_info) {
|
||||||
|
|
||||||
$dom_namespace = $dom_document->createElement('namespace');
|
$dom_namespace = $dom_document->createElement('namespace');
|
||||||
$dom_root->appendChild($dom_namespace);
|
$dom_root->appendChild($dom_namespace);
|
||||||
@ -100,7 +100,7 @@ foreach($definition->info as $namespace_name => $namespace_info) {
|
|||||||
$dom_namespace_description = $dom_document->createElement('description');
|
$dom_namespace_description = $dom_document->createElement('description');
|
||||||
$dom_namespace->appendChild($dom_namespace_description);
|
$dom_namespace->appendChild($dom_namespace_description);
|
||||||
appendHTMLDiv($dom_document, $dom_namespace_description,
|
appendHTMLDiv($dom_document, $dom_namespace_description,
|
||||||
$definition->info_namespace[$namespace_name]->description);
|
$schema->info_namespace[$namespace_name]->description);
|
||||||
|
|
||||||
foreach ($namespace_info as $name => $info) {
|
foreach ($namespace_info as $name => $info) {
|
||||||
|
|
||||||
@ -128,14 +128,14 @@ foreach($definition->info as $namespace_name => $namespace_info) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$raw_default = $definition->defaults[$namespace_name][$name];
|
$raw_default = $schema->defaults[$namespace_name][$name];
|
||||||
if (is_bool($raw_default)) {
|
if (is_bool($raw_default)) {
|
||||||
$default = $raw_default ? 'true' : 'false';
|
$default = $raw_default ? 'true' : 'false';
|
||||||
} elseif (is_string($raw_default)) {
|
} elseif (is_string($raw_default)) {
|
||||||
$default = "\"$raw_default\"";
|
$default = "\"$raw_default\"";
|
||||||
} else {
|
} else {
|
||||||
$default = print_r(
|
$default = print_r(
|
||||||
$definition->defaults[$namespace_name][$name], true
|
$schema->defaults[$namespace_name][$name], true
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
$dom_constraints->appendChild(
|
$dom_constraints->appendChild(
|
||||||
|
@ -21,7 +21,9 @@ if (!empty($_POST['html'])) {
|
|||||||
|
|
||||||
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Core', 'TidyFormat', !empty($_POST['tidy']));
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
$pure_html = $purifier->purify($html);
|
$pure_html = $purifier->purify($html);
|
||||||
|
|
||||||
?>
|
?>
|
||||||
@ -65,6 +67,8 @@ if (isset($html)) {
|
|||||||
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
||||||
}
|
}
|
||||||
?></textarea>
|
?></textarea>
|
||||||
|
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||||
|
name="tidy"<?php if (!empty($_POST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||||
<div>
|
<div>
|
||||||
<input type="submit" value="Submit" name="submit" class="button" />
|
<input type="submit" value="Submit" name="submit" class="button" />
|
||||||
</div>
|
</div>
|
||||||
|
@ -86,7 +86,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
Well-supported values are: disc, circle, square,
|
Well-supported values are: disc, circle, square,
|
||||||
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
||||||
CSS 3. Mostly IE lack of support.</td></tr>
|
CSS 3. Mostly IE lack of support.</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND, target milestone 1.0</td></tr>
|
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
||||||
<percentage>, auto)</td></tr>
|
<percentage>, auto)</td></tr>
|
||||||
@ -134,7 +134,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="2">Unknown</th></tr>
|
<tr><th colspan="2">Unknown</th></tr>
|
||||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr>
|
||||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||||
Depends on background-image</td></tr>
|
Depends on background-image</td></tr>
|
||||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||||
@ -144,7 +144,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||||
for Mozilla. Unknown target milestone.</td></tr>
|
for Mozilla. Unknown target milestone.</td></tr>
|
||||||
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.3</td></tr>
|
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr>
|
||||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||||
<tr class="impl-no"><td>min-height</td></tr>
|
<tr class="impl-no"><td>min-height</td></tr>
|
||||||
<tr class="impl-no"><td>max-width</td></tr>
|
<tr class="impl-no"><td>max-width</td></tr>
|
||||||
@ -254,7 +254,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
|||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="3">Transform, target milestone 1.2</th></tr>
|
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||||
<tr><td>TABLE</td></tr>
|
<tr><td>TABLE</td></tr>
|
||||||
|
@ -11,9 +11,14 @@ class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
|||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instances of HTMLPurifier_AttrDef_IPv4 and HTMLPurifier_AttrDef_IPv6
|
* Instance of HTMLPurifier_AttrDef_IPv4 sub-validator
|
||||||
*/
|
*/
|
||||||
var $ipv4, $ipv6;
|
var $ipv4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instance of HTMLPurifier_AttrDef_IPv6 sub-validator
|
||||||
|
*/
|
||||||
|
var $ipv6;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_Host() {
|
function HTMLPurifier_AttrDef_Host() {
|
||||||
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
||||||
|
@ -327,6 +327,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
$is_collecting = false; // are we globbing together tokens to package
|
$is_collecting = false; // are we globbing together tokens to package
|
||||||
// into one of the collectors?
|
// into one of the collectors?
|
||||||
$collection = array(); // collected nodes
|
$collection = array(); // collected nodes
|
||||||
|
$tag_index = 0; // the first node might be whitespace,
|
||||||
|
// so this tells us where the start tag is
|
||||||
|
|
||||||
foreach ($tokens_of_children as $token) {
|
foreach ($tokens_of_children as $token) {
|
||||||
$is_child = ($nesting == 0);
|
$is_child = ($nesting == 0);
|
||||||
@ -344,7 +346,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
if ($is_child) {
|
if ($is_child) {
|
||||||
// okay, let's stash the tokens away
|
// okay, let's stash the tokens away
|
||||||
// first token tells us the type of the collection
|
// first token tells us the type of the collection
|
||||||
switch ($collection[0]->name) {
|
switch ($collection[$tag_index]->name) {
|
||||||
case 'tr':
|
case 'tr':
|
||||||
case 'tbody':
|
case 'tbody':
|
||||||
$content[] = $collection;
|
$content[] = $collection;
|
||||||
@ -356,13 +358,13 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
case 'thead':
|
case 'thead':
|
||||||
case 'tfoot':
|
case 'tfoot':
|
||||||
// access the appropriate variable, $thead or $tfoot
|
// access the appropriate variable, $thead or $tfoot
|
||||||
$var = $collection[0]->name;
|
$var = $collection[$tag_index]->name;
|
||||||
if ($$var === false) {
|
if ($$var === false) {
|
||||||
$$var = $collection;
|
$$var = $collection;
|
||||||
} else {
|
} else {
|
||||||
// transmute the first and last entries into
|
// transmute the first and last entries into
|
||||||
// tbody tags, and then put into content
|
// tbody tags, and then put into content
|
||||||
$collection[0]->name = 'tbody';
|
$collection[$tag_index]->name = 'tbody';
|
||||||
$collection[count($collection)-1]->name = 'tbody';
|
$collection[count($collection)-1]->name = 'tbody';
|
||||||
$content[] = $collection;
|
$content[] = $collection;
|
||||||
}
|
}
|
||||||
@ -373,6 +375,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
$collection = array();
|
$collection = array();
|
||||||
$is_collecting = false;
|
$is_collecting = false;
|
||||||
|
$tag_index = 0;
|
||||||
} else {
|
} else {
|
||||||
// add the node to the collection
|
// add the node to the collection
|
||||||
$collection[] = $token;
|
$collection[] = $token;
|
||||||
@ -387,7 +390,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
if ($token->name == 'col') {
|
if ($token->name == 'col') {
|
||||||
// the only empty tag in the posse, we can handle it
|
// the only empty tag in the posse, we can handle it
|
||||||
// immediately
|
// immediately
|
||||||
$cols[] = array($token);
|
$cols[] = array_merge($collection, array($token));
|
||||||
|
$collection = array();
|
||||||
|
$tag_index = 0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
switch($token->name) {
|
switch($token->name) {
|
||||||
@ -401,7 +406,10 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
$collection[] = $token;
|
$collection[] = $token;
|
||||||
continue;
|
continue;
|
||||||
default:
|
default:
|
||||||
// unrecognized, drop silently
|
if ($token->type == 'text' && $token->is_whitespace) {
|
||||||
|
$collection[] = $token;
|
||||||
|
$tag_index++;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -415,6 +423,10 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||||
|
if (!empty($collection) && $is_collecting == false){
|
||||||
|
// grab the trailing space
|
||||||
|
$ret = array_merge($ret, $collection);
|
||||||
|
}
|
||||||
|
|
||||||
array_pop($tokens_of_children); // remove phantom token
|
array_pop($tokens_of_children); // remove phantom token
|
||||||
|
|
||||||
@ -423,4 +435,4 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
@ -88,7 +88,7 @@ class HTMLPurifier_Encoder
|
|||||||
if ($iconv && !$force_php) {
|
if ($iconv && !$force_php) {
|
||||||
// do the shortcut way
|
// do the shortcut way
|
||||||
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
||||||
return strtr($str, $non_sgml_chars);;
|
return strtr($str, $non_sgml_chars);
|
||||||
}
|
}
|
||||||
|
|
||||||
$mState = 0; // cached expected number of octets after the current octet
|
$mState = 0; // cached expected number of octets after the current octet
|
||||||
|
@ -23,6 +23,21 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'This directive was available since 1.1.'
|
'This directive was available since 1.1.'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// extension constraints could be factored into ConfigSchema
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'TidyFormat', false, 'bool',
|
||||||
|
'<p>Determines whether or not to run Tidy on the final output for pretty '.
|
||||||
|
'formatting reasons, such as indentation and wrap.</p><p>This can greatly '.
|
||||||
|
'improve readability for editors who are hand-editing the HTML, but is '.
|
||||||
|
'by no means necessary as HTML Purifier has already fixed all major '.
|
||||||
|
'errors the HTML may have had. Tidy is a non-default extension, and this directive '.
|
||||||
|
'will silently fail if Tidy is not available.</p><p>If you are looking to make '.
|
||||||
|
'the overall look of your page\'s source better, I recommend running Tidy '.
|
||||||
|
'on the entire page rather than just user-content (after all, the '.
|
||||||
|
'indentation relative to the containing blocks will be incorrect).</p><p>This '.
|
||||||
|
'directive was available since 1.1.1.</p>'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates HTML from tokens.
|
* Generates HTML from tokens.
|
||||||
*/
|
*/
|
||||||
@ -56,6 +71,30 @@ class HTMLPurifier_Generator
|
|||||||
foreach ($tokens as $token) {
|
foreach ($tokens as $token) {
|
||||||
$html .= $this->generateFromToken($token);
|
$html .= $this->generateFromToken($token);
|
||||||
}
|
}
|
||||||
|
if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
|
||||||
|
|
||||||
|
$tidy_options = array(
|
||||||
|
'indent'=> true,
|
||||||
|
'output-xhtml' => $this->_xhtml,
|
||||||
|
'show-body-only' => true,
|
||||||
|
'indent-spaces' => 2,
|
||||||
|
'wrap' => 68,
|
||||||
|
);
|
||||||
|
if (version_compare(PHP_VERSION, '5', '<')) {
|
||||||
|
tidy_set_encoding('utf8');
|
||||||
|
foreach ($tidy_options as $key => $value) {
|
||||||
|
tidy_setopt($key, $value);
|
||||||
|
}
|
||||||
|
tidy_parse_string($html);
|
||||||
|
tidy_clean_repair();
|
||||||
|
$html = tidy_get_output();
|
||||||
|
} else {
|
||||||
|
$tidy = new Tidy;
|
||||||
|
$tidy->parseString($html, $tidy_options, 'utf8');
|
||||||
|
$tidy->cleanRepair();
|
||||||
|
$html = (string) $tidy;
|
||||||
|
}
|
||||||
|
}
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -18,6 +18,13 @@ require_once 'HTMLPurifier/TokenFactory.php';
|
|||||||
*
|
*
|
||||||
* @note PHP's DOM extension does not actually parse any entities, we use
|
* @note PHP's DOM extension does not actually parse any entities, we use
|
||||||
* our own function to do that.
|
* our own function to do that.
|
||||||
|
*
|
||||||
|
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
|
||||||
|
* If this is a huge problem, due to the fact that HTML is hand
|
||||||
|
* edited and you are unable to get a parser cache that caches
|
||||||
|
* the output of HTML Purifier while keeping the original HTML lying
|
||||||
|
* around, you may want to run Tidy on the resulting output or use
|
||||||
|
* HTMLPurifier_DirectLex
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||||
|
17
test-settings.sample.php
Normal file
17
test-settings.sample.php
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// This file is necessary to run the unit tests and profiling
|
||||||
|
// scripts.
|
||||||
|
|
||||||
|
// Is PEAR available on your system? If it isn't, set to false. If PEAR
|
||||||
|
// is not part of the default include_path, add it.
|
||||||
|
$GLOBALS['HTMLPurifierTest']['PEAR'] = true;
|
||||||
|
|
||||||
|
// How many times should profiling scripts iterate over the function? More runs
|
||||||
|
// means more accurate results, but they'll take longer to perform.
|
||||||
|
$GLOBALS['HTMLPurifierTest']['Runs'] = 2;
|
||||||
|
|
||||||
|
// Where is SimpleTest located?
|
||||||
|
$simpletest_location = '/path/to/simpletest/';
|
||||||
|
|
||||||
|
?>
|
@ -1,7 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ChildDef.php';
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
|
||||||
class HTMLPurifier_ChildDefTest extends UnitTestCase
|
class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||||
@ -12,7 +12,8 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
var $gen;
|
var $gen;
|
||||||
|
|
||||||
function HTMLPurifier_ChildDefTest() {
|
function HTMLPurifier_ChildDefTest() {
|
||||||
$this->lex = HTMLPurifier_Lexer::create();
|
// it is vital that the tags be treated as literally as possible
|
||||||
|
$this->lex = new HTMLPurifier_Lexer_DirectLex();
|
||||||
$this->gen = new HTMLPurifier_Generator();
|
$this->gen = new HTMLPurifier_Generator();
|
||||||
parent::UnitTestCase();
|
parent::UnitTestCase();
|
||||||
}
|
}
|
||||||
@ -98,6 +99,14 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
$inputs[6] = 'foo';
|
$inputs[6] = 'foo';
|
||||||
$expect[6] = false;
|
$expect[6] = false;
|
||||||
|
|
||||||
|
// whitespace sticks to the previous element, last whitespace is
|
||||||
|
// stationary
|
||||||
|
$inputs[7] = "\n <tr />\n <tr />\n ";
|
||||||
|
$expect[7] = true;
|
||||||
|
|
||||||
|
$inputs[8] = "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t";
|
||||||
|
$expect[8] = "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t";
|
||||||
|
|
||||||
$this->assertSeries($inputs, $expect, $config);
|
$this->assertSeries($inputs, $expect, $config);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -209,4 +218,4 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
@ -123,6 +123,9 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
var $config;
|
var $config;
|
||||||
function assertGeneration($tokens, $expect) {
|
function assertGeneration($tokens, $expect) {
|
||||||
$result = $this->gen->generateFromTokens($tokens, $this->config);
|
$result = $this->gen->generateFromTokens($tokens, $this->config);
|
||||||
|
// normalized newlines, this probably should be put somewhere else
|
||||||
|
$result = str_replace("\r\n", "\n", $result);
|
||||||
|
$result = str_replace("\r", "\n", $result);
|
||||||
$this->assertEqual($expect, $result);
|
$this->assertEqual($expect, $result);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,6 +151,25 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_generateFromTokens_TidyFormat() {
|
||||||
|
// abort test if tidy isn't loaded
|
||||||
|
if (!extension_loaded('tidy')) return;
|
||||||
|
|
||||||
|
$this->config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->config->set('Core', 'TidyFormat', true);
|
||||||
|
|
||||||
|
// nice wrapping please
|
||||||
|
$this->assertGeneration(
|
||||||
|
array(
|
||||||
|
new HTMLPurifier_Token_Start('div'),
|
||||||
|
new HTMLPurifier_Token_Text('Text'),
|
||||||
|
new HTMLPurifier_Token_End('div')
|
||||||
|
),
|
||||||
|
"<div>\n Text\n</div>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
Loading…
Reference in New Issue
Block a user