0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-09 15:28:40 +00:00
- Update documentation
- Fix parse error in configuration documentation

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@444 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-09-24 02:06:12 +00:00
parent c046da638a
commit 6c04bbdac1
4 changed files with 24 additions and 11 deletions

3
NEWS
View File

@ -5,7 +5,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
(major feature release) (major feature release)
1.1.1, unknown projected release date 1.1.1, unknown projected release date
(bugfix release) - Various documentation updates
- Fixed parse error in configuration documentation script
1.1.0, released 2006-09-16 1.1.0, released 2006-09-16
- Made URI validator more forgiving: will ignore leading and trailing - Made URI validator more forgiving: will ignore leading and trailing

13
SLOW
View File

@ -17,18 +17,23 @@ second tacked on to the load time probably isn't going to be that huge of
a problem. Then, displaying the content is simply a matter of outputting a problem. Then, displaying the content is simply a matter of outputting
it directly from your database/filesystem. The trouble with this method is it directly from your database/filesystem. The trouble with this method is
that your user loses the original text, and when doing edits, will be that your user loses the original text, and when doing edits, will be
handling the filtered text. Of course, maybe that's a good thing. If you handling the filtered text. While this may be a good thing, especially if
don't mind a little extra complexity, you can try... you're using a WYSIWYG editor, it can also result in data-loss if a user
expects a certain element to be available but it isn't.
2. Caching the filtered output - accept the submitted text and put it 2. Caching the filtered output - accept the submitted text and put it
unaltered into the database, but then also generate a filtered version and unaltered into the database, but then also generate a filtered version and
stash that in the database. Serve the filtered version to readers, and the stash that in the database. Serve the filtered version to readers, and the
unaltered version to editors. If need be, you can invalidate the cache and unaltered version to editors. If need be, you can invalidate the cache and
have the cached filtered version be regenerated on the first page view. Pros? have the cached filtered version be regenerated on the first page view. Pros?
Full data retention. Cons? It's more complicated. Full data retention. Cons? It's more complicated, and opens other editors
up to XSS if they are using a WYSIWYG editor (to fix that, they'd have to
be able to get their hands on the *really* original text served in plaintext
mode).
In short, inbound filtering is almost as simple as outbound filtering, but In short, inbound filtering is almost as simple as outbound filtering, but
it has some drawbacks which cannot be fixed unless you save both the original it has some drawbacks which cannot be fixed unless you save both the original
and the filtered versions. and the filtered versions.
There is a third option: profile and optimize HTMLPurifier yourself. ;-) There is a third option: profile and optimize HTMLPurifier yourself. Be sure
to tell me if you decide to do that! ;-)

View File

@ -50,7 +50,7 @@ function appendHTMLDiv($document, $node, $html) {
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Load copies of HTMLPurifier_ConfigDef and HTMLPurifier // Load copies of HTMLPurifier_ConfigDef and HTMLPurifier
$definition = HTMLPurifier_ConfigDef::instance(); $schema = HTMLPurifier_ConfigSchema::instance();
$purifier = new HTMLPurifier(); $purifier = new HTMLPurifier();
@ -61,7 +61,7 @@ $types_document = new DOMDocument('1.0', 'UTF-8');
$types_root = $types_document->createElement('types'); $types_root = $types_document->createElement('types');
$types_document->appendChild($types_root); $types_document->appendChild($types_root);
$types_document->formatOutput = true; $types_document->formatOutput = true;
foreach ($definition->types as $name => $expanded_name) { foreach ($schema->types as $name => $expanded_name) {
$types_type = $types_document->createElement('type', $expanded_name); $types_type = $types_document->createElement('type', $expanded_name);
$types_type->setAttribute('id', $name); $types_type->setAttribute('id', $name);
$types_root->appendChild($types_type); $types_root->appendChild($types_type);
@ -88,7 +88,7 @@ TODO for XML format:
- create a definition (DTD or other) once interface stabilizes - create a definition (DTD or other) once interface stabilizes
*/ */
foreach($definition->info as $namespace_name => $namespace_info) { foreach($schema->info as $namespace_name => $namespace_info) {
$dom_namespace = $dom_document->createElement('namespace'); $dom_namespace = $dom_document->createElement('namespace');
$dom_root->appendChild($dom_namespace); $dom_root->appendChild($dom_namespace);
@ -100,7 +100,7 @@ foreach($definition->info as $namespace_name => $namespace_info) {
$dom_namespace_description = $dom_document->createElement('description'); $dom_namespace_description = $dom_document->createElement('description');
$dom_namespace->appendChild($dom_namespace_description); $dom_namespace->appendChild($dom_namespace_description);
appendHTMLDiv($dom_document, $dom_namespace_description, appendHTMLDiv($dom_document, $dom_namespace_description,
$definition->info_namespace[$namespace_name]->description); $schema->info_namespace[$namespace_name]->description);
foreach ($namespace_info as $name => $info) { foreach ($namespace_info as $name => $info) {
@ -128,14 +128,14 @@ foreach($definition->info as $namespace_name => $namespace_info) {
} }
} }
$raw_default = $definition->defaults[$namespace_name][$name]; $raw_default = $schema->defaults[$namespace_name][$name];
if (is_bool($raw_default)) { if (is_bool($raw_default)) {
$default = $raw_default ? 'true' : 'false'; $default = $raw_default ? 'true' : 'false';
} elseif (is_string($raw_default)) { } elseif (is_string($raw_default)) {
$default = "\"$raw_default\""; $default = "\"$raw_default\"";
} else { } else {
$default = print_r( $default = print_r(
$definition->defaults[$namespace_name][$name], true $schema->defaults[$namespace_name][$name], true
); );
} }
$dom_constraints->appendChild( $dom_constraints->appendChild(

View File

@ -18,6 +18,13 @@ require_once 'HTMLPurifier/TokenFactory.php';
* *
* @note PHP's DOM extension does not actually parse any entities, we use * @note PHP's DOM extension does not actually parse any entities, we use
* our own function to do that. * our own function to do that.
*
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
* If this is a huge problem, due to the fact that HTML is hand
* edited and you are unable to get a parser cache that caches the
* output of HTML Purifier while keeping the original HTML lying
* around, you may want to run Tidy on the resulting output or use
* HTMLPurifier_DirectLex
*/ */
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer