0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-09 15:28:40 +00:00

Add support for unrecognized elements in MakeWellFormed.

The MakeWellFormed strategy uses metadata from HTMLDefinition in order to
determine whether or not tokens need to be converted or tags need to be
auto-closed. While this functionality is good to have, it is by no means
essential, and MakeWellFormed should not error when this information is not
available.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2008-07-05 03:11:29 -04:00
parent 700d5bcbfc
commit 965be3bd73
3 changed files with 23 additions and 8 deletions

2
NEWS
View File

@ -20,6 +20,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
is heavy (for all doctypes).
! %AutoFormat.RemoveEmpty to remove some empty tags from documents. Please don't
use on hand-written HTML.
! Add error-cases for unsupported elements in MakeWellFormed. This enables
the strategy to be used, standalone, on untrusted input.
. Strategy_MakeWellFormed now operates in-place, saving memory and allowing
for more interesting filter-backtracking
. New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind

View File

@ -110,15 +110,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
continue;
}
$info = $definition->info[$token->name]->child;
if (isset($definition->info[$token->name])) {
$type = $definition->info[$token->name]->child->type;
} else {
$type = false; // Type is unknown, treat accordingly
}
// quick tag checks: anything that's *not* an end tag
$ok = false;
if ($info->type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
// test if it claims to be a start tag but is empty
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
$ok = true;
} elseif ($info->type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
// claims to be empty but really is a start tag
$token = array(
new HTMLPurifier_Token_Start($token->name, $token->attr),
@ -135,12 +139,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if (!empty($this->currentNesting)) {
$parent = array_pop($this->currentNesting);
$parent_info = $definition->info[$parent->name];
if (isset($definition->info[$parent->name])) {
$elements = $definition->info[$parent->name]->child->elements;
$autoclose = !isset($elements[$token->name]);
} else {
$autoclose = false;
}
// this can be replaced with a more general algorithm:
// if the token is not allowed by the parent, auto-close
// the parent
if (!isset($parent_info->child->elements[$token->name])) {
if ($autoclose) {
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
// insert parent end tag before this tag;
// end tag isn't processed, but this tag is processed again

View File

@ -86,5 +86,12 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
);
}
/**
 * Checks how markup built entirely from unrecognized element names
 * (asdf, foobar, biddles) is handled: the expected output keeps the
 * self-closing <foobar /> untouched and closes the dangling <biddles>
 * before the </asdf> end tag.
 *
 * NOTE(review): assertResult() is defined outside this view; presumably it
 * runs the strategy under test on the first argument and compares the
 * result with the second -- confirm against the harness.
 */
function testUnrecognized() {
    // Input leaves <biddles> open when </asdf> arrives.
    $input = '<asdf><foobar /><biddles>foo</asdf>';
    // Expected output has the missing </biddles> end tag supplied.
    $expected = '<asdf><foobar /><biddles>foo</biddles></asdf>';
    $this->assertResult($input, $expected);
}
}