mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-03-23 14:27:02 +00:00
Massively refactored Definition; moved the HTML-specific code out of MakeWellFormed.
Added table functionality for nesting (don't know how I missed that). It's still broken, though.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@135 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
2b5589c884
commit
9c6ae16764
@ -33,30 +33,6 @@ class HTMLPurifier_Definition
|
|||||||
|
|
||||||
var $info = array();
|
var $info = array();
|
||||||
|
|
||||||
// used solely by HTMLPurifier_Strategy_MakeWellFormed
|
|
||||||
var $info_closes_p = array(
|
|
||||||
// these are all block elements: blocks aren't allowed in P
|
|
||||||
'address' => true,
|
|
||||||
'blockquote' => true,
|
|
||||||
'dd' => true,
|
|
||||||
'dir' => true,
|
|
||||||
'div' => true,
|
|
||||||
'dl' => true,
|
|
||||||
'dt' => true,
|
|
||||||
'h1' => true,
|
|
||||||
'h2' => true,
|
|
||||||
'h3' => true,
|
|
||||||
'h4' => true,
|
|
||||||
'h5' => true,
|
|
||||||
'h6' => true,
|
|
||||||
'hr' => true,
|
|
||||||
'ol' => true,
|
|
||||||
'p' => true,
|
|
||||||
'pre' => true,
|
|
||||||
'table' => true,
|
|
||||||
'ul' => true
|
|
||||||
);
|
|
||||||
|
|
||||||
// used solely by HTMLPurifier_Strategy_ValidateAttributes
|
// used solely by HTMLPurifier_Strategy_ValidateAttributes
|
||||||
var $info_global_attr = array();
|
var $info_global_attr = array();
|
||||||
|
|
||||||
@ -75,7 +51,23 @@ class HTMLPurifier_Definition
|
|||||||
function HTMLPurifier_Definition() {}
|
function HTMLPurifier_Definition() {}
|
||||||
|
|
||||||
function setup() {
|
function setup() {
|
||||||
|
|
||||||
// emulates the structure of the DTD
|
// emulates the structure of the DTD
|
||||||
|
// these are condensed, however, with bad stuff taken out
|
||||||
|
// screening process was done by hand
|
||||||
|
|
||||||
|
// The code makes certain assumptions about the structure of this
|
||||||
|
// definition for optimization reasons:
|
||||||
|
//
|
||||||
|
// FixNesting - There will never be a need for cascading removal
|
||||||
|
// of tags, usually triggered by a node requiring the
|
||||||
|
// existence of another node that may be deleted.
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// info[] : initializes the definition objects
|
||||||
|
|
||||||
|
// if you attempt to define rules later on for a tag not in this array
|
||||||
|
// PHP will create an stdclass
|
||||||
|
|
||||||
$allowed_tags =
|
$allowed_tags =
|
||||||
array(
|
array(
|
||||||
@ -84,28 +76,23 @@ class HTMLPurifier_Definition
|
|||||||
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small', 'u', 's',
|
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small', 'u', 's',
|
||||||
'strike', 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
|
'strike', 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
|
||||||
'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
|
'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
|
||||||
'pre', 'a'
|
'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
|
||||||
|
'colgroup', 'col', 'td', 'th', 'tr'
|
||||||
);
|
);
|
||||||
|
|
||||||
foreach ($allowed_tags as $tag) {
|
foreach ($allowed_tags as $tag) {
|
||||||
$this->info[$tag] = new HTMLPurifier_ElementDef();
|
$this->info[$tag] = new HTMLPurifier_ElementDef();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// info[]->child : defines allowed children for elements
|
||||||
|
|
||||||
// entities: prefixed with e_ and _ replaces .
|
// entities: prefixed with e_ and _ replaces .
|
||||||
|
|
||||||
// we don't use an array because that complicates interpolation
|
// we don't use an array because that complicates interpolation
|
||||||
// strings are used instead of arrays because if you use arrays,
|
// strings are used instead of arrays because if you use arrays,
|
||||||
// you have to do some hideous manipulation with array_merge()
|
// you have to do some hideous manipulation with array_merge()
|
||||||
|
|
||||||
// these are condensed, remember, with bad stuff taken out
|
|
||||||
|
|
||||||
// transforms: font, menu, dir, center
|
|
||||||
|
|
||||||
// DON'T MONKEY AROUND THIS unless you know what you are doing
|
|
||||||
// and also know the assumptions the code makes about what this
|
|
||||||
// contains for optimization purposes (see fixNesting)
|
|
||||||
|
|
||||||
// child info
|
|
||||||
|
|
||||||
$e_special_extra = 'img';
|
$e_special_extra = 'img';
|
||||||
$e_special_basic = 'br | span | bdo';
|
$e_special_basic = 'br | span | bdo';
|
||||||
$e_special = "$e_special_basic | $e_special_extra";
|
$e_special = "$e_special_basic | $e_special_extra";
|
||||||
@ -177,10 +164,12 @@ class HTMLPurifier_Definition
|
|||||||
$this->info['h5']->child =
|
$this->info['h5']->child =
|
||||||
$this->info['h6']->child = $e_Inline;
|
$this->info['h6']->child = $e_Inline;
|
||||||
|
|
||||||
|
// the only three required definitions, besides custom table code
|
||||||
$this->info['ol']->child =
|
$this->info['ol']->child =
|
||||||
$this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li');
|
$this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li');
|
||||||
|
|
||||||
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
|
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
|
||||||
|
|
||||||
$this->info['address']->child =
|
$this->info['address']->child =
|
||||||
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
||||||
" | $e_misc_inline");
|
" | $e_misc_inline");
|
||||||
@ -193,7 +182,23 @@ class HTMLPurifier_Definition
|
|||||||
|
|
||||||
$this->info['a']->child = $e_a_content;
|
$this->info['a']->child = $e_a_content;
|
||||||
|
|
||||||
// attribute info
|
$this->info['table']->child = new HTMLPurifier_ChildDef(
|
||||||
|
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
||||||
|
|
||||||
|
// not a real entity, watch the double underscore
|
||||||
|
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
||||||
|
$this->info['thead']->child = $e__row;
|
||||||
|
$this->info['tfoot']->child = $e__row;
|
||||||
|
$this->info['tbody']->child = $e__row;
|
||||||
|
$this->info['colgroup']->child = new HTMLPurifier_ChildDef_Optional('col');
|
||||||
|
$this->info['col']->child = new HTMLPurifier_ChildDef_Empty();
|
||||||
|
$this->info['tr']->child = new HTMLPurifier_ChildDef_Required('th | td');
|
||||||
|
$this->info['th']->child = $e_Flow;
|
||||||
|
$this->info['td']->child = $e_Flow;
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// info[]->attr : defines allowed attributes for elements
|
||||||
|
|
||||||
// this doesn't include REQUIRED declarations, those are handled
|
// this doesn't include REQUIRED declarations, those are handled
|
||||||
// by the transform classes
|
// by the transform classes
|
||||||
|
|
||||||
@ -205,6 +210,39 @@ class HTMLPurifier_Definition
|
|||||||
'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
|
'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// UNIMP : info_tag_transform : transformations of tags
|
||||||
|
|
||||||
|
// font -> span / attributes: size color face
|
||||||
|
// css: font-size color font-family
|
||||||
|
// menu -> ul
|
||||||
|
// dir -> ul
|
||||||
|
// center -> div / css: text-align: center;
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// info[]->auto_close : tags that automatically close another
|
||||||
|
|
||||||
|
// these are all block elements: blocks aren't allowed in P
|
||||||
|
$this->info['p']->auto_close = array_flip(array(
|
||||||
|
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
|
||||||
|
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
|
||||||
|
'table', 'ul'
|
||||||
|
));
|
||||||
|
|
||||||
|
$this->info['li']->auto_close = array('li' => true);
|
||||||
|
|
||||||
|
// we need TABLE and heading mismatch code
|
||||||
|
// we may need to make this more flexible for heading mismatch,
|
||||||
|
// or we can just create another info
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// UNIMP : info[]->attr_transform : attribute transformations in elements
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// UNIMP : info_attr_transform : global attribute transform (for xml:lang)
|
||||||
|
|
||||||
|
// this might have bad implications for performance
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -212,8 +250,9 @@ class HTMLPurifier_Definition
|
|||||||
class HTMLPurifier_ElementDef
|
class HTMLPurifier_ElementDef
|
||||||
{
|
{
|
||||||
|
|
||||||
var $child;
|
|
||||||
var $attr = array();
|
var $attr = array();
|
||||||
|
var $auto_close = array();
|
||||||
|
var $child;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,35 +59,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
// if there's a parent, check for special case
|
// if there's a parent, check for special case
|
||||||
if (!empty($current_nesting)) {
|
if (!empty($current_nesting)) {
|
||||||
$current_parent = array_pop($current_nesting);
|
|
||||||
|
|
||||||
// this ought to be moved to definition
|
$parent = array_pop($current_nesting);
|
||||||
|
$parent_name = $parent->name;
|
||||||
|
$parent_info = $this->definition->info[$parent_name];
|
||||||
|
|
||||||
// check if we're closing a P tag
|
if (isset($parent_info->auto_close[$token->name])) {
|
||||||
if ($current_parent->name == 'p' &&
|
$result[] = new HTMLPurifier_Token_End($parent_name);
|
||||||
isset($this->definition->info_closes_p[$token->name])
|
|
||||||
) {
|
|
||||||
$result[] = new HTMLPurifier_Token_End('p');
|
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
$current_nesting[] = $token;
|
$current_nesting[] = $token;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we're closing a LI tag
|
$current_nesting[] = $parent; // undo the pop
|
||||||
if ($current_parent->name == 'li' &&
|
|
||||||
$token->name == 'li'
|
|
||||||
) {
|
|
||||||
$result[] = new HTMLPurifier_Token_End('li');
|
|
||||||
$result[] = $token;
|
|
||||||
$current_nesting[] = $token;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// this is more TIDY stuff
|
|
||||||
// we should also get some TABLE related code
|
|
||||||
// mismatched h#
|
|
||||||
|
|
||||||
$current_nesting[] = $current_parent; // undo the pop
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
|
@ -37,6 +37,10 @@ class HTMLPurifier_Strategy_FixNestingTest
|
|||||||
$inputs[4] = '<ul>Illegal text<li>Legal item</li></ul>';
|
$inputs[4] = '<ul>Illegal text<li>Legal item</li></ul>';
|
||||||
$expect[4] = '<ul><li>Legal item</li></ul>';
|
$expect[4] = '<ul><li>Legal item</li></ul>';
|
||||||
|
|
||||||
|
// test custom table definition
|
||||||
|
$inputs[5] = '<table><tr><td>Cell 1</td></tr></table>';
|
||||||
|
$expect[5] = '<table><tr><td>Cell 1</td></tr></table>';
|
||||||
|
|
||||||
$this->assertStrategyWorks($strategy, $inputs, $expect);
|
$this->assertStrategyWorks($strategy, $inputs, $expect);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user