0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-08 23:11:52 +00:00

[1.3.0] <li value="4"> and <ul start="2"> now allowed in loose mode

- Updated progress with some more impl-no decisions
 - Loose vs. Strict now has better tallying on current behavior
 - Document what we're not allowing in loose
 - Strict boolean indicator added to HTMLDefinition
 - Added XHTML 1.1 to TODO.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@571 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-11-23 22:15:35 +00:00
parent 3c4da9666f
commit 92b3f0e817
6 changed files with 65 additions and 42 deletions

1
NEWS
View File

@ -18,6 +18,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! New directive %HTML.Parent, allows you to only allow inline content ! New directive %HTML.Parent, allows you to only allow inline content
! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let ! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let
users narrow the set of allowed tags users narrow the set of allowed tags
! <li value="4"> and <ol start="2"> now allowed in loose mode
- Added missing type to ChildDef_Chameleon - Added missing type to ChildDef_Chameleon
- Remove Tidy option from demo if there is not Tidy available - Remove Tidy option from demo if there is not Tidy available
. ChildDef_Required guards against empty tags . ChildDef_Required guards against empty tags

1
TODO
View File

@ -39,6 +39,7 @@ TODO List
- Hooks for adding custom processors to custom namespaced tags and - Hooks for adding custom processors to custom namespaced tags and
attributes, offer default implementation attributes, offer default implementation
- Lots of documentation and samples - Lots of documentation and samples
- XHTML 1.1 support
Ongoing Ongoing
- Lots of profiling, make it faster! - Lots of profiling, make it faster!

View File

@ -128,19 +128,20 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
<tbody> <tbody>
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr> <tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
<tr class="danger"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative</td></tr> <tr class="danger impl-no"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative to even be considered,
<tr class="danger"><td>left</td></tr> but it's still possible to arbitrarily position by running over.</td></tr>
<tr class="danger"><td>right</td></tr> <tr class="danger impl-no"><td>left</td></tr>
<tr class="danger"><td>top</td></tr> <tr class="danger impl-no"><td>right</td></tr>
<tr><td>clip</td><td>-</td></tr> <tr class="danger impl-no"><td>top</td></tr>
<tr class="danger"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit <tr class="impl-no"><td>clip</td><td>-</td></tr>
<tr class="danger impl-no"><td>position</td><td>ENUM(static, relative, absolute, fixed)
relative not absolute?</td></tr> relative not absolute?</td></tr>
<tr class="danger"><td>z-index</td><td>Dangerous</td></tr> <tr class="danger impl-no"><td>z-index</td><td>Dangerous</td></tr>
</tbody> </tbody>
<tbody> <tbody>
<tr><th colspan="2">Unknown</th></tr> <tr><th colspan="2">Unknown</th></tr>
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr> <tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed), <tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
Depends on background-image</td></tr> Depends on background-image</td></tr>
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr> <tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
@ -150,7 +151,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
inline-block has incomplete IE6 support and requires -moz-inline-box inline-block has incomplete IE6 support and requires -moz-inline-box
for Mozilla. Unknown target milestone.</td></tr> for Mozilla. Unknown target milestone.</td></tr>
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr> <tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr> <tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.3</td></tr>
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr> <tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
<tr class="impl-no"><td>min-height</td></tr> <tr class="impl-no"><td>min-height</td></tr>
<tr class="impl-no"><td>max-width</td></tr> <tr class="impl-no"><td>max-width</td></tr>
@ -236,7 +237,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
<tr><th colspan="3">Questionable</th></tr> <tr><th colspan="3">Questionable</th></tr>
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr> <tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr> <tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts</td></tr> <tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
</tbody> </tbody>
<tbody> <tbody>
@ -283,11 +284,11 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr> <tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr> <tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr> <tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr> <tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr> <tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
<tr><td>OL</td></tr> <tr><td>OL</td></tr>
<tr><td>UL</td></tr> <tr><td>UL</td></tr>
<tr><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', transform may not be desirable, see ol.start. Configurable.</td></tr> <tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr> <tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr>
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr> <tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
<tr><td>TD, TH</td></tr> <tr><td>TD, TH</td></tr>

View File

@ -8,36 +8,30 @@ to HTML Purifier, though, so let's take a look:
== Major incompatibilities == == Major incompatibilities ==
[done] BLOCKQUOTE changes from 'flow' to 'block' [done] BLOCKQUOTE changes from 'flow' to 'block'
behavior: inline inner contents should not be nuked, paragraph as necessary current behavior: inline inner contents should not be nuked, block-ify as necessary
[partially-done] U, S, STRIKE cut [partially-done] U, S, STRIKE cut
behavior: replace with appropriate inline span + CSS current behavior: removed completely
[partially-done] ADDRESS from potpourri to Inline (removes p tags) (lower importance) projected behavior: replace with appropriate inline span + CSS
behavior: p tags silently dropped or replaced with something (<br>) [done] ADDRESS from potpourri to Inline (removes p tags)
current behavior: block tags silently dropped
ideal behavior: replace tags with something like <br>. (not high priority)
== Things we can loosen up == == Things we can loosen up ==
Tags DIR, MENU, CENTER, ISINDEX, FONT, BASEFONT? allowed in loose Tags DIR, MENU, CENTER, ISINDEX, FONT, BASEFONT? allowed in loose
Attributes allowed in loose: current behavior: transform to strict-valid forms
div,p,h#.align Attributes allowed in loose (see attribute transforms in 'dev-progress.html')
ul,ol.type current behavior: projected to transform into strict-valid forms
ul,ol,dl.compact
ol.start
li.type,value
hr.align,noshade,size,width
pre.width
img.name?,align,border,hspace,vspace
table.align,bgcolor
caption.align
tr.bgcolor
th,td.nowrap,bgcolor,width,height
== Periphery issues == == Periphery issues ==
A tag's attribute 'target' (for selecting frames) cut A tag's attribute 'target' (for selecting frames) cut
behavior: shouldn't be needed, use loose doctype if needed current behavior: not allowed at all
OL/LI tag's attribute 'start' (for renumbering lists) cut projected behavior: use loose doctype if needed, needs valid values
behavior: no substitute, just delete [done] OL/LI tag's attribute 'start'/'value' (for renumbering lists) cut
current behavior: no substitute, just delete when in strict, allow in loose
Attribute 'name' deprecated in favor of 'id' Attribute 'name' deprecated in favor of 'id'
behavior: not allowed in first place, but create proper AttrTransform current behavior: dropped silently
PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows) projected behavior: create proper AttrTransform (currently not allowed at all)
behavior: disallow as usual [done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
current behavior: disallow as usual

View File

@ -22,4 +22,15 @@ whole point about CSS is to separate styling from content, so inline styling
doesn't solve that problem. doesn't solve that problem.
It's an icky question, and we'll have to deal with it as more and more It's an icky question, and we'll have to deal with it as more and more
transforms get implemented. transforms get implemented. As of right now, however, we currently support
these loose-only constructs in loose mode:
- <ol start="1">, <li value="1"> attributes
- <u>, <strike>, <s> tags
- flow children in <blockquote>
- mixed children in <address>
The changed child definitions as well as ol.start and li.value are the most
compelling reasons why loose should be used. We may want to offer disabling <u>,
<strike> and <s> by themselves.

View File

@ -159,17 +159,23 @@ class HTMLPurifier_HTMLDefinition
/** /**
* Lookup table of flow elements * Lookup table of flow elements
* @public
*/ */
var $info_flow_elements = array(); var $info_flow_elements = array();
/**
* Boolean flag: is this a strict definition?
* @public
*/
var $strict;
/** /**
* Initializes the definition, the meat of the class. * Initializes the definition, the meat of the class.
*/ */
function setup($config) { function setup($config) {
// emulates the structure of the DTD // some cached config values
// these are condensed, however, with bad stuff taken out $this->strict = $config->get('HTML', 'Strict');
// screening process was done by hand
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// info[] : initializes the definition objects // info[] : initializes the definition objects
@ -188,7 +194,7 @@ class HTMLPurifier_HTMLDefinition
'colgroup', 'col', 'td', 'th', 'tr' 'colgroup', 'col', 'td', 'th', 'tr'
); );
if (!$config->get('HTML', 'Strict')) { if (!$this->strict) {
$allowed_tags[] = 'u'; $allowed_tags[] = 'u';
$allowed_tags[] = 's'; $allowed_tags[] = 's';
$allowed_tags[] = 'strike'; $allowed_tags[] = 'strike';
@ -201,6 +207,10 @@ class HTMLPurifier_HTMLDefinition
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// info[]->child : defines allowed children for elements // info[]->child : defines allowed children for elements
// emulates the structure of the DTD
// however, these are condensed, with bad stuff taken out
// screening process was done by hand
// entities: prefixed with e_ and _ replaces . from DTD // entities: prefixed with e_ and _ replaces . from DTD
// double underlines are entities we made up // double underlines are entities we made up
@ -254,7 +264,7 @@ class HTMLPurifier_HTMLDefinition
$this->info['li']->child = $this->info['li']->child =
$this->info['div']->child = $e_Flow; $this->info['div']->child = $e_Flow;
if ($config->get('HTML', 'Strict')) { if ($this->strict) {
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote(); $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
} else { } else {
$this->info['blockquote']->child = $e_Flow; $this->info['blockquote']->child = $e_Flow;
@ -299,7 +309,7 @@ class HTMLPurifier_HTMLDefinition
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd'); $this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
if ($config->get('HTML', 'Strict')) { if ($this->strict) {
$this->info['address']->child = $e_Inline; $this->info['address']->child = $e_Inline;
} else { } else {
$this->info['address']->child = $this->info['address']->child =
@ -445,6 +455,11 @@ class HTMLPurifier_HTMLDefinition
// URI that causes HTTP request // URI that causes HTTP request
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true); $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
if (!$this->strict) {
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
}
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// info_tag_transform : transformations of tags // info_tag_transform : transformations of tags