0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 16:31:53 +00:00

[1.3.0] <li value="4"> and <ul start="2"> now allowed in loose mode

- Updated progress with some more impl-no decisions
 - Loose vs. Strict now has better tallying on current behavior
 - Document what we're not allowing in loose
 - Strict boolean indicator added to HTMLDefinition
 - Added XHTML 1.1 to TODO.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@571 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-11-23 22:15:35 +00:00
parent 3c4da9666f
commit 92b3f0e817
6 changed files with 65 additions and 42 deletions

1
NEWS
View File

@ -18,6 +18,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! New directive %HTML.Parent, allows you to only allow inline content
! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let
users narrow the set of allowed tags
! <li value="4"> and <ul start="2"> now allowed in loose mode
- Added missing type to ChildDef_Chameleon
- Remove Tidy option from demo if there is not Tidy available
. ChildDef_Required guards against empty tags

1
TODO
View File

@ -39,6 +39,7 @@ TODO List
- Hooks for adding custom processors to custom namespaced tags and
attributes, offer default implementation
- Lots of documentation and samples
- XHTML 1.1 support
Ongoing
- Lots of profiling, make it faster!

View File

@ -128,19 +128,20 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
<tbody>
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
<tr class="danger"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative</td></tr>
<tr class="danger"><td>left</td></tr>
<tr class="danger"><td>right</td></tr>
<tr class="danger"><td>top</td></tr>
<tr><td>clip</td><td>-</td></tr>
<tr class="danger"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit
<tr class="danger impl-no"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative to even be considered,
but it's still possible to arbitrarily position by running over.</td></tr>
<tr class="danger impl-no"><td>left</td></tr>
<tr class="danger impl-no"><td>right</td></tr>
<tr class="danger impl-no"><td>top</td></tr>
<tr class="impl-no"><td>clip</td><td>-</td></tr>
<tr class="danger impl-no"><td>position</td><td>ENUM(static, relative, absolute, fixed)
relative not absolute?</td></tr>
<tr class="danger"><td>z-index</td><td>Dangerous</td></tr>
<tr class="danger impl-no"><td>z-index</td><td>Dangerous</td></tr>
</tbody>
<tbody>
<tr><th colspan="2">Unknown</th></tr>
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr>
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
Depends on background-image</td></tr>
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
@ -150,7 +151,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
inline-block has incomplete IE6 support and requires -moz-inline-box
for Mozilla. Unknown target milestone.</td></tr>
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr>
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.3</td></tr>
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
<tr class="impl-no"><td>min-height</td></tr>
<tr class="impl-no"><td>max-width</td></tr>
@ -236,7 +237,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
<tr><th colspan="3">Questionable</th></tr>
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts</td></tr>
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
</tbody>
<tbody>
@ -283,11 +284,11 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr>
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
<tr><td>OL</td></tr>
<tr><td>UL</td></tr>
<tr><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', transform may not be desirable, see ol.start. Configurable.</td></tr>
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix, see hspace</td></tr>
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
<tr><td>TD, TH</td></tr>

View File

@ -8,36 +8,30 @@ to HTML Purifier, though, so let's take a look:
== Major incompatibilities ==
[done] BLOCKQUOTE changes from 'flow' to 'block'
behavior: inline inner contents should not be nuked, paragraph as necessary
current behavior: inline inner contents should not be nuked, block-ify as necessary
[partially-done] U, S, STRIKE cut
behavior: replace with appropriate inline span + CSS
[partially-done] ADDRESS from potpourri to Inline (removes p tags) (lower importance)
behavior: p tags silently dropped or replaced with something (<br>)
current behavior: removed completely
projected behavior: replace with appropriate inline span + CSS
[done] ADDRESS from potpourri to Inline (removes p tags)
current behavior: block tags silently dropped
ideal behavior: replace tags with something like <br>. (not high priority)
== Things we can loosen up ==
Tags DIR, MENU, CENTER, ISINDEX, FONT, BASEFONT? allowed in loose
Attributes allowed in loose:
div,p,h#.align
ul,ol.type
ul,ol,dl.compact
ol.start
li.type,value
hr.align,noshade,size,width
pre.width
img.name?,align,border,hspace,vspace
table.align,bgcolor
caption.align
tr.bgcolor
th,td.nowrap,bgcolor,width,height
current behavior: transform to strict-valid forms
Attributes allowed in loose (see attribute transforms in 'dev-progress.html')
current behavior: projected to transform into strict-valid forms
== Periphery issues ==
A tag's attribute 'target' (for selecting frames) cut
behavior: shouldn't be needed, use loose doctype if needed
OL/LI tag's attribute 'start' (for renumbering lists) cut
behavior: no substitute, just delete
current behavior: not allowed at all
projected behavior: use loose doctype if needed, needs valid values
[done] OL/LI tag's attribute 'start'/'value' (for renumbering lists) cut
current behavior: no substitute, just delete when in strict, allow in loose
Attribute 'name' deprecated in favor of 'id'
behavior: not allowed in first place, but create proper AttrTransform
PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
behavior: disallow as usual
current behavior: dropped silently
projected behavior: create proper AttrTransform (currently not allowed at all)
[done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
current behavior: disallow as usual

View File

@ -22,4 +22,15 @@ whole point about CSS is to seperate styling from content, so inline styling
doesn't solve that problem.
It's an icky question, and we'll have to deal with it as more and more
transforms get implemented.
transforms get implemented. As of right now, however, we currently support
these loose-only constructs in loose mode:
- <ul start="1">, <li value="1"> attributes
- <u>, <strike>, <s> tags
- flow children in <blockquote>
- mixed children in <address>
The changed child definitions as well as the ul.start li.value are the most
compelling reasons why loose should be used. We may want offer disabling <u>,
<strike> and <s> by themselves.

View File

@ -159,17 +159,23 @@ class HTMLPurifier_HTMLDefinition
/**
* Lookup table of flow elements
* @public
*/
var $info_flow_elements = array();
/**
* Boolean is a strict definition?
* @public
*/
var $strict;
/**
* Initializes the definition, the meat of the class.
*/
function setup($config) {
// emulates the structure of the DTD
// these are condensed, however, with bad stuff taken out
// screening process was done by hand
// some cached config values
$this->strict = $config->get('HTML', 'Strict');
//////////////////////////////////////////////////////////////////////
// info[] : initializes the definition objects
@ -188,7 +194,7 @@ class HTMLPurifier_HTMLDefinition
'colgroup', 'col', 'td', 'th', 'tr'
);
if (!$config->get('HTML', 'Strict')) {
if (!$this->strict) {
$allowed_tags[] = 'u';
$allowed_tags[] = 's';
$allowed_tags[] = 'strike';
@ -201,6 +207,10 @@ class HTMLPurifier_HTMLDefinition
//////////////////////////////////////////////////////////////////////
// info[]->child : defines allowed children for elements
// emulates the structure of the DTD
// however, these are condensed, with bad stuff taken out
// screening process was done by hand
// entities: prefixed with e_ and _ replaces . from DTD
// double underlines are entities we made up
@ -254,7 +264,7 @@ class HTMLPurifier_HTMLDefinition
$this->info['li']->child =
$this->info['div']->child = $e_Flow;
if ($config->get('HTML', 'Strict')) {
if ($this->strict) {
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
} else {
$this->info['blockquote']->child = $e_Flow;
@ -299,7 +309,7 @@ class HTMLPurifier_HTMLDefinition
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
if ($config->get('HTML', 'Strict')) {
if ($this->strict) {
$this->info['address']->child = $e_Inline;
} else {
$this->info['address']->child =
@ -445,6 +455,11 @@ class HTMLPurifier_HTMLDefinition
// URI that causes HTTP request
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
if (!$this->strict) {
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
}
//////////////////////////////////////////////////////////////////////
// info_tag_transform : transformations of tags