0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-03 05:11:52 +00:00

Implement attribute transforms for required attributes. I can now confidently say that output will always be valid.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@256 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-08-14 23:11:28 +00:00
parent e770d994a7
commit 24c64dbbac
12 changed files with 229 additions and 37 deletions

View File

@ -80,7 +80,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
<tr><td>DEL, INS</td><td>Link to explanation why it changed</td></tr> <tr><td>DEL, INS</td><td>Link to explanation why it changed</td></tr>
<tr><td>href</td><td>A</td><td>-</td></tr> <tr><td>href</td><td>A</td><td>-</td></tr>
<tr><td>longdesc</td><td>IMG</td><td>-</td></tr> <tr><td>longdesc</td><td>IMG</td><td>-</td></tr>
<tr class="required impl-partial"><td>src</td><td>IMG</td><td>Required</td></tr> <tr class="required"><td>src</td><td>IMG</td><td>Required</td></tr>
</tbody> </tbody>
<tbody> <tbody>
@ -90,14 +90,14 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
<tr><td>TABLE</td></tr> <tr><td>TABLE</td></tr>
<tr><td>HR</td><td>Equivalent style 'text-align' (IE tested)</td></tr> <tr><td>HR</td><td>Equivalent style 'text-align' (IE tested)</td></tr>
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr> <tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
<tr class="required"><td>alt</td><td>IMG</td><td>Required, insert image filename if non-existant</td></tr> <tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color' (IE tested)</td></tr> <tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
<tr><td>TR</td><td>Equivalent style 'background-color' (IE tested)</td></tr> <tr><td>TR</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr> <tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
<tr><td>border</td><td>IMG</td><td>Equivalent style 'border-width', only applies when link present</td></tr> <tr><td>border</td><td>IMG</td><td>Equivalent style 'border-width', only applies when link present</td></tr>
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr> <tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class</td></tr> <tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class</td></tr>
<tr class="required"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr> <tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr> <tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix</td></tr> <tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix</td></tr>
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr> <tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
@ -106,7 +106,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr> <tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr> <tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr> <tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
<tr class="required"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr> <tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr> <tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr>
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr> <tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
<tr><td>OL</td></tr> <tr><td>OL</td></tr>
@ -168,7 +168,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
CSS 3. Mostly IE lack of support.</td></tr> CSS 3. Mostly IE lack of support.</td></tr>
<tr class="css1"><td>list-style</td><td>SHORTHAND</td></tr> <tr class="css1"><td>list-style</td><td>SHORTHAND</td></tr>
<tr class="css1"><td>margin</td><td>MULIPLE</td></tr> <tr class="css1"><td>margin</td><td>MULTIPLE</td></tr>
<tr class="css1"><td>margin-*</td><td>COMPOSITE(&lt;length&gt;, <tr class="css1"><td>margin-*</td><td>COMPOSITE(&lt;length&gt;,
&lt;percentage&gt;, auto)</td></tr> &lt;percentage&gt;, auto)</td></tr>
<tr class="css1"><td>padding</td><td>MULTIPLE</td></tr> <tr class="css1"><td>padding</td><td>MULTIPLE</td></tr>

View File

@ -0,0 +1,25 @@
<?php
require_once 'HTMLPurifier/AttrTransform.php';
// The BdoDir transform declared in this file MUST be registered as a
// post-validation transform: it treats any dir value that is still present
// as already valid and leaves it untouched.

// Register the Attr.DefaultTextDir directive (default 'ltr'); the transform
// reads it to fill in a missing dir attribute.
HTMLPurifier_ConfigDef::define(
'Attr', 'DefaultTextDir', 'ltr',
'Defines the default text direction (ltr or rtl) of the document '.
'being parsed. This generally is the same as the value of the dir '.
'attribute in HTML, or ltr if that is not specified.'
);
/**
 * Attribute transform that guarantees a dir attribute is present.
 *
 * An existing dir value is kept as-is; when none is set, the value of the
 * Attr.DefaultTextDir configuration directive is inserted.
 */
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{
    /**
     * @param array $attributes Attribute name => value map of the element.
     * @param object $config Configuration object queried via get().
     * @return array The attribute map, with 'dir' filled in if it was missing.
     */
    function transform($attributes, $config) {
        if (!isset($attributes['dir'])) {
            // Nothing supplied: fall back to the configured direction.
            $attributes['dir'] = $config->get('Attr', 'DefaultTextDir');
        }
        return $attributes;
    }
}
?>

View File

@ -0,0 +1,47 @@
<?php
require_once 'HTMLPurifier/AttrTransform.php';
// The ImgRequired transform declared in this file must be run POST
// validation (i.e. registered as a post-validation attribute transform).

// Register Attr.DefaultInvalidImage (default: empty string), used as the
// src value when an img has no src attribute.
HTMLPurifier_ConfigDef::define(
'Attr', 'DefaultInvalidImage', '',
'This is the default image an img tag will be pointed to if it does '.
'not have a valid src attribute. In future versions, we may allow the '.
'image tag to be removed completely, but due to design issues, this is '.
'not possible right now.'
);
// Register Attr.DefaultInvalidImageAlt (default: 'Invalid image'), used as
// the alt value when both src and alt are missing.
HTMLPurifier_ConfigDef::define(
'Attr', 'DefaultInvalidImageAlt', 'Invalid image',
'This is the content of the alt tag of an invalid image if the user '.
'had not previously specified an alt attribute. It has no effect when the '.
'image is valid but there was no alt attribute present.'
);
/**
 * Attribute transform that supplies the src and alt attributes when they
 * are missing.
 *
 * - Missing src: replaced by the Attr.DefaultInvalidImage directive.
 * - Missing alt: the basename of src when src was originally present,
 *   otherwise the Attr.DefaultInvalidImageAlt directive.
 */
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{
    /**
     * @param array $attributes Attribute name => value map of the element.
     * @param object $config Configuration object queried via get().
     * @return array The attribute map with 'src' and 'alt' both set.
     */
    function transform($attributes, $config) {
        // Remember whether src was supplied before we substitute a default;
        // this decides which alt fallback is used below.
        $had_src = isset($attributes['src']);
        if (!$had_src) {
            $attributes['src'] = $config->get('Attr', 'DefaultInvalidImage');
        }

        // An alt that is already present is never overwritten.
        if (isset($attributes['alt'])) {
            return $attributes;
        }

        $attributes['alt'] = $had_src
            ? basename($attributes['src'])
            : $config->get('Attr', 'DefaultInvalidImageAlt');
        return $attributes;
    }
}
?>

View File

@ -2,10 +2,13 @@
require_once 'HTMLPurifier/AttrTransform.php'; require_once 'HTMLPurifier/AttrTransform.php';
// this transformation may be done pre or post validation, but post is
// preferred, since invalid languages then will have been dropped.
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{ {
function transform($attr) { function transform($attr, $config) {
$lang = isset($attr['lang']) ? $attr['lang'] : false; $lang = isset($attr['lang']) ? $attr['lang'] : false;
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false; $xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;

View File

@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
class HTMLPurifier_AttrTransform_TextAlign class HTMLPurifier_AttrTransform_TextAlign
extends HTMLPurifier_AttrTransform { extends HTMLPurifier_AttrTransform {
function transform($attr) { function transform($attr, $config) {
if (!isset($attr['align'])) return $attr; if (!isset($attr['align'])) return $attr;

View File

@ -15,6 +15,8 @@ require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/AttrTransform.php'; require_once 'HTMLPurifier/AttrTransform.php';
require_once 'HTMLPurifier/AttrTransform/Lang.php'; require_once 'HTMLPurifier/AttrTransform/Lang.php';
require_once 'HTMLPurifier/AttrTransform/TextAlign.php'; require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
require_once 'HTMLPurifier/ChildDef.php'; require_once 'HTMLPurifier/ChildDef.php';
require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Token.php'; require_once 'HTMLPurifier/Token.php';
@ -56,7 +58,8 @@ class HTMLPurifier_HTMLDefinition
var $info_tag_transform = array(); var $info_tag_transform = array();
// used solely by HTMLPurifier_Strategy_ValidateAttributes // used solely by HTMLPurifier_Strategy_ValidateAttributes
var $info_attr_transform = array(); var $info_attr_transform_pre = array();
var $info_attr_transform_post = array();
// WARNING! Prototype is not passed by reference, so in order to get // WARNING! Prototype is not passed by reference, so in order to get
// a copy of the real one, you'll have to destroy your copy and // a copy of the real one, you'll have to destroy your copy and
@ -350,23 +353,31 @@ class HTMLPurifier_HTMLDefinition
// or we can just create another info // or we can just create another info
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// info[]->attr_transform : attribute transformations in elements // info[]->attr_transform_* : attribute transformations in elements
// pre is applied before any validation is done, post is done after
$transform = new HTMLPurifier_AttrTransform_TextAlign(); $this->info['h1']->attr_transform_pre[] =
$this->info['h1']->attr_transform[] = $this->info['h2']->attr_transform_pre[] =
$this->info['h2']->attr_transform[] = $this->info['h3']->attr_transform_pre[] =
$this->info['h3']->attr_transform[] = $this->info['h4']->attr_transform_pre[] =
$this->info['h4']->attr_transform[] = $this->info['h5']->attr_transform_pre[] =
$this->info['h5']->attr_transform[] = $this->info['h6']->attr_transform_pre[] =
$this->info['h6']->attr_transform[] = $this->info['p'] ->attr_transform_pre[] =
$this->info['p'] ->attr_transform[] = $transform; new HTMLPurifier_AttrTransform_TextAlign();
$this->info['bdo']->attr_transform_post[] =
new HTMLPurifier_AttrTransform_BdoDir();
$this->info['img']->attr_transform_post[] =
new HTMLPurifier_AttrTransform_ImgRequired();
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// info_attr_transform : global attribute transformation that is // info_attr_transform_* : global attribute transformation that is
// unconditionally called. Good for transformations that have complex // unconditionally called. Good for transformations that have complex
// start conditions // start conditions
// pre is applied before any validation is done, post is done after
$this->info_attr_transform[] = new HTMLPurifier_AttrTransform_Lang(); $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
} }
@ -387,7 +398,8 @@ class HTMLPurifier_ElementDef
{ {
var $attr = array(); var $attr = array();
var $attr_transform = array(); var $attr_transform_pre = array();
var $attr_transform_post = array();
var $auto_close = array(); var $auto_close = array();
var $child; var $child;
var $type = 'unknown'; var $type = 'unknown';

View File

@ -47,20 +47,20 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// copy out attributes for easy manipulation // copy out attributes for easy manipulation
$attr = $token->attributes; $attr = $token->attributes;
// do global transformations // do global transformations (pre)
// ex. <ELEMENT lang="fr"> to <ELEMENT lang="fr" xml:lang="fr"> // ex. <ELEMENT lang="fr"> to <ELEMENT lang="fr" xml:lang="fr">
// DEFINITION CALL // DEFINITION CALL
foreach ($this->definition->info_attr_transform as $transform) { foreach ($this->definition->info_attr_transform_pre as $transform) {
$attr = $transform->transform($attr); $attr = $transform->transform($attr, $config);
} }
// do local transformations only applicable to this element // do local transformations only applicable to this element (pre)
// ex. <p align="right"> to <p style="text-align:right;"> // ex. <p align="right"> to <p style="text-align:right;">
// DEFINITION CALL // DEFINITION CALL
foreach ($this->definition->info[$token->name]->attr_transform foreach ($this->definition->info[$token->name]->attr_transform_pre
as $transform as $transform
) { ) {
$attr = $transform->transform($attr); $attr = $transform->transform($attr, $config);
} }
// create alias to this element's attribute definition array, see // create alias to this element's attribute definition array, see
@ -115,6 +115,14 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// others would prepend themselves). // others would prepend themselves).
} }
// post transforms
foreach ($this->definition->info_attr_transform_post as $transform) {
$attr = $transform->transform($attr, $config);
}
foreach ($this->definition->info[$token->name]->attr_transform_post as $transform) {
$attr = $transform->transform($attr, $config);
}
// commit changes // commit changes
// could interfere with flyweight implementation // could interfere with flyweight implementation
$tokens[$key]->attributes = $attr; $tokens[$key]->attributes = $attr;

View File

@ -0,0 +1,36 @@
<?php
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
/**
 * Unit test for HTMLPurifier_AttrTransform_BdoDir.
 */
class HTMLPurifier_AttrTransform_BdoDirTest extends HTMLPurifier_AttrTransformHarness
{
    /**
     * Checks that dir is inserted when missing, left alone when present,
     * and that the inserted value follows Attr.DefaultTextDir.
     */
    function test() {
        $this->transform = new HTMLPurifier_AttrTransform_BdoDir();

        // Configuration whose default text direction is right-to-left.
        $rtl_config = HTMLPurifier_Config::createDefault();
        $rtl_config->set('Attr', 'DefaultTextDir', 'rtl');

        // Cases share an index across the three arrays:
        //   0: no dir, default config   -> dir added as 'ltr'
        //   1: existing dir             -> left untouched
        //   2: no dir, rtl config       -> dir added as 'rtl'
        $inputs = array(
            0 => array(),
            1 => array('dir' => 'rtl'),
            2 => array()
        );
        $expect = array(
            0 => array('dir' => 'ltr'),
            1 => array('dir' => 'rtl'),
            2 => array('dir' => 'rtl')
        );
        // Only case 2 overrides the configuration.
        $config = array(2 => $rtl_config);

        $this->assertTransform($inputs, $expect, $config);
    }
}
?>

View File

@ -0,0 +1,35 @@
<?php
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
/**
 * Unit test for HTMLPurifier_AttrTransform_ImgRequired.
 */
class HTMLPurifier_AttrTransform_ImgRequiredTest extends HTMLPurifier_AttrTransformHarness
{
    /**
     * Checks the src/alt fallbacks with the default and a customised
     * configuration.
     */
    function test() {
        $this->transform = new HTMLPurifier_AttrTransform_ImgRequired();

        // Configuration overriding both invalid-image directives.
        $custom_config = HTMLPurifier_Config::createDefault();
        $custom_config->set('Attr', 'DefaultInvalidImage', 'blank.png');
        $custom_config->set('Attr', 'DefaultInvalidImageAlt', 'Pawned!');

        // Cases share an index across the three arrays:
        //   0: nothing set, default config -> default src and alt
        //   1: nothing set, custom config  -> configured src and alt
        //   2: src only                    -> alt becomes the file name
        //   3: alt only                    -> default src, alt preserved
        $inputs = array(
            0 => array(),
            1 => array(),
            2 => array('src' => '/path/to/foobar.png'),
            3 => array('alt' => 'intrigue')
        );
        $expect = array(
            0 => array('src' => '', 'alt' => 'Invalid image'),
            1 => array('src' => 'blank.png', 'alt' => 'Pawned!'),
            2 => array('src' => '/path/to/foobar.png', 'alt' => 'foobar.png'),
            3 => array('src' => '', 'alt' => 'intrigue')
        );
        // Only case 1 overrides the configuration.
        $config = array(1 => $custom_config);

        $this->assertTransform($inputs, $expect, $config);
    }
}
?>

View File

@ -5,14 +5,13 @@ class HTMLPurifier_AttrTransformHarness extends UnitTestCase
var $transform; var $transform;
function assertTransform($inputs, $expect) { function assertTransform($inputs, $expect, $config = array()) {
$default_config = HTMLPurifier_Config::createDefault();
foreach ($inputs as $i => $input) { foreach ($inputs as $i => $input) {
$result = $this->transform->transform($input); if (!isset($config[$i])) $config[$i] = $default_config;
if ($expect[$i] === true) { $result = $this->transform->transform($input, $config[$i]);
$this->assertEqual($input, $result, "Test $i: %s"); if ($expect[$i] === true) $expect[$i] = $input;
} else { $this->assertEqual($expect[$i], $result, "Test $i: %s");
$this->assertEqual($expect[$i], $result, "Test $i: %s");
}
} }
} }

View File

@ -12,6 +12,9 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
$strategy = new HTMLPurifier_Strategy_ValidateAttributes(); $strategy = new HTMLPurifier_Strategy_ValidateAttributes();
// attribute order is VERY fragile, perhaps we should define
// an ordering scheme!
$inputs = array(); $inputs = array();
$expect = array(); $expect = array();
$config = array(); $config = array();
@ -68,8 +71,9 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
$expect[12] = '<h1 style="text-align:center;">Centered Headline</h1>'; $expect[12] = '<h1 style="text-align:center;">Centered Headline</h1>';
// test table // test table
$inputs[13] = <<<HTML $inputs[13] =
<table frame="above" rules="rows" summary="A test table" border="2" cellpadding="5%" cellspacing="3" width="100%">
'<table frame="above" rules="rows" summary="A test table" border="2" cellpadding="5%" cellspacing="3" width="100%">
<col align="right" width="4*" /> <col align="right" width="4*" />
<col charoff="5" align="char" width="1*" /> <col charoff="5" align="char" width="1*" />
<tr valign="top"> <tr valign="top">
@ -83,8 +87,8 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
<tr> <tr>
<td colspan="2">Taken off the market</td> <td colspan="2">Taken off the market</td>
</tr> </tr>
</table> </table>';
HTML;
$expect[13] = $inputs[13]; $expect[13] = $inputs[13];
// test URI // test URI
@ -95,6 +99,27 @@ HTML;
$inputs[15] = '<a href="javascript:badstuff();">Google</a>'; $inputs[15] = '<a href="javascript:badstuff();">Google</a>';
$expect[15] = '<a>Google</a>'; $expect[15] = '<a>Google</a>';
// test required attributes for img
$inputs[16] = '<img />';
$expect[16] = '<img src="" alt="Invalid image" />';
$inputs[17] = '<img src="foobar.jpg" />';
$expect[17] = '<img src="foobar.jpg" alt="foobar.jpg" />';
$inputs[18] = '<img alt="pretty picture" />';
$expect[18] = '<img alt="pretty picture" src="" />';
// test required attributes for bdo
$inputs[19] = '<bdo>Go left.</bdo>';
$expect[19] = '<bdo dir="ltr">Go left.</bdo>';
$inputs[20] = '<bdo dir="blahblah">Invalid value!</bdo>';
$expect[20] = '<bdo dir="ltr">Invalid value!</bdo>';
// comparison check for test 20
$inputs[21] = '<span dir="blahblah">Invalid value!</span>';
$expect[21] = '<span>Invalid value!</span>';
$this->assertStrategyWorks($strategy, $inputs, $expect, $config); $this->assertStrategyWorks($strategy, $inputs, $expect, $config);
} }

View File

@ -74,6 +74,8 @@ $test_files[] = 'IDAccumulatorTest.php';
$test_files[] = 'TagTransformTest.php'; $test_files[] = 'TagTransformTest.php';
$test_files[] = 'AttrTransform/LangTest.php'; $test_files[] = 'AttrTransform/LangTest.php';
$test_files[] = 'AttrTransform/TextAlignTest.php'; $test_files[] = 'AttrTransform/TextAlignTest.php';
$test_files[] = 'AttrTransform/BdoDirTest.php';
$test_files[] = 'AttrTransform/ImgRequiredTest.php';
$test_files[] = 'URISchemeRegistryTest.php'; $test_files[] = 'URISchemeRegistryTest.php';
$test_files[] = 'URISchemeTest.php'; $test_files[] = 'URISchemeTest.php';