0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 08:21:52 +00:00

Forms implementation for %HTML.Trusted. Some backend changes:

* Added Charsets and Character attribute types
* Fix a heavily recursive form of ContentSets, this allows a content-set
  to include another content-set which includes another content-set, and
  so forth.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2008-08-15 18:57:44 -04:00
parent dc28346677
commit c9b6f125aa
17 changed files with 475 additions and 37 deletions

1
NEWS
View File

@ -30,6 +30,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
Users who are not performing inbound filtering, this may seem a little
useless, but as a bonus, the test suite and handling of edge cases is also
improved.
! Experimental implementation of forms for %HTML.Trusted
- Fix two bugs in %URI.MakeAbsolute; one involving empty paths in base URLs,
the other involving an undefined $is_folder error.
- Throw error when %Core.Encoding is set to a spurious value. Previously,

4
TODO
View File

@ -22,7 +22,6 @@ FUTURE VERSIONS
3.2 release [It's All About Trust] (floating)
# Implement untrusted, dangerous elements/attributes
- Forms are especially wanted
# Implement IDREF support (harder than it seems, since you cannot have
IDREFs to non-existent IDs)
# Frameset XHTML 1.0 and HTML 4.01 doctypes
@ -44,7 +43,6 @@ FUTURE VERSIONS
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
- Remove <span> tags that don't do anything (no attributes)
- Remove empty inline tags<i></i>
- Append something to duplicate IDs so they're still usable (impl. note: the
dupe detector would also need to detect the suffix as well)
- Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
@ -60,8 +58,6 @@ FUTURE VERSIONS
Also, enable disabling of directionality
5.0 release [To XML and Beyond]
- AllowedAttributes and ForbiddenAttributes step on the toes of XML by
using periods; this needs to be changed.
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
- Hooks for adding custom processors to custom namespaced tags and
attributes, offer default implementation

View File

@ -151,7 +151,7 @@
</directive>
<directive id="HTML.Trusted">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>199</line>
<line>202</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>233</line>
@ -168,27 +168,27 @@
</directive>
<directive id="HTML.AllowedModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>206</line>
<line>209</line>
</file>
</directive>
<directive id="HTML.CoreModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>207</line>
<line>210</line>
</file>
</directive>
<directive id="HTML.Proprietary">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>221</line>
<line>224</line>
</file>
</directive>
<directive id="HTML.SafeObject">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>226</line>
<line>229</line>
</file>
</directive>
<directive id="HTML.SafeEmbed">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>229</line>
<line>232</line>
</file>
</directive>
<directive id="Attr.IDBlacklist">

View File

@ -115,6 +115,7 @@ require 'HTMLPurifier/AttrTransform/Border.php';
require 'HTMLPurifier/AttrTransform/EnumToCSS.php';
require 'HTMLPurifier/AttrTransform/ImgRequired.php';
require 'HTMLPurifier/AttrTransform/ImgSpace.php';
require 'HTMLPurifier/AttrTransform/Input.php';
require 'HTMLPurifier/AttrTransform/Lang.php';
require 'HTMLPurifier/AttrTransform/Length.php';
require 'HTMLPurifier/AttrTransform/Name.php';
@ -122,6 +123,7 @@ require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php';
@ -137,6 +139,7 @@ require 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
require 'HTMLPurifier/HTMLModule/Bdo.php';
require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php';

View File

@ -109,6 +109,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Border.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/EnumToCSS.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
@ -116,6 +117,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
@ -131,6 +133,7 @@ require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Memory.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Bdo.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';

View File

@ -0,0 +1,39 @@
<?php
/**
* Performs miscellaneous cross attribute validation and filtering for
* input elements. This is meant to be a post-transform.
*/
class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform {
protected $pixels;
public function __construct() {
$this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
}
public function transform($attr, $config, $context) {
if (!isset($attr['type'])) $t = 'text';
else $t = strtolower($attr['type']);
if (isset($attr['checked']) && $t !== 'radio' && $t !== 'checkbox') {
unset($attr['checked']);
}
if (isset($attr['maxlength']) && $t !== 'text' && $t !== 'password') {
unset($attr['maxlength']);
}
if (isset($attr['size']) && $t !== 'text' && $t !== 'password') {
$result = $this->pixels->validate($attr['size'], $config, $context);
if ($result === false) unset($attr['size']);
else $attr['size'] = $result;
}
if (isset($attr['src']) && $t !== 'image') {
unset($attr['src']);
}
if (!isset($attr['value']) && ($t === 'radio' || $t === 'checkbox')) {
$attr['value'] = '';
}
return $attr;
}
}

View File

@ -0,0 +1,16 @@
<?php
/**
* Sets height/width defaults for <textarea>
*/
class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
// Calculated from Firefox
if (!isset($attr['cols'])) $attr['cols'] = '22';
if (!isset($attr['rows'])) $attr['rows'] = '3';
return $attr;
}
}

View File

@ -32,6 +32,9 @@ class HTMLPurifier_AttrTypes
// unimplemented aliases
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
$this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text();
$this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
$this->info['Character'] = new HTMLPurifier_AttrDef_Text();
// number is really a positive integer (one or more digits)
// FIXME: ^^ not always, see start and value of list items

View File

@ -5,8 +5,6 @@
*
* @warning Currently this class is an all or nothing proposition, that is,
* it will only give a bool return value.
* @note This class is currently not used by any code, although it is unit
* tested.
*/
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{

View File

@ -37,33 +37,35 @@ class HTMLPurifier_ContentSets
// sorry, no way of overloading
foreach ($modules as $module_i => $module) {
foreach ($module->content_sets as $key => $value) {
if (isset($this->info[$key])) {
$temp = $this->convertToLookup($value);
if (isset($this->lookup[$key])) {
// add it into the existing content set
$this->info[$key] = $this->info[$key] . ' | ' . $value;
$this->lookup[$key] = array_merge($this->lookup[$key], $temp);
} else {
$this->info[$key] = $value;
$this->lookup[$key] = $temp;
}
}
}
// perform content_set expansions
$this->keys = array_keys($this->info);
foreach ($this->info as $i => $set) {
// only performed once, so infinite recursion is not
// a problem
$this->info[$i] =
str_replace(
$this->keys,
// must be recalculated each time due to
// changing substitutions
array_values($this->info),
$set);
$old_lookup = false;
while ($old_lookup !== $this->lookup) {
$old_lookup = $this->lookup;
foreach ($this->lookup as $i => $set) {
$add = array();
foreach ($set as $element => $x) {
if (isset($this->lookup[$element])) {
$add += $this->lookup[$element];
unset($this->lookup[$i][$element]);
}
}
$this->lookup[$i] += $add;
}
}
$this->values = array_values($this->info);
// generate lookup tables
foreach ($this->info as $name => $set) {
$this->lookup[$name] = $this->convertToLookup($set);
foreach ($this->lookup as $key => $lookup) {
$this->info[$key] = implode(' | ', array_keys($lookup));
}
$this->keys = array_keys($this->info);
$this->values = array_values($this->info);
}
/**
@ -75,12 +77,22 @@ class HTMLPurifier_ContentSets
if (!empty($def->child)) return; // already done!
$content_model = $def->content_model;
if (is_string($content_model)) {
$def->content_model = str_replace(
$this->keys, $this->values, $content_model);
// Assume that $this->keys is alphanumeric
$def->content_model = preg_replace_callback(
'/\b(' . implode('|', $this->keys) . ')\b/',
array($this, 'generateChildDefCallback'),
$content_model
);
//$def->content_model = str_replace(
// $this->keys, $this->values, $content_model);
}
$def->child = $this->getChildDef($def, $module);
}
public function generateChildDefCallback($matches) {
return $this->info[$matches[0]];
}
/**
* Instantiates a ChildDef based on content_model and content_model_type
* member variables in HTMLPurifier_ElementDef

View File

@ -0,0 +1,117 @@
<?php
/**
* XHTML 1.1 Forms module, defines all form-related elements found in HTML 4.
*/
class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
{
public $name = 'Forms';
public $safe = false;
public $content_sets = array(
'Block' => 'Form',
'Inline' => 'Formctrl',
);
public function setup($config) {
$form = $this->addElement('form', 'Form',
'Required: Heading | List | Block | fieldset', 'Common', array(
'accept' => 'ContentTypes',
'accept-charset' => 'Charsets',
'action*' => 'URI',
'method' => 'Enum#get,post',
// really ContentType, but these two are the only ones used today
'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data',
));
$form->excludes = array('form' => true);
$input = $this->addElement('input', 'Formctrl', 'Empty', 'Common', array(
'accept' => 'ContentTypes',
'accesskey' => 'Character',
'alt' => 'Text',
'checked' => 'Bool#checked',
'disabled' => 'Bool#disabled',
'maxlength' => 'Number',
'name' => 'CDATA',
'readonly' => 'Bool#readonly',
'size' => 'Number',
'src' => 'URI#embeds',
'tabindex' => 'Number',
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
'value' => 'CDATA',
));
$input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input();
$this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', array(
'disabled' => 'Bool#disabled',
'multiple' => 'Bool#multiple',
'name' => 'CDATA',
'size' => 'Number',
'tabindex' => 'Number',
));
$this->addElement('option', false, 'Optional: #PCDATA', 'Common', array(
'disabled' => 'Bool#disabled',
'label' => 'Text',
'selected' => 'Bool#selected',
'value' => 'CDATA',
));
// It's illegal for there to be more than one selected, but not
// be multiple. Also, no selected means undefined behavior. This might
// be difficult to implement; perhaps an injector, or a context variable.
$textarea = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', array(
'accesskey' => 'Character',
'cols*' => 'Number',
'disabled' => 'Bool#disabled',
'name' => 'CDATA',
'readonly' => 'Bool#readonly',
'rows*' => 'Number',
'tabindex' => 'Number',
));
$textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea();
$button = $this->addElement('button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', array(
'accesskey' => 'Character',
'disabled' => 'Bool#disabled',
'name' => 'CDATA',
'tabindex' => 'Number',
'type' => 'Enum#button,submit,reset',
'value' => 'CDATA',
));
// For exclusions, ideally we'd specify content sets, not literal elements
$button->excludes = $this->makeLookup(
'form', 'fieldset', // Form
'input', 'select', 'textarea', 'label', 'button', // Formctrl
'a' // as per HTML 4.01 spec, this is omitted by modularization
);
// Extra exclusion: img usemap="" is not permitted within this element.
// We'll omit this for now, since we don't have any good way of
// indicating it yet.
// This is HIGHLY user-unfriendly; we need a custom child-def for this
$this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common');
$label = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', array(
'accesskey' => 'Character',
// 'for' => 'IDREF', // IDREF not implemented, cannot allow
));
$label->excludes = array('label' => true);
$this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', array(
'accesskey' => 'Character',
));
$this->addElement('optgroup', false, 'Required: option', 'Common', array(
'disabled' => 'Bool#disabled',
'label*' => 'Text',
));
// Don't forget an injector for <isindex>. This one's a little complex
// because it maps to multiple elements.
}
}

View File

@ -63,8 +63,11 @@ class HTMLPurifier_HTMLModuleManager
$common = array(
'CommonAttributes', 'Text', 'Hypertext', 'List',
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
'StyleAttribute', 'Scripting', 'Object',
'Name' // technically legacy, but present in all the specs
'StyleAttribute',
// Unsafe:
'Scripting', 'Object', 'Forms',
// Sorta legacy, but present in strict:
'Name',
);
$transitional = array('Legacy', 'Target');
$xml = array('XMLCommonAttributes');

View File

@ -0,0 +1,93 @@
<?php
class HTMLPurifier_AttrTransform_InputTest extends HTMLPurifier_AttrTransformHarness
{
function setUp() {
parent::setUp();
$this->obj = new HTMLPurifier_AttrTransform_Input();
}
function testEmptyInput() {
$this->assertResult(array());
}
function testInvalidCheckedWithEmpty() {
$this->assertResult(array('checked' => 'checked'), array());
}
function testInvalidCheckedWithPassword() {
$this->assertResult(array(
'checked' => 'checked',
'type' => 'password'
), array(
'type' => 'password'
));
}
function testValidCheckedWithUcCheckbox() {
$this->assertResult(array(
'checked' => 'checked',
'type' => 'CHECKBOX',
'value' => 'bar',
));
}
function testInvalidMaxlength() {
$this->assertResult(array(
'maxlength' => '10',
'type' => 'checkbox',
'value' => 'foo',
), array(
'type' => 'checkbox',
'value' => 'foo',
));
}
function testValidMaxLength() {
$this->assertResult(array(
'maxlength' => '10',
));
}
// these two are really bad test-cases
function testSizeWithCheckbox() {
$this->assertResult(array(
'type' => 'checkbox',
'value' => 'foo',
'size' => '100px',
), array(
'type' => 'checkbox',
'value' => 'foo',
'size' => '100',
));
}
function testSizeWithText() {
$this->assertResult(array(
'type' => 'password',
'size' => '100px', // spurious value, to indicate no validation takes place
), array(
'type' => 'password',
'size' => '100px',
));
}
function testInvalidSrc() {
$this->assertResult(array(
'src' => 'img.png',
), array());
}
function testMissingValue() {
$this->assertResult(array(
'type' => 'checkbox',
), array(
'type' => 'checkbox',
'value' => '',
));
}
}

View File

@ -69,5 +69,20 @@ class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
}
function testPcdata() {
$this->obj = new HTMLPurifier_ChildDef_Custom('#PCDATA,a');
$this->assertEqual($this->obj->elements, array('#PCDATA' => true, 'a' => true));
$this->assertResult('foo<a />');
$this->assertResult('<a />', false);
}
function testWhitespace() {
$this->obj = new HTMLPurifier_ChildDef_Custom('a');
$this->assertEqual($this->obj->elements, array('a' => true));
$this->assertResult('foo<a />', false);
$this->assertResult('<a />');
$this->assertResult(' <a />');
}
}

View File

@ -67,8 +67,6 @@ class HTMLPurifier_ChildDef_RequiredTest extends HTMLPurifier_ChildDefHarness
'Out <b>Bold text</b><img />',
'Out <b>Bold text</b>&lt;img /&gt;'
);
}
}

View File

@ -80,6 +80,10 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
}
$this->assertIdentical($expect, $result);
if ($expect !== $result) {
echo '<pre>' . htmlspecialchars($result) . '</pre>';
}
}
/**

View File

@ -0,0 +1,137 @@
<?php
class HTMLPurifier_HTMLModule_FormsTest extends HTMLPurifier_HTMLModuleHarness
{
function setUp() {
parent::setUp();
$this->config->set('HTML', 'Trusted', true);
$this->config->set('Attr', 'EnableID', true);
$this->config->set('Cache', 'DefinitionImpl', null);
}
function testBasicUse() {
$this->assertResult( // need support for label for later
'
<form action="http://somesite.com/prog/adduser" method="post">
<p>
<label>First name: </label>
<input type="text" id="firstname" /><br />
<label>Last name: </label>
<input type="text" id="lastname" /><br />
<label>email: </label>
<input type="text" id="email" /><br />
<input type="radio" name="sex" value="Male" /> Male<br />
<input type="radio" name="sex" value="Female" /> Female<br />
<input type="submit" value="Send" /> <input type="reset" />
</p>
</form>'
);
}
function testSelectOption() {
$this->assertResult('
<form action="http://somesite.com/prog/component-select" method="post">
<p>
<select multiple="multiple" size="4" name="component-select">
<option selected="selected" value="Component_1_a">Component_1</option>
<option selected="selected" value="Component_1_b">Component_2</option>
<option>Component_3</option>
<option>Component_4</option>
<option>Component_5</option>
<option>Component_6</option>
<option>Component_7</option>
</select>
<input type="submit" value="Send" /><input type="reset" />
</p>
</form>
');
}
function testSelectOptgroup() {
$this->assertResult('
<form action="http://somesite.com/prog/someprog" method="post">
<p>
<select name="ComOS">
<option selected="selected" label="none" value="none">None</option>
<optgroup label="PortMaster 3">
<option label="3.7.1" value="pm3_3.7.1">PortMaster 3 with ComOS 3.7.1</option>
<option label="3.7" value="pm3_3.7">PortMaster 3 with ComOS 3.7</option>
<option label="3.5" value="pm3_3.5">PortMaster 3 with ComOS 3.5</option>
</optgroup>
<optgroup label="PortMaster 2">
<option label="3.7" value="pm2_3.7">PortMaster 2 with ComOS 3.7</option>
<option label="3.5" value="pm2_3.5">PortMaster 2 with ComOS 3.5</option>
</optgroup>
<optgroup label="IRX">
<option label="3.7R" value="IRX_3.7R">IRX with ComOS 3.7R</option>
<option label="3.5R" value="IRX_3.5R">IRX with ComOS 3.5R</option>
</optgroup>
</select>
</p>
</form>
');
}
function testTextarea() {
$this->assertResult('
<form action="http://somesite.com/prog/text-read" method="post">
<p>
<textarea name="thetext" rows="20" cols="80">
First line of initial text.
Second line of initial text.
</textarea>
<input type="submit" value="Send" /><input type="reset" />
</p>
</form>
');
}
// label tests omitted
function testFieldset() {
$this->assertResult('
<form action="..." method="post">
<fieldset>
<legend>Personal Information</legend>
Last Name: <input name="personal_lastname" type="text" tabindex="1" />
First Name: <input name="personal_firstname" type="text" tabindex="2" />
Address: <input name="personal_address" type="text" tabindex="3" />
...more personal information...
</fieldset>
<fieldset>
<legend>Medical History</legend>
<input name="history_illness" type="checkbox" value="Smallpox" tabindex="20" />Smallpox
<input name="history_illness" type="checkbox" value="Mumps" tabindex="21" /> Mumps
<input name="history_illness" type="checkbox" value="Dizziness" tabindex="22" /> Dizziness
<input name="history_illness" type="checkbox" value="Sneezing" tabindex="23" /> Sneezing
...more medical history...
</fieldset>
<fieldset>
<legend>Current Medication</legend>
Are you currently taking any medication?
<input name="medication_now" type="radio" value="Yes" tabindex="35" />Yes
<input name="medication_now" type="radio" value="No" tabindex="35" />No
If you are currently taking medication, please indicate
it in the space below:
<textarea name="current_medication" rows="20" cols="50" tabindex="40"></textarea>
</fieldset>
</form>
');
}
function testInputTransform() {
$this->assertResult('<input type="checkbox" />', '<input type="checkbox" value="" />');
}
function testTextareaTransform() {
$this->assertResult('<textarea></textarea>', '<textarea cols="22" rows="3"></textarea>');
}
function testTextInFieldset() {
$this->assertResult('<fieldset> <legend></legend>foo</fieldset>');
}
}