0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

[1.7.0] Implement HTML.Allowed, a TinyMCE style whitelist format.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1119 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-05-29 21:26:43 +00:00
parent e2a951420f
commit 12f73605a3
6 changed files with 132 additions and 7 deletions

2
NEWS
View File

@ -30,6 +30,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
format: key1:value1,key2:value2
! ConfigDoc now factored into OOP design
! All deprecated elements now natively supported
! Implement TinyMCE-style whitelist specification format in
%HTML.Allowed
- Deprecated and removed EnableRedundantUTF8Cleaning. It didn't even work!
. Unit test for ElementDef created, ElementDef behavior modified to
be more flexible

11
TODO
View File

@ -10,10 +10,7 @@ TODO List
1.7 release [Advanced API]
# Complete advanced API, and fully document it
- Add framework for unsafe attributes
- Reorganize configuration directives
- Set up anonymous module management by HTMLDefinition (Advanced API)
- Get all AttrTypes into string form
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
1.8 release [Refactor, refactor!]
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
@ -48,6 +45,9 @@ TODO List
- Append something to duplicate IDs so they're still usable (impl. note: the
dupe detector would also need to detect the suffix as well)
1.11 release [It's All About Trust] (floating)
# Implement untrusted, dangerous elements/attributes
2.0 release [Beyond HTML]
# Legit token based CSS parsing (will require revamping almost every
AttrDef class)
@ -81,8 +81,6 @@ Ongoing
- eFiction
- more! (look for ones that use WYSIWYGs)
- Complete basic smoketests
- Reorganize Unit Tests
- Refactor loop tests (esp. AttrDef_URI)
Unknown release (on a scratch-an-itch basis)
? Semi-lossy dumb alternate character encoding transfor
@ -91,6 +89,9 @@ Unknown release (on a scratch-an-itch basis)
- Explain how to use HTML Purifier in non-PHP languages
- Abstract ChildDef_BlockQuote to work with all elements that only
allow blocks in them, required or optional
- Reorganize Unit Tests
- Refactor loop tests (esp. AttrDef_URI)
- Reorganize configuration directives (Create more namespaces! Get messy!)
Requested
? Native content compression, whitespace stripping (don't rely on Tidy, make

View File

@ -12,6 +12,7 @@ TODO:
- allow generation of packaged docs that can be easily moved
- multipage documentation
- determine how to multilingualize
- add blurbs to ToC
*/
// Refuse to run on interpreters older than PHP 5.
if (version_compare(PHP_VERSION, '5', '<')) {
    exit('Requires PHP 5 or higher.');
}

View File

@ -65,6 +65,25 @@ HTMLPurifier_ConfigSchema::define(
</p>
');
// Registers the %HTML.Allowed configuration directive with a default of null
// and the type string 'string/null'; the trailing HTML string is the
// directive's user-facing documentation.
// NOTE(review): argument order is presumably (namespace, name, default, type,
// description) -- confirm against HTMLPurifier_ConfigSchema::define().
HTMLPurifier_ConfigSchema::define(
'HTML', 'Allowed', null, 'string/null', '
<p>
This is a convenience directive that rolls the functionality of
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
Specify elements and attributes that are allowed using:
<code>element1[attr1|attr2],element2...</code>.
</p>
<p>
<strong>Warning</strong>:
All of the constraints on the component directives are still enforced.
The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
whitelist: directly copy-pasting it here will probably result in
broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
are set, this directive has no effect.
This directive has been available since 1.7.0.
</p>
');
/**
* Definition of the purified HTML that describes allowed children,
* attributes, and many other things.
@ -233,8 +252,18 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
$support = "(for information on implementing this, see the ".
"support forums) ";
// setup allowed elements, SubtractiveWhitelist module(?)
// setup allowed elements
$allowed_elements = $config->get('HTML', 'AllowedElements');
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
$allowed = $config->get('HTML', 'Allowed');
if (is_string($allowed)) {
list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
}
}
if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
@ -247,7 +276,6 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
}
}
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
$allowed_attributes_mutable = $allowed_attributes; // by copy!
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) {
@ -289,6 +317,41 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
}
/**
 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 * separate lists for processing. Format is element[attr1|attr2],element2...
 *
 * Spaces and tabs are ignored, entries may be separated by commas or
 * newlines, and empty entries (e.g. from a trailing comma or an empty
 * list) are skipped rather than being registered as a '' element.
 *
 * @warning Although it's largely drawn from TinyMCE's implementation,
 *          it is different, and you'll probably have to modify your lists
 * @param $list String list to parse
 * @return array($allowed_elements, $allowed_attributes); both are lookup
 *         arrays whose keys are the names and whose values are true.
 *         Attribute keys have the form "element.attr"; the element '*'
 *         contributes attributes only and is never added to the elements.
 */
function parseTinyMCEAllowedList($list) {
    $elements   = array();
    $attributes = array();

    // whitespace inside the list carries no meaning; drop it so that
    // "a, b" and "a[href | title]" do not yield names with stray spaces
    $list = str_replace(array(' ', "\t"), '', $list);

    // commas and line breaks both separate entries; PREG_SPLIT_NO_EMPTY
    // discards the empty pieces produced by '', ',,' or a trailing comma
    $chunks = preg_split('/[,\r\n]+/', $list, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($chunks as $chunk) {
        // compare against false explicitly: an offset of 0 is a valid
        // match position and must not be mistaken for "no bracket"
        $bracket = strpos($chunk, '[');
        if ($bracket === false) {
            $element = $chunk;
            $attr    = '';
        } else {
            // casts guard against substr() returning false on older PHP
            $element = (string) substr($chunk, 0, $bracket);
            $attr    = (string) substr($chunk, $bracket + 1);
        }

        // a chunk such as "[id]" names no element; nothing to register
        if ($element === '') continue;

        if ($element !== '*') $elements[$element] = true;
        if ($attr === '') continue;

        // remove the trailing ] only when it is actually present, so a
        // malformed "a[href" does not lose the last letter of the name
        if (substr($attr, -1) === ']') {
            $attr = (string) substr($attr, 0, -1);
        }

        foreach (explode('|', $attr) as $key) {
            if ($key === '') continue; // "a[]" or "a[x||y]"
            $attributes["$element.$key"] = true;
        }
    }

    return array($elements, $attributes);
}
}

View File

@ -0,0 +1,57 @@
<?php
require_once 'HTMLPurifier/HTMLDefinition.php';
/**
 * Unit tests for HTMLPurifier_HTMLDefinition: the TinyMCE-style list parser
 * and the %HTML.Allowed convenience directive.
 */
class HTMLPurifier_HTMLDefinitionTest extends UnitTestCase
{

    /**
     * Each input list must parse into the expected pair of
     * array(elements lookup, attributes lookup).
     */
    function test_parseTinyMCEAllowedList() {

        $definition = new HTMLPurifier_HTMLDefinition();

        // input list => expected array($elements, $attributes)
        $cases = array(
            'a,b,c' => array(
                array('a' => true, 'b' => true, 'c' => true),
                array()
            ),
            'a[x|y|z]' => array(
                array('a' => true),
                array('a.x' => true, 'a.y' => true, 'a.z' => true)
            ),
            '*[id]' => array(
                array(),
                array('*.id' => true)
            ),
            'a[*]' => array(
                array('a' => true),
                array('a.*' => true)
            ),
            'span[style],strong,a[href|title]' => array(
                array('span' => true, 'strong' => true, 'a' => true),
                array('span.style' => true, 'a.href' => true, 'a.title' => true)
            ),
        );

        foreach ($cases as $input => $expected) {
            $actual = $definition->parseTinyMCEAllowedList($input);
            $this->assertEqual($actual, $expected);
        }

    }

    /**
     * A definition built from %HTML.Allowed must equal the one built from
     * the equivalent %HTML.AllowedElements / %HTML.AllowedAttributes pair.
     */
    function test_Allowed() {

        $explicit_settings = array(
            'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
            'HTML.AllowedAttributes' => array('a.href', '*.id')
        );
        $shorthand_settings = array(
            'HTML.Allowed' => 'b,i,p,a[href],*[id]'
        );

        $explicit  = HTMLPurifier_Config::create($explicit_settings);
        $shorthand = HTMLPurifier_Config::create($shorthand_settings);

        $this->assertEqual(
            $explicit->getHTMLDefinition(),
            $shorthand->getHTMLDefinition()
        );

    }

}
?>

View File

@ -74,6 +74,7 @@ $test_files[] = 'HTMLPurifier/EncoderTest.php';
$test_files[] = 'HTMLPurifier/EntityLookupTest.php';
$test_files[] = 'HTMLPurifier/EntityParserTest.php';
$test_files[] = 'HTMLPurifier/GeneratorTest.php';
$test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php';
$test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';
$test_files[] = 'HTMLPurifier/HTMLModuleTest.php';
$test_files[] = 'HTMLPurifier/HTMLModule/ScriptingTest.php';