0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-09 23:28:42 +00:00

Move out SetParent and TweakSubtractiveWhitelist. Move out some other configurations, disable ID references.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@743 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-02-14 02:54:41 +00:00
parent 67befbc8a8
commit bae5b0c022
6 changed files with 146 additions and 124 deletions

View File

@ -3,6 +3,22 @@
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/IDAccumulator.php';
// Registers the boolean %Attr.EnableID directive (default false); the
// validate() method of HTMLPurifier_AttrDef_ID rejects every ID while
// this directive is off.
HTMLPurifier_ConfigSchema::define(
'Attr', 'EnableID', false, 'bool',
'Allows the ID attribute in HTML. This is disabled by default '.
'due to the fact that without proper configuration user input can '.
'easily break the validation of a webpage by specifying an ID that is '.
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
'the wind, enable this directive, but I strongly recommend you also '.
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
'versions.'
);
// Ties the name %HTML.EnableAttrID to %Attr.EnableID; presumably this
// keeps the former name usable as an alias (confirm against
// HTMLPurifier_ConfigSchema::defineAlias).
HTMLPurifier_ConfigSchema::defineAlias(
'HTML', 'EnableAttrID', 'Attr', 'EnableID'
);
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDPrefix', '', 'string',
'String to prefix to IDs. If you have no idea what IDs your pages '.
@ -39,22 +55,13 @@ HTMLPurifier_ConfigSchema::define(
class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
{
/**
* Is the ID an actual ID, or a reference to one?
* @note IDAccumulator checking is disabled for references
* @bool
*/
var $ref = false;
/**
* @param $ref bool indication if it's ID or IDREF
*/
function HTMLPurifier_AttrDef_ID($ref = false) {
$this->ref = $ref;
}
// ref functionality disabled, since we also have to verify
// whether or not the ID it refers to exists
function validate($id, $config, &$context) {
if (!$config->get('Attr', 'EnableID')) return false;
$id = trim($id); // trim it first
if ($id === '') return false;
@ -69,10 +76,10 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
'%Attr.IDPrefix is set', E_USER_WARNING);
}
if (!$this->ref) {
//if (!$this->ref) {
$id_accumulator =& $context->get('IDAccumulator');
if (isset($id_accumulator->ids[$id])) return false;
}
//}
// we purposely avoid using regex, hopefully this is faster
@ -87,7 +94,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
$result = ($trim === '');
}
if (!$this->ref && $result) $id_accumulator->add($id);
if (/*!$this->ref && */$result) $id_accumulator->add($id);
// if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it

View File

@ -38,22 +38,14 @@ require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
require_once 'HTMLPurifier/HTMLModule/Legacy.php';
// config modules
require_once 'HTMLPurifier/HTMLModule/SetParent.php';
// tweak modules
require_once 'HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php';
// this definition and its modules MUST NOT define configuration directives
// outside of the HTML or Attr namespaces
HTMLPurifier_ConfigSchema::define(
'HTML', 'EnableAttrID', false, 'bool',
'Allows the ID attribute in HTML. This is disabled by default '.
'due to the fact that without proper configuration user input can '.
'easily break the validation of a webpage by specifying an ID that is '.
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
'the wind, enable this directive, but I strongly recommend you also '.
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
'versions.'
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'Strict', false, 'bool',
@ -72,39 +64,6 @@ HTMLPurifier_ConfigSchema::define(
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'Parent', 'div', 'string',
'String name of element that HTML fragment passed to library will be '.
'inserted in. An interesting variation would be using span as the '.
'parent element, meaning that only inline tags would be allowed. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedElements', null, 'lookup/null',
'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
'can overload it with your own list of tags to allow. Note that this '.
'method is subtractive: it does its job by taking away from HTML Purifier '.
'usual feature set, so you cannot add a tag that HTML Purifier never '.
'supported in the first place (like embed, form or head). If you change this, you '.
'probably also want to change %HTML.AllowedAttributes. '.
'<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedAttributes', null, 'lookup/null',
'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
'(style, id, class, dir, lang, xml:lang).'.
'<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. For '.
'example, %HTML.EnableAttrID will take precedence over *.id in this '.
'directive. You must set that directive to true before you can use '.
'IDs at all. This directive has been available since 1.3.0.'
);
/**
* Definition of the purified HTML that describes allowed children,
* attributes, and many other things.
@ -246,11 +205,19 @@ class HTMLPurifier_HTMLDefinition
* @public
*/
var $order_keywords = array(
'setup' => 10,
'early' => 20,
'main' => 30,
'late' => 40,
'cleanup' => 50,
'begin' => 10,
'setup' => 20,
'pre' => 30,
'early' => 40,
'main' => 50,
'late' => 60,
'post' => 70,
'cleanup' => 80,
'end' => 90
);
/**
@ -277,23 +244,16 @@ class HTMLPurifier_HTMLDefinition
// modules
// early
// main
$main_modules = array('Text', 'Hypertext', 'List', 'Presentation',
'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute');
foreach ($main_modules as $module) $this->addModule($module, 'main');
// late
if (!$this->strict) $this->addModule('Legacy', 'late');
// cleanup
$this->addModule('TransformToStrict', 'cleanup');
$this->addModule('SetParent', 'post');
// remove ID module (refactor to module)
if (!$config->get('HTML', 'EnableAttrID')) {
$this->attr_collections->info['Core']['id'] = false;
}
$this->addModule('TransformToStrict', 'cleanup');
$this->addModule('TweakSubtractiveWhitelist', 'cleanup');
}
@ -341,8 +301,6 @@ class HTMLPurifier_HTMLDefinition
$this->processModules();
$this->setupAttrTransform();
$this->setupBlockWrapper();
$this->setupParent();
$this->setupCompat();
unset($this->config);
@ -453,50 +411,6 @@ class HTMLPurifier_HTMLDefinition
}
}
/**
* Sets up parent of fragment based on config
*/
function setupParent() {
$parent = $this->config->get('HTML', 'Parent');
if (isset($this->info[$parent])) {
$this->info_parent = $parent;
} else {
trigger_error('Cannot use unrecognized element as parent.',
E_USER_ERROR);
}
$this->info_parent_def = $this->info[$this->info_parent];
}
/**
* Sets up compat code from HTMLDefinition that has not been
* delegated to modules yet
*/
function setupCompat() {
// setup allowed elements, SubtractiveWhitelist module
$allowed_elements = $this->config->get('HTML', 'AllowedElements');
if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
}
}
$allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) {
if (!isset($allowed_attributes["*.$attr_key"])) {
unset($this->info_global_attr[$attr_key]);
}
}
foreach ($this->info as $tag => $info) {
foreach ($info->attr as $attr => $attr_info) {
if (!isset($allowed_attributes["$tag.$attr"])) {
unset($this->info[$tag]->attr[$attr]);
}
}
}
}
}
}

View File

@ -0,0 +1,31 @@
<?php
// Registers the string directive %HTML.Parent (default 'div'); its value
// is read by HTMLPurifier_HTMLModule_SetParent::postProcess() in this file.
HTMLPurifier_ConfigSchema::define(
'HTML', 'Parent', 'div', 'string',
'String name of element that HTML fragment passed to library will be '.
'inserted in. An interesting variation would be using span as the '.
'parent element, meaning that only inline tags would be allowed. '.
'This directive has been available since 1.3.0.'
);
/**
 * Proprietary module that resolves the parent element of the purified
 * fragment from %HTML.Parent and records it on the definition.
 */
class HTMLPurifier_HTMLModule_SetParent extends HTMLPurifier_HTMLModule
{

    /**
     * Stores the configured parent element name and its element
     * definition on the HTML definition.
     *
     * If the configured name is not a key of $definition->info, an
     * E_USER_ERROR is raised and info_parent keeps whatever value it
     * already had; info_parent_def is then looked up from that value
     * (only reached when an error handler lets execution continue).
     *
     * @param $definition HTML definition being built; must expose
     *        config, info, info_parent and info_parent_def.
     */
    function postProcess(&$definition) {
        $requested = $definition->config->get('HTML', 'Parent');

        if (!isset($definition->info[$requested])) {
            trigger_error('Cannot use unrecognized element as parent.',
                E_USER_ERROR);
        } else {
            $definition->info_parent = $requested;
        }

        // resolve against whichever parent name is now in effect
        $effective = $definition->info_parent;
        $definition->info_parent_def = $definition->info[$effective];
    }

}
?>

View File

@ -0,0 +1,68 @@
<?php
// Registers %HTML.AllowedElements (type 'lookup/null', default null).
// HTMLPurifier_HTMLModule_TweakSubtractiveWhitelist::postProcess() in this
// file only filters elements when the configured value is an array.
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedElements', null, 'lookup/null',
'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
'can overload it with your own list of tags to allow. Note that this '.
'method is subtractive: it does its job by taking away from HTML Purifier '.
'usual feature set, so you cannot add a tag that HTML Purifier never '.
'supported in the first place (like embed, form or head). If you change this, you '.
'probably also want to change %HTML.AllowedAttributes. '.
'<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. '.
'This directive has been available since 1.3.0.'
);
// Registers %HTML.AllowedAttributes (type 'lookup/null', default null).
// Keys use the 'tag.attr' / '*.attr' syntax that
// HTMLPurifier_HTMLModule_TweakSubtractiveWhitelist::postProcess() looks up.
// The description is HTML: keep a space before the <strong> warning so the
// two sentences do not run together when rendered.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'AllowedAttributes', null, 'lookup/null',
    'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
    '(style, id, class, dir, lang, xml:lang). '.
    '<strong>Warning:</strong> If another directive conflicts with the '.
    'elements here, <em>that</em> directive will win and override. For '.
    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
    'directive. You must set that directive to true before you can use '.
    'IDs at all. This directive has been available since 1.3.0.'
);
/**
 * Proprietary module that trims the already-built definition down to the
 * elements and attributes named in the user's whitelists.
 * @warning This module is purely subtractive: it cannot ADD elements or
 *          attributes, you must implement full definitions yourself!
 */
class HTMLPurifier_HTMLModule_TweakSubtractiveWhitelist extends HTMLPurifier_HTMLModule
{

    /**
     * Applies %HTML.AllowedElements and %HTML.AllowedAttributes to the
     * definition. Either directive is ignored unless its value is an array.
     *
     * @param $definition HTML definition being built; must expose
     *        config, info (element name => element definition with an
     *        attr array) and info_global_attr.
     */
    function postProcess(&$definition) {

        // elements: drop everything the whitelist does not mention
        $element_whitelist = $definition->config->get('HTML', 'AllowedElements');
        if (is_array($element_whitelist)) {
            foreach ($definition->info as $element => $unused) {
                if (isset($element_whitelist[$element])) continue;
                unset($definition->info[$element]);
            }
        }

        $attr_whitelist = $definition->config->get('HTML', 'AllowedAttributes');
        if (!is_array($attr_whitelist)) return;

        // global attributes are only addressable through the '*.attr' form
        foreach ($definition->info_global_attr as $global_attr => $unused) {
            if (isset($attr_whitelist["*.$global_attr"])) continue;
            unset($definition->info_global_attr[$global_attr]);
        }

        // per-element attributes survive if whitelisted either for that
        // specific element or for every element via the wildcard
        foreach ($definition->info as $element => $element_def) {
            foreach ($element_def->attr as $attr_name => $unused) {
                $specific = isset($attr_whitelist["$element.$attr_name"]);
                $wildcard = isset($attr_whitelist["*.$attr_name"]);
                if ($specific || $wildcard) continue;
                unset($definition->info[$element]->attr[$attr_name]);
            }
        }

    }

}
?>

View File

@ -12,6 +12,7 @@ class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness
$id_accumulator = new HTMLPurifier_IDAccumulator();
$this->context->register('IDAccumulator', $id_accumulator);
$this->config->set('Attr', 'EnableID', true);
$this->def = new HTMLPurifier_AttrDef_ID();
}
@ -74,7 +75,8 @@ class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness
}
function testIDReference() {
// reference functionality is disabled for now
function disabled_testIDReference() {
$this->def = new HTMLPurifier_AttrDef_ID(true);