0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-05 14:11:52 +00:00

Add an HTMLModuleManager.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@751 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-02-15 14:00:18 +00:00
parent c09432e171
commit edf20018f0
8 changed files with 489 additions and 348 deletions

View File

@ -36,8 +36,10 @@ class HTMLPurifier_ContentSets
* sets and populates the keys, values and lookup member variables. * sets and populates the keys, values and lookup member variables.
* @param $modules List of HTMLPurifier_HTMLModule * @param $modules List of HTMLPurifier_HTMLModule
*/ */
function setup($modules) { function HTMLPurifier_ContentSets($modules) {
if (!is_array($modules)) $modules = array($modules);
// populate content_sets based on module hints // populate content_sets based on module hints
// sorry, no way of overloading
foreach ($modules as $module_i => $module) { foreach ($modules as $module_i => $module) {
foreach ($module->content_sets as $key => $value) { foreach ($module->content_sets as $key => $value) {
if (isset($this->info[$key])) { if (isset($this->info[$key])) {
@ -48,7 +50,6 @@ class HTMLPurifier_ContentSets
} }
} }
} }
// perform content_set expansions // perform content_set expansions
$this->keys = array_keys($this->info); $this->keys = array_keys($this->info);
foreach ($this->info as $i => $set) { foreach ($this->info as $i => $set) {

View File

@ -3,7 +3,7 @@
// components // components
require_once 'HTMLPurifier/AttrTypes.php'; require_once 'HTMLPurifier/AttrTypes.php';
require_once 'HTMLPurifier/AttrCollections.php'; require_once 'HTMLPurifier/AttrCollections.php';
require_once 'HTMLPurifier/ContentSets.php'; require_once 'HTMLPurifier/HTMLModuleManager.php';
require_once 'HTMLPurifier/ElementDef.php'; require_once 'HTMLPurifier/ElementDef.php';
require_once 'HTMLPurifier/AttrDef.php'; require_once 'HTMLPurifier/AttrDef.php';
@ -22,31 +22,13 @@ require_once 'HTMLPurifier/TagTransform/Simple.php';
require_once 'HTMLPurifier/TagTransform/Center.php'; require_once 'HTMLPurifier/TagTransform/Center.php';
require_once 'HTMLPurifier/TagTransform/Font.php'; require_once 'HTMLPurifier/TagTransform/Font.php';
// default modules
require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/HTMLModule/Text.php';
require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
require_once 'HTMLPurifier/HTMLModule/List.php';
require_once 'HTMLPurifier/HTMLModule/Presentation.php';
require_once 'HTMLPurifier/HTMLModule/Edit.php';
require_once 'HTMLPurifier/HTMLModule/Bdo.php';
require_once 'HTMLPurifier/HTMLModule/Tables.php';
require_once 'HTMLPurifier/HTMLModule/Image.php';
require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
// compat modules
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
require_once 'HTMLPurifier/HTMLModule/Legacy.php';
// config modules
require_once 'HTMLPurifier/HTMLModule/SetParent.php';
// tweak modules // tweak modules
require_once 'HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php'; require_once 'HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php';
// this definition and its modules MUST NOT define configuration directives // this definition and its modules MUST NOT define configuration directives
// outside of the HTML or Attr namespaces // outside of the HTML or Attr namespaces
// will be superseded by more accurate doctype declaration schemes
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'Strict', false, 'bool', 'HTML', 'Strict', false, 'bool',
'Determines whether or not to use Transitional (loose) or Strict rulesets. '. 'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
@ -64,6 +46,39 @@ HTMLPurifier_ConfigSchema::define(
'This directive has been available since 1.3.0.' 'This directive has been available since 1.3.0.'
); );
// Configuration directives read by HTMLPurifier_HTMLDefinition itself:
// %HTML.Parent, %HTML.AllowedElements and %HTML.AllowedAttributes.
// The description strings are HTML and are concatenated verbatim, so every
// fragment must carry its own trailing space.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'Parent', 'div', 'string',
    'String name of element that HTML fragment passed to library will be '.
    'inserted in. An interesting variation would be using span as the '.
    'parent element, meaning that only inline tags would be allowed. '.
    'This directive has been available since 1.3.0.'
);

HTMLPurifier_ConfigSchema::define(
    'HTML', 'AllowedElements', null, 'lookup/null',
    'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
    'can overload it with your own list of tags to allow. Note that this '.
    'method is subtractive: it does its job by taking away from HTML Purifier '.
    'usual feature set, so you cannot add a tag that HTML Purifier never '.
    'supported in the first place (like embed, form or head). If you change this, you '.
    'probably also want to change %HTML.AllowedAttributes. '.
    '<strong>Warning:</strong> If another directive conflicts with the '.
    'elements here, <em>that</em> directive will win and override. '.
    'This directive has been available since 1.3.0.'
);

HTMLPurifier_ConfigSchema::define(
    'HTML', 'AllowedAttributes', null, 'lookup/null',
    'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
    '(style, id, class, dir, lang, xml:lang). '.
    '<strong>Warning:</strong> If another directive conflicts with the '.
    'elements here, <em>that</em> directive will win and override. For '.
    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
    'directive. You must set that directive to true before you can use '.
    'IDs at all. This directive has been available since 1.3.0.'
);
/** /**
* Definition of the purified HTML that describes allowed children, * Definition of the purified HTML that describes allowed children,
* attributes, and many other things. * attributes, and many other things.
@ -150,147 +165,24 @@ class HTMLPurifier_HTMLDefinition
/** PUBLIC BUT INTERNAL VARIABLES */ /** PUBLIC BUT INTERNAL VARIABLES */
/** var $setup = false; /**< Has setup() been called yet? */
* Boolean is a strict definition? var $config; /**< Temporary instance of HTMLPurifier_Config */
* @public
*/
var $strict;
/**
* Array of HTMLPurifier_Module instances, indexed by module's class name
* @public
*/
var $modules = array();
/**
* Associative array of module class name to module order keywords or
* numbers (keyword is preferred, all keywords are resolved at beginning
* of setup())
* @public
*/
var $modules_order = array();
/**
* List of prefixes HTML Purifier should try to resolve short names to.
* @public
*/
var $module_prefixes = array('HTMLPurifier_HTMLModule_');
/**
* Instance of HTMLPurifier_AttrTypes
* @public
*/
var $attr_types;
/**
* Instance of HTMLPurifier_AttrCollections
* @public
*/
var $attr_collections;
/**
* Has setup() been called yet?
* @public
*/
var $setup = false;
/**
* Instance of HTMLPurifier_ContentSets
* @public
*/
var $content_sets;
/**
* Lookup table of module order "names" and an integer index
* @public
*/
var $order_keywords = array(
'begin' => 10,
'setup' => 20,
'pre' => 30,
'early' => 40,
'main' => 50,
'late' => 60,
'post' => 70,
'cleanup' => 80,
'end' => 90
);
/**
* Temporary instance of HTMLPurifier_Config for convenience reasons,
* is removed after setup().
* @public
*/
var $config;
var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
/** /**
* Performs low-cost, preliminary initialization. * Performs low-cost, preliminary initialization.
* @param $config Instance of HTMLPurifier_Config * @param $config Instance of HTMLPurifier_Config
*/ */
function HTMLPurifier_HTMLDefinition(&$config) { function HTMLPurifier_HTMLDefinition(&$config) {
$this->config =& $config; $this->config =& $config;
$this->manager = new HTMLPurifier_HTMLModuleManager();
// set up public internals
$this->strict = $config->get('HTML', 'Strict');
$this->attr_types = new HTMLPurifier_AttrTypes();
$this->attr_collections = new HTMLPurifier_AttrCollections();
$this->content_sets = new HTMLPurifier_ContentSets();
// modules
$main_modules = array('Text', 'Hypertext', 'List', 'Presentation',
'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute');
foreach ($main_modules as $module) $this->addModule($module, 'main');
if (!$this->strict) $this->addModule('Legacy', 'late');
$this->addModule('SetParent', 'post');
$this->addModule('TransformToStrict', 'cleanup');
$this->addModule('TweakSubtractiveWhitelist', 'cleanup');
}
/**
* Adds a module to the ordered list.
* @param $module Mixed: string module name, with or without
* HTMLPurifier_HTMLModule prefix, or instance of
* subclass of HTMLPurifier_HTMLModule.
*/
function addModule($module, $order = 'main') {
if (is_string($module)) {
$original_module = $module;
if (!class_exists($module)) {
foreach ($this->module_prefixes as $prefix) {
$module = $prefix . $original_module;
if (class_exists($module)) break;
}
}
if (!class_exists($module)) {
trigger_error($original_module . ' module does not exist', E_USER_ERROR);
return;
}
$module = new $module($this);
}
if (!isset($this->order_keywords[$order])) {
trigger_error('Order keyword does not exist', E_USER_ERROR);
return;
}
$this->modules[$module->name] = $module;
$this->modules_order[$module->name] = $order;
} }
/** /**
* Processes internals into form usable by HTMLPurifier internals. * Processes internals into form usable by HTMLPurifier internals.
* Modifying the definition after calling this function should not * Modifying the definition after calling this function should not
* be done. * be done.
* @param $config Instance of HTMLPurifier_Config
*/ */
function setup() { function setup() {
@ -298,109 +190,37 @@ class HTMLPurifier_HTMLDefinition
if ($this->setup) {return;} else {$this->setup = true;} if ($this->setup) {return;} else {$this->setup = true;}
$this->processModules(); $this->processModules();
$this->setupAttrTransform(); $this->setupConfigStuff();
$this->setupBlockWrapper();
unset($this->config); unset($this->config);
unset($this->manager);
} }
/** /**
* Processes the modules, setting up related info variables * Extract out the information from the manager
*/ */
function processModules() { function processModules() {
// substitute out the order keywords $this->manager->setup($this->config);
foreach ($this->modules_order as $name => $order) { $modules = $this->manager->getModules($this->config);
if (empty($this->modules[$name])) {
trigger_error('Orphan module order definition for module: ' . $name, E_USER_ERROR);
return;
}
if (is_int($order)) continue;
if (empty($this->order_keywords[$order])) {
trigger_error('Unknown order keyword: ' . $order, E_USER_ERROR);
return;
}
$this->modules_order[$name] = $this->order_keywords[$order];
}
// sort modules member variable foreach ($modules as $module) {
array_multisort(
$this->modules_order, SORT_ASC, SORT_NUMERIC,
$this->modules
);
// setup the global registries
$this->attr_collections->setup($this->attr_types, $this->modules);
$this->content_sets->setup($this->modules);
$this->info_content_sets = $this->content_sets->lookup;
// process the modules
foreach ($this->modules as $module_i => $module) {
$module->preProcess($this);
// process element-wise definitions
foreach ($module->info as $name => $def) {
// setup info
if (!isset($this->info[$name])) {
if ($def->standalone) {
$this->info[$name] = $this->modules[$module_i]->info[$name];
} else {
// attempting to merge into an element that doesn't
// exist, ignore it
continue;
}
} else {
$this->info[$name]->mergeIn($this->modules[$module_i]->info[$name]);
}
// process info
$def = $this->info[$name];
// attribute value expansions
$this->attr_collections->performInclusions($def->attr);
$this->attr_collections->expandIdentifiers(
$def->attr, $this->attr_types);
// descendants_are_inline, for ChildDef_Chameleon
if (is_string($def->content_model) &&
strpos($def->content_model, 'Inline') !== false) {
if ($name != 'del' && $name != 'ins') {
// this is for you, ins/del
$def->descendants_are_inline = true;
}
}
// set child def from content model
$this->content_sets->generateChildDef($def, $module);
$this->info[$name] = $def;
}
// merge in global info variables from module
foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v; foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v;
foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v; foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v;
foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v; foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v;
$module->postProcess($this);
} }
$this->info = $this->manager->getElements($this->config);
$this->info_content_sets = $this->manager->contentSets->lookup;
} }
/** /**
* Sets up attribute transformations * Sets up stuff based on config. We need a better way of doing this.
*/ */
function setupAttrTransform() { function setupConfigStuff() {
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
}
/**
* Sets up block wrapper based on config
*/
function setupBlockWrapper() {
$block_wrapper = $this->config->get('HTML', 'BlockWrapper'); $block_wrapper = $this->config->get('HTML', 'BlockWrapper');
if (isset($this->info_content_sets['Block'][$block_wrapper])) { if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_block_wrapper = $block_wrapper; $this->info_block_wrapper = $block_wrapper;
@ -408,6 +228,43 @@ class HTMLPurifier_HTMLDefinition
trigger_error('Cannot use non-block element as block wrapper.', trigger_error('Cannot use non-block element as block wrapper.',
E_USER_ERROR); E_USER_ERROR);
} }
$parent = $this->config->get('HTML', 'Parent');
$def = $this->manager->getElement($parent, $this->config);
if ($def) {
$this->info_parent = $parent;
$this->info_parent_def = $def;
} else {
trigger_error('Cannot use unrecognized element as parent.',
E_USER_ERROR);
$this->info_parent_def = $this->manager->getElement(
$this->info_parent, $this->config);
}
// setup allowed elements, SubtractiveWhitelist module
$allowed_elements = $this->config->get('HTML', 'AllowedElements');
if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
}
}
$allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) {
if (!isset($allowed_attributes["*.$attr_key"])) {
unset($this->info_global_attr[$attr_key]);
}
}
foreach ($this->info as $tag => $info) {
foreach ($info->attr as $attr => $attr_info) {
if (!isset($allowed_attributes["$tag.$attr"]) &&
!isset($allowed_attributes["*.$attr"])) {
unset($this->info[$tag]->attr[$attr]);
}
}
}
}
} }

View File

@ -22,10 +22,10 @@ class HTMLPurifier_HTMLModule
var $name; var $name;
/** /**
* List of elements that the module implements. * List of elements that the module implements or substantially
* @note This is only for convention, as a module will often loop * modifies, either through a new ElementDef or a modified
* through the $elements array to define HTMLPurifier_ElementDef * content set that directly affects the element (if the element was
* in the $info array. * removed or added from a content set).
* @protected * @protected
*/ */
var $elements = array(); var $elements = array();

View File

@ -1,33 +0,0 @@
<?php
// Registers the %HTML.Parent directive: a string, defaulting to 'div', that
// names the element the purified fragment is treated as being inserted in.
// It is read by HTMLPurifier_HTMLModule_SetParent::postProcess() below.
HTMLPurifier_ConfigSchema::define(
'HTML', 'Parent', 'div', 'string',
'String name of element that HTML fragment passed to library will be '.
'inserted in. An interesting variation would be using span as the '.
'parent element, meaning that only inline tags would be allowed. '.
'This directive has been available since 1.3.0.'
);
/**
* Proprietary module that sets up the parent definitions.
*/
class HTMLPurifier_HTMLModule_SetParent extends HTMLPurifier_HTMLModule
{

    var $name = 'SetParent';

    /**
     * Applies the %HTML.Parent directive to the definition.
     *
     * When the configured element exists in $definition->info it becomes
     * $definition->info_parent; otherwise an E_USER_ERROR is raised and
     * info_parent is left as it was. In both cases info_parent_def is then
     * refreshed from whatever info_parent currently names.
     *
     * @param $definition Definition being processed (passed by reference);
     *                    its config, info, info_parent and info_parent_def
     *                    members are used.
     */
    function postProcess(&$definition) {
        $requested  = $definition->config->get('HTML', 'Parent');
        $recognized = isset($definition->info[$requested]);

        if (!$recognized) {
            trigger_error('Cannot use unrecognized element as parent.',
                E_USER_ERROR);
        } else {
            $definition->info_parent = $requested;
        }

        // resolved after the branch so a rejected value falls back to the
        // parent that was already set on the definition
        $active = $definition->info_parent;
        $definition->info_parent_def = $definition->info[$active];
    }

}
?>

View File

@ -33,8 +33,7 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
'lang' => false // placeholder 'lang' => false // placeholder
); );
function HTMLPurifier_HTMLModule_TransformToStrict(&$definition) { function HTMLPurifier_HTMLModule_TransformToStrict() {
$config = $definition->config;
// deprecated tag transforms // deprecated tag transforms
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
@ -62,10 +61,10 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
$this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang(); $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
$this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang(); $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
if ($config->get('HTML', 'Strict')) { // this should not be applied to XHTML 1.0 Transitional, ONLY
// XHTML 1.0 Strict. We may need three classes
$this->info['blockquote']->content_model_type = 'strictblockquote'; $this->info['blockquote']->content_model_type = 'strictblockquote';
$this->info['blockquote']->child = false; // recalculate please! $this->info['blockquote']->child = false; // recalculate please!
}
} }

View File

@ -1,70 +0,0 @@
<?php
// Directives consumed by the subtractive whitelist: %HTML.AllowedElements
// and %HTML.AllowedAttributes. The description strings are HTML and are
// concatenated verbatim, so every fragment must carry its own trailing space.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'AllowedElements', null, 'lookup/null',
    'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
    'can overload it with your own list of tags to allow. Note that this '.
    'method is subtractive: it does its job by taking away from HTML Purifier '.
    'usual feature set, so you cannot add a tag that HTML Purifier never '.
    'supported in the first place (like embed, form or head). If you change this, you '.
    'probably also want to change %HTML.AllowedAttributes. '.
    '<strong>Warning:</strong> If another directive conflicts with the '.
    'elements here, <em>that</em> directive will win and override. '.
    'This directive has been available since 1.3.0.'
);

HTMLPurifier_ConfigSchema::define(
    'HTML', 'AllowedAttributes', null, 'lookup/null',
    'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
    '(style, id, class, dir, lang, xml:lang). '.
    '<strong>Warning:</strong> If another directive conflicts with the '.
    'elements here, <em>that</em> directive will win and override. For '.
    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
    'directive. You must set that directive to true before you can use '.
    'IDs at all. This directive has been available since 1.3.0.'
);
/**
* Proprietary module that further narrows down allowed elements and
* attributes that were allowed to a user-defined whitelist.
* @warning This module cannot ADD elements or attributes, you must
* implement full definitions yourself!
*/
class HTMLPurifier_HTMLModule_TweakSubtractiveWhitelist extends HTMLPurifier_HTMLModule
{

    var $name = 'TweakSubtractiveWhitelist';

    /**
     * Narrows the definition down to the user-supplied whitelists.
     *
     * %HTML.AllowedElements, when it is an array, removes every entry of
     * $definition->info whose name is not a key of the lookup.
     * %HTML.AllowedAttributes, when it is an array, removes global
     * attributes lacking a '*.attr' key and per-element attributes lacking
     * both a 'tag.attr' and a '*.attr' key. Nothing is ever added.
     *
     * @param $definition Definition being processed (passed by reference);
     *                    its config, info and info_global_attr members
     *                    are used.
     */
    function postProcess(&$definition) {

        // element whitelist
        $element_lookup = $definition->config->get('HTML', 'AllowedElements');
        if (is_array($element_lookup)) {
            foreach ($definition->info as $element => $unused) {
                if (isset($element_lookup[$element])) continue;
                unset($definition->info[$element]);
            }
        }

        // attribute whitelist
        $attr_lookup = $definition->config->get('HTML', 'AllowedAttributes');
        if (!is_array($attr_lookup)) return;

        foreach ($definition->info_global_attr as $global => $unused) {
            if (isset($attr_lookup["*.$global"])) continue;
            unset($definition->info_global_attr[$global]);
        }

        foreach ($definition->info as $element => $element_def) {
            foreach ($element_def->attr as $attribute => $unused) {
                $kept = isset($attr_lookup["$element.$attribute"]) ||
                        isset($attr_lookup["*.$attribute"]);
                if ($kept) continue;
                unset($definition->info[$element]->attr[$attribute]);
            }
        }

    }

}
?>

View File

@ -0,0 +1,388 @@
<?php
require_once 'HTMLPurifier/ContentSets.php';
require_once 'HTMLPurifier/HTMLModule.php';
// W3C modules
require_once 'HTMLPurifier/HTMLModule/Text.php';
require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
require_once 'HTMLPurifier/HTMLModule/List.php';
require_once 'HTMLPurifier/HTMLModule/Presentation.php';
require_once 'HTMLPurifier/HTMLModule/Edit.php';
require_once 'HTMLPurifier/HTMLModule/Bdo.php';
require_once 'HTMLPurifier/HTMLModule/Tables.php';
require_once 'HTMLPurifier/HTMLModule/Image.php';
require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once 'HTMLPurifier/HTMLModule/Legacy.php';
// proprietary modules
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php';
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Array of module instances, indexed by each module's $name member.
     * All known modules, regardless of use, are in this array.
     */
    var $modules = array();

    /**
     * Associative array of module name to module order keyword or integer
     * (keyword is preferred; all keywords are resolved to their integer
     * index at the beginning of setup())
     */
    var $order = array();

    /**
     * Associative array of module setup names to the corresponding safe
     * (as in no XSS, no full document markup) modules.
     * A string value is an alias of another collection; an array value is
     * a list of module names whose optional first entry is itself an array
     * naming collections to include. See processCollections().
     */
    var $collectionsSafe = array(
        '_Common' => array( // leading _ indicates private
            'Text',
            'Hypertext',
            'List',
            'Presentation',
            'Edit',
            'Bdo',
            'Tables',
            'Image',
            'StyleAttribute'
        ),
        // HTML definitions, defer completely to XHTML definitions
        'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional',
        'HTML 4.01 Strict' => 'XHTML 1.0 Strict',
        // XHTML definitions
        'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
        'XHTML 1.0 Strict' => array(array('_Common')),
        'XHTML 1.1' => array(array('_Common')),
    );

    /**
     * Modules to import if lenient mode (attempt to convert everything
     * to a valid representation) is on
     */
    var $collectionsLenient = array(
        'HTML 4.01 Strict' => 'XHTML 1.0 Strict',
        'XHTML 1.0 Strict' => array('TransformToStrict'),
        'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
    );

    /**
     * Modules to import if correctional mode (correct everything that
     * is feasible to strict mode) is on
     */
    var $collectionsCorrectional = array(
        'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional',
        'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
    );

    /** Associative array of element name to defining modules (always array) */
    var $elementModuleLookup = array();

    /** List of prefixes we should use for resolving small names */
    var $prefixes = array('HTMLPurifier_HTMLModule_');

    /** Associative array of order keywords to an integer index */
    var $orderKeywords = array(
        'define' => 10,
        'define-redefine' => 20,
        'redefine' => 30,
    );

    /** Instance of HTMLPurifier_ContentSets configured with full modules. */
    var $contentSets;

    var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */
    var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */

    /**
     * Registers the built-in modules under their order keywords and creates
     * the attribute type and attribute collection registries.
     */
    function HTMLPurifier_HTMLModuleManager() {

        // modules
        $modules = array(
            'define' => array(
                'Text', 'Hypertext', 'List', 'Presentation',
                'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute'
            ),
            'define-redefine' => array(
                'Legacy'
            ),
            'redefine' => array(
                'TransformToStrict', 'TransformToXHTML11'
            )
        );

        foreach ($modules as $order => $modules_of_order) {
            foreach ($modules_of_order as $module) {
                $this->addModule($module, $order);
            }
        }

        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->attrCollections = new HTMLPurifier_AttrCollections();

    }

    /**
     * Adds a module to the ordered list.
     * @param $module Mixed: string module name, with or without
     *                HTMLPurifier_HTMLModule prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @param $order  Key of $orderKeywords, or an integer index. Defaults
     *                to 'define'; the previous default, 'main', is not a
     *                registered keyword and therefore always raised an
     *                error.
     */
    function addModule($module, $order = 'define') {
        if (is_string($module)) {
            $original_module = $module;
            if (!class_exists($module)) {
                // try each known prefix until one yields a real class
                foreach ($this->prefixes as $prefix) {
                    $module = $prefix . $original_module;
                    if (class_exists($module)) break;
                }
            }
            if (!class_exists($module)) {
                trigger_error($original_module . ' module does not exist', E_USER_ERROR);
                return;
            }
            $module = new $module();
        }
        // integers are legal orders too: setup() passes them through as-is
        if (!is_int($order) && !isset($this->orderKeywords[$order])) {
            trigger_error('Order keyword does not exist', E_USER_ERROR);
            return;
        }
        $this->modules[$module->name] = $module;
        $this->order[$module->name] = $order;
        // record, per element, every module that declares it
        foreach ($module->elements as $name) {
            if (!isset($this->elementModuleLookup[$name])) {
                $this->elementModuleLookup[$name] = array();
            }
            $this->elementModuleLookup[$name][] = $module->name;
        }
    }

    /**
     * Resolves module ordering, expands the collection tables and builds
     * the content set and attribute collection registries.
     * @param $config Configuration object, handed on to getModules()
     */
    function setup($config) {

        // substitute out the order keywords
        foreach ($this->order as $name => $order) {
            if (empty($this->modules[$name])) {
                trigger_error('Orphan module order definition for module: ' . $name, E_USER_ERROR);
                return;
            }
            if (is_int($order)) continue;
            if (empty($this->orderKeywords[$order])) {
                trigger_error('Unknown order keyword: ' . $order, E_USER_ERROR);
                return;
            }
            $this->order[$name] = $this->orderKeywords[$order];
        }

        // sort modules member variable
        array_multisort(
            $this->order, SORT_ASC, SORT_NUMERIC,
            $this->modules
        );

        // sort the lookup modules: rebuild each multi-module list by
        // walking the now-sorted $order table
        foreach ($this->elementModuleLookup as $k => $modules) {
            if (count($modules) > 1) {
                $this->elementModuleLookup[$k] = array();
                $module_lookup = array_flip($modules);
                foreach ($this->order as $name => $v) {
                    if (isset($module_lookup[$name])) {
                        $this->elementModuleLookup[$k][] = $name;
                    }
                }
            }
        }

        $this->processCollections($this->collectionsSafe);
        $this->processCollections($this->collectionsLenient);
        $this->processCollections($this->collectionsCorrectional);

        // notice that it is vital that we get a full content sets
        // elements lineup, but attr collections must not go by
        // anything other than the modules the user wants
        $this->contentSets = new HTMLPurifier_ContentSets(
            $this->getModules($config, true)
        );
        $this->attrCollections->setup($this->attrTypes,
            $this->getModules($config));

    }

    /**
     * Expands a table of collections in place: resolves inclusions,
     * swaps module names for the registered module instances (dropping
     * duplicates), hooks up string aliases and finally removes private
     * collections, i.e. those whose name starts with an underscore.
     * @param $cols Collection table, modified by reference
     */
    function processCollections(&$cols) {

        // $cols is the set of collections
        // $col_i is the name (index) of a collection
        // $col is a collection/list of modules

        // perform inclusions
        foreach ($cols as $col_i => $col) {
            if (is_string($col)) continue; // alias, save for later
            // no inclusions to do (also covers an empty collection, which
            // has no index 0 to inspect)
            if (!isset($col[0]) || !is_array($col[0])) continue;
            $includes = $col[0];
            unset($cols[$col_i][0]); // remove inclusions value
            // $includes may grow while we walk it, hence the index loop
            for ($i = 0; isset($includes[$i]); $i++) {
                $inc = $includes[$i];
                foreach ($cols[$inc] as $module) {
                    if (is_array($module)) { // another inclusion!
                        foreach ($module as $inc2) $includes[] = $inc2;
                        continue;
                    }
                    $cols[$col_i][] = $module; // merge in the other modules
                }
            }
        }

        // replace with real modules
        foreach ($cols as $col_i => $col) {
            if (is_string($col)) continue;
            $seen = array(); // lookup array to prevent dupes
            foreach ($col as $module_i => $module) {
                if (isset($seen[$module])) {
                    unset($cols[$col_i][$module_i]);
                    continue;
                }
                $cols[$col_i][$module_i] = $this->modules[$module];
                $seen[$module] = true;
            }
        }

        // hook up aliases
        foreach ($cols as $col_i => $col) {
            if (!is_string($col)) continue;
            $cols[$col_i] = $cols[$col];
        }

        // delete pseudo-collections
        foreach ($cols as $col_i => $col) {
            if (is_string($col_i) && isset($col_i[0]) && $col_i[0] == '_') {
                unset($cols[$col_i]);
            }
        }

    }

    /**
     * Maps the %HTML.Strict directive onto a doctype name.
     * @param $config Configuration object
     * @return 'XHTML 1.0 Strict' when %HTML.Strict is truthy, otherwise
     *         'XHTML 1.0 Transitional'
     */
    function getDoctype($config) {
        // get rid of this later
        if ($config->get('HTML', 'Strict')) {
            $doctype = 'XHTML 1.0 Strict';
        } else {
            $doctype = 'XHTML 1.0 Transitional';
        }
        return $doctype;
    }

    /**
     * Retrieves the modules that apply to the configured doctype: the safe
     * collection plus, when present, the lenient and correctional ones.
     * @param $config Configuration object
     * @param $full Whether or not to retrieve *all* applicable modules
     *              for the doctype and not just the safe/whitelisted ones.
     *              Leniency modules are added based on config though.
     *              NOTE(review): this flag is not consulted by the current
     *              implementation; both values return the same list.
     * @return Associative array of module name to module instance
     */
    function getModules($config, $full = false) {

        // CACHE!!!

        $doctype = $this->getDoctype($config);

        // more logic is needed here to retrieve modules based on
        // configuration's leniency, etc.
        $modules = $this->collectionsSafe[$doctype];
        if(isset($this->collectionsLenient[$doctype])) {
            $modules = array_merge($modules, $this->collectionsLenient[$doctype]);
        }
        if(isset($this->collectionsCorrectional[$doctype])) {
            $modules = array_merge($modules, $this->collectionsCorrectional[$doctype]);
        }

        // convert from numeric to module name indexing, also prevents
        // duplicates
        $ret = array();
        foreach ($modules as $module) {
            $ret[$module->name] = $module;
        }

        return $ret;
    }

    /**
     * Builds the element table for the configured doctype by calling
     * getElement() for every element name declared by an active module.
     * @param $config Configuration object
     * @return Associative array of element name to getElement() result
     */
    function getElements($config) {

        $modules = $this->getModules($config);

        $elements = array();
        foreach ($modules as $module) {
            foreach ($module->elements as $name) {
                $elements[$name] = $this->getElement($name, $config);
            }
        }

        return $elements;

    }

    /**
     * Assembles the definition of a single element from the modules that
     * declare it, in lookup order. The first standalone definition found
     * becomes the base; later definitions are merged into it, and
     * non-standalone definitions seen before any base exists are skipped.
     * @param $name Element name
     * @param $config Configuration object
     * @return The assembled definition, or false when no module yields one
     */
    function getElement($name, $config) {

        $def = false;

        $modules = $this->getModules($config, true);

        if (!isset($this->elementModuleLookup[$name])) {
            return false;
        }

        foreach($this->elementModuleLookup[$name] as $module_name) {

            // oops, we can't use that module at all
            if (!isset($modules[$module_name])) continue;

            $module = $modules[$module_name];
            $new_def = $module->info[$name];

            // NOTE(review): where objects are assigned by handle (PHP 5+),
            // $def aliases the module's own definition, so the merge and
            // expansions below would alter the module's stored copy --
            // confirm whether an explicit copy is wanted here.
            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                $def->mergeIn($new_def);
            } else {
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);

        }

        return $def;

    }

    /**
     * Retrieves the child member of an element's assembled definition.
     * The source describes the parent element as a special case because
     * it may not be allowed in the document.
     * @param $element Element name
     * @param $config Configuration object
     * @return The definition's child member; false, after raising an
     *         E_USER_ERROR, when getElement() finds no definition
     */
    function getFullChildDef($element, $config) {
        $def = $this->getElement($element, $config);
        if ($def === false) {
            trigger_error('Cannot get child def of element not available in doctype',
                E_USER_ERROR);
            return false;
        }
        return $def->child;
    }

}
?>

View File

@ -23,8 +23,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$ret .= $this->row('Parent of fragment', $def->info_parent); $ret .= $this->row('Parent of fragment', $def->info_parent);
$ret .= $this->renderChildren($def->info_parent_def->child); $ret .= $this->renderChildren($def->info_parent_def->child);
$ret .= $this->row('Strict mode', $def->strict); $ret .= $this->row('Block wrap name', $def->info_block_wrapper);
if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper);
$ret .= $this->start('tr'); $ret .= $this->start('tr');
$ret .= $this->element('th', 'Global attributes'); $ret .= $this->element('th', 'Global attributes');