0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-08 15:11:51 +00:00

[1.7.0] Remove vestigial chunks of code from HTMLModuleManager, switch HTMLDefinition to use validModules, and update some inline docs.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1055 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-05-14 01:03:21 +00:00
parent 05e1aca2fa
commit 048242004e
2 changed files with 13 additions and 270 deletions

View File

@ -182,7 +182,7 @@ class HTMLPurifier_HTMLDefinition
$this->manager->setup($this->config); $this->manager->setup($this->config);
foreach ($this->manager->activeModules as $module) { foreach ($this->manager->validModules as $module) {
foreach($module->info_tag_transform as $k => $v) { foreach($module->info_tag_transform as $k => $v) {
if ($v === false) unset($this->info_tag_transform[$k]); if ($v === false) unset($this->info_tag_transform[$k]);
else $this->info_tag_transform[$k] = $v; else $this->info_tag_transform[$k] = $v;

View File

@ -31,7 +31,7 @@ require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php';
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'Doctype', null, 'string/null', 'HTML', 'Doctype', null, 'string/null',
'Doctype to use, valid values are HTML 4.01 Transitional, HTML 4.01 '. 'Doctype to use, pre-defined values are HTML 4.01 Transitional, HTML 4.01 '.
'Strict, XHTML 1.0 Transitional, XHTML 1.0 Strict, XHTML 1.1. '. 'Strict, XHTML 1.0 Transitional, XHTML 1.0 Strict, XHTML 1.1. '.
'Technically speaking this is not actually a doctype (as it does '. 'Technically speaking this is not actually a doctype (as it does '.
'not identify a corresponding DTD), but we are using this name '. 'not identify a corresponding DTD), but we are using this name '.
@ -93,16 +93,6 @@ class HTMLPurifier_HTMLModuleManager
*/ */
var $doctypes; var $doctypes;
/**
* Associative array: $collections[$type][$doctype] = list of modules.
* This is used to logically separate types of functionality so that
* based on the doctype and other configuration settings they may
* be easily switched on and off. Custom setups may not need
* to use this abstraction, opting to have only one big collection
* with one valid doctype.
*/
var $collections = array();
/** /**
* Modules that may be used in a valid doctype of this kind. * Modules that may be used in a valid doctype of this kind.
* Correctional and leniency modules should not be placed in this * Correctional and leniency modules should not be placed in this
@ -110,14 +100,6 @@ class HTMLPurifier_HTMLModuleManager
* module for this doctype in here. * module for this doctype in here.
*/ */
var $validModules = array(); var $validModules = array();
var $validCollections = array(); /**< Collections to merge into $validModules */
/**
* Modules that we will allow in input, subset of $validModules. Single
* element definitions may result in us consulting validModules.
*/
var $activeModules = array();
var $activeCollections = array(); /**< Collections to merge into $activeModules */
var $counter = 0; /**< Designates next available integer order for modules. */ var $counter = 0; /**< Designates next available integer order for modules. */
var $initialized = false; /**< Says whether initialize() was called */ var $initialized = false; /**< Says whether initialize() was called */
@ -128,12 +110,6 @@ class HTMLPurifier_HTMLModuleManager
* with $autoCollection. * with $autoCollection.
*/ */
var $autoDoctype = false; var $autoDoctype = false;
/**
* Specifies what collection to siphon new modules from addModule() to,
* or false to disable the functionality. Must be used in conjunction
* with $autoDoctype.
*/
var $autoCollection = false;
/** Associative array of element name to defining modules (always array) */ /** Associative array of element name to defining modules (always array) */
var $elementLookup = array(); var $elementLookup = array();
@ -209,64 +185,13 @@ class HTMLPurifier_HTMLModuleManager
$d->modules = array_merge($common); $d->modules = array_merge($common);
$d->modulesForMode['lenient'] = array('TransformToStrict', 'TransformToXHTML11'); $d->modulesForMode['lenient'] = array('TransformToStrict', 'TransformToXHTML11');
// ----------------------------------------------------------------
// Safe modules for supported doctypes. These are included
// in the valid and active module lists by default
$this->collections['Safe'] = array(
'_Common' => array( // leading _ indicates private
'CommonAttributes', 'Text', 'Hypertext', 'List',
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
'StyleAttribute'
),
// HTML definitions, defer to XHTML definitions
'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
// XHTML definitions
'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy', 'Target' ),
'XHTML 1.0 Strict' => array(array('_Common')),
'XHTML 1.1' => array(array('_Common')),
);
// Modules that specify elements that are unsafe from untrusted
// third-parties. These should be registered in $validModules but
// almost never $activeModules unless you really know what you're
// doing.
$this->collections['Unsafe'] = array();
// Modules to import if lenient mode (attempt to convert everything
// to a valid representation) is on. These must not be in $validModules
// unless specified so.
$this->collections['Lenient'] = array(
'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
'XHTML 1.0 Strict' => array('TransformToStrict'),
'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
);
// Modules to import if correctional mode (correct everything that
// is feasible to strict mode) is on. These must not be in $validModules
// unless specified so.
$this->collections['Correctional'] = array(
'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
);
// User-space modules, custom code or whatever
$this->collections['Extension'] = array();
// setup active versus valid modules. ORDER IS IMPORTANT!
// definition modules
$this->makeCollectionActive('Safe');
$this->makeCollectionValid('Unsafe');
// redefinition modules
$this->makeCollectionActive('Lenient');
$this->makeCollectionActive('Correctional');
$this->autoDoctype = '*'; $this->autoDoctype = '*';
$this->autoCollection = 'Extension';
} }
/**
* @temporary
*/
function &addDoctype($name) { function &addDoctype($name) {
$this->doctypes[$name] = new HTMLPurifier_Doctype(); $this->doctypes[$name] = new HTMLPurifier_Doctype();
$this->doctypes[$name]->name = $name; $this->doctypes[$name]->name = $name;
@ -336,25 +261,6 @@ class HTMLPurifier_HTMLModuleManager
} }
} }
/**
* Makes a collection active, while also making it valid if not
* already done so. See $activeModules for the semantics of "active".
* @param $collection_name Name of collection to activate
*/
function makeCollectionActive($collection_name) {
    // An active collection is expected to be valid as well, so register
    // it as valid first when its name is not in the valid list yet.
    $already_valid = in_array($collection_name, $this->validCollections);
    if ($already_valid === false) {
        $this->makeCollectionValid($collection_name);
    }

    // Append unconditionally: no duplicate check is made on the active list.
    $this->activeCollections[] = $collection_name;
}
/**
* Makes a collection valid. See $validModules for the semantics of "valid"
*/
function makeCollectionValid($collection_name) {
// Append the collection name to the list of valid collections. No
// duplicate check is made here; callers that care (such as
// makeCollectionActive) test membership before calling.
$this->validCollections[] = $collection_name;
}
/** /**
* Adds a class prefix that addModule() will use to resolve a * Adds a class prefix that addModule() will use to resolve a
* string name to a concrete class * string name to a concrete class
@ -365,27 +271,14 @@ class HTMLPurifier_HTMLModuleManager
function setup($config) { function setup($config) {
// load up the autocollection
if ($this->autoCollection !== false) {
$this->makeCollectionActive($this->autoCollection);
}
// retrieve the doctype // retrieve the doctype
$this->doctype = $this->getDoctype($config); $this->doctype = $this->getDoctype($config);
if (isset($this->doctypeAliases[$this->doctype])) { if (isset($this->doctypeAliases[$this->doctype])) {
// resolve alias
$this->doctype = $this->doctypeAliases[$this->doctype]; $this->doctype = $this->doctypeAliases[$this->doctype];
} }
// process module collections to module name => module instance form // retrieve object instance of doctype
foreach ($this->collections as $col_i => $x) {
$this->processCollections($this->collections[$col_i]);
}
//$this->validModules = $this->assembleModules($this->validCollections);
$this->activeModules = $this->assembleModules($this->activeCollections);
// ----------------------------------------------------------------
$doctype = $this->doctypes[$this->doctype]; $doctype = $this->doctypes[$this->doctype];
$modules = $doctype->modules; $modules = $doctype->modules;
foreach ($doctype->modulesForMode as $mode => $mode_modules) { foreach ($doctype->modulesForMode as $mode => $mode_modules) {
@ -397,8 +290,6 @@ class HTMLPurifier_HTMLModuleManager
$this->validModules[$module] = $this->modules[$module]; $this->validModules[$module] = $this->modules[$module];
} }
// ----------------------------------------------------------------
// setup lookup table based on all valid modules // setup lookup table based on all valid modules
foreach ($this->validModules as $module) { foreach ($this->validModules as $module) {
foreach ($module->info as $name => $def) { foreach ($module->info as $name => $def) {
@ -411,168 +302,20 @@ class HTMLPurifier_HTMLModuleManager
// note the different choice // note the different choice
$this->contentSets = new HTMLPurifier_ContentSets( $this->contentSets = new HTMLPurifier_ContentSets(
// content models that contain non-allowed elements are // content set assembly deals with all possible modules,
// harmless because RemoveForeignElements will ensure // not just ones deemed to be "safe"
// they never get in anyway, and there is usually no
// reason why you should want to restrict a content
// model beyond what is mandated by the doctype.
// Note, however, that this means redefinitions of
// content models can't be tossed in validModules willy-nilly:
// that stuff still is regulated by configuration.
$this->validModules $this->validModules
); );
$this->attrCollections = new HTMLPurifier_AttrCollections( $this->attrCollections = new HTMLPurifier_AttrCollections(
$this->attrTypes, $this->attrTypes,
// only explicitly allowed modules are allowed to affect // there is no way to directly disable a global attribute,
// the global attribute collections. This means there's // but using AllowedAttributes or simply not including
// a distinction between loading the Bdo module, and the // the module in your custom doctype should be sufficient
// bdo element: Bdo will enable the dir attribute on all $this->validModules
// elements, while bdo will only define the bdo element,
// which will not have an editable directionality. This might
// catch people who are loading only elements by surprise, so
// we should consider loading an entire module if all the
// elements it defines are requested by the user, especially
// if it affects the global attribute collections.
$this->activeModules
); );
} }
/**
* Takes a list of collections and merges together all the defined
* modules for the current doctype from those collections.
* @param $collections List of collection suffixes we should grab
* modules from (like 'Safe' or 'Lenient')
*/
function assembleModules($collections) {
    // Accumulated result. Array union (+=) is used throughout, so for a
    // key that appears more than once the first value merged in wins.
    $assembled = array();
    // How many of the requested collections had an entry for the
    // current doctype.
    $matched = 0;

    foreach ($collections as $name) {
        if (!isset($this->collections[$name])) {
            trigger_error("$name collection is undefined", E_USER_ERROR);
            continue;
        }

        // Working copy: doctype => modules for this collection.
        $by_doctype = $this->collections[$name];
        $skip_catch_all = false;

        if (isset($by_doctype[$this->doctype])) {
            $for_doctype = $by_doctype[$this->doctype];
            // A '*' key inside the doctype entry is a marker, not a
            // module: strip it and suppress the catch-all merge below.
            if (isset($for_doctype['*'])) {
                unset($for_doctype['*']);
                $skip_catch_all = true;
            }
            $assembled += $for_doctype;
            $matched++;
        }

        // accept catch-all doctype
        $use_catch_all = !$skip_catch_all
            && $this->doctype !== '*'
            && isset($by_doctype['*']);
        if ($use_catch_all) {
            $assembled += $by_doctype['*'];
        }
    }

    if ($matched === 0) {
        // possible XSS injection if user-specified doctypes
        // are allowed
        trigger_error("Doctype {$this->doctype} does not exist, ".
            "check for typos (if you desire a doctype that allows ".
            "no elements, use an empty array collection)", E_USER_ERROR);
    }

    return $assembled;
}
/**
* Takes a collection and performs inclusions and substitutions for it.
* @param $cols Reference to collections class member variable
*/
function processCollections(&$cols) {
// Rewrites $cols in place (it is taken by reference) in three passes:
//   1. expand inclusion arrays found at index 0 of a collection,
//   2. replace module names with the instances held in $this->modules,
//      keyed by module name and sorted by each module's order,
//   3. drop collections whose name begins with an underscore.
// $cols is the set of collections
// $col_i is the name (index) of a collection
// $col is a collection/list of modules
// perform inclusions
foreach ($cols as $col_i => $col) {
$seen = array();
// An inclusion list is recognised only as a non-empty array stored
// at index 0 of the collection.
if (!empty($col[0]) && is_array($col[0])) {
$seen[$col_i] = true; // recursion reporting
$includes = $col[0];
unset($cols[$col_i][0]); // remove inclusions value, recursion guard
} else {
$includes = array();
}
if (empty($includes)) continue;
// $includes acts as a work queue: nested inclusion arrays found in
// an included collection are appended to it further down, which is
// why the loop re-tests isset($includes[$i]) on every iteration.
for ($i = 0; isset($includes[$i]); $i++) {
$inc = $includes[$i];
// A name that was already processed for this collection (or the
// collection's own name) is reported as circular and skipped.
if (isset($seen[$inc])) {
trigger_error(
"Circular inclusion detected in $col_i collection",
E_USER_ERROR
);
continue;
} else {
$seen[$inc] = true;
}
if (!isset($cols[$inc])) {
trigger_error(
"Collection $col_i tried to include undefined ".
"collection $inc", E_USER_ERROR);
continue;
}
foreach ($cols[$inc] as $module) {
if (is_array($module)) { // another inclusion!
foreach ($module as $inc2) $includes[] = $inc2;
continue;
}
$cols[$col_i][] = $module; // merge in the other modules
}
}
}
// replace with real modules, invert module from list to
// assoc array of module name to module instance
foreach ($cols as $col_i => $col) {
$ignore_global = false;
// $order collects module name => module order and is the sort key
// handed to array_multisort() below.
$order = array();
foreach ($col as $module_i => $module) {
// Remove the original entry; a valid module is re-added below
// under its name instead of its list index.
unset($cols[$col_i][$module_i]);
if (is_array($module)) {
trigger_error("Illegal inclusion array at index".
" $module_i found collection $col_i, inclusion".
" arrays must be at start of collection (index 0)",
E_USER_ERROR);
continue;
}
// A '*' => false entry is a flag rather than a module name; remember
// it so it can be restored once the collection has been sorted.
if ($module_i === '*' && $module === false) {
$ignore_global = true;
continue;
}
if (!isset($this->modules[$module])) {
trigger_error(
"Collection $col_i references undefined ".
"module $module",
E_USER_ERROR
);
continue;
}
$module = $this->modules[$module];
$cols[$col_i][$module->name] = $module;
$order[$module->name] = $module->order;
}
// Sort the collection's modules by ascending numeric order value.
array_multisort(
$order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
);
// Re-add the flag after sorting so it does not take part in the sort.
if ($ignore_global) $cols[$col_i]['*'] = false;
}
// delete pseudo-collections
foreach ($cols as $col_i => $col) {
// NOTE(review): assumes collection names are non-empty strings; confirm
// that integer or empty keys cannot occur here.
if ($col_i[0] == '_') unset($cols[$col_i]);
}
}
/** /**
* Retrieves the doctype from the configuration object * Retrieves the doctype from the configuration object
*/ */