mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-08 23:11:52 +00:00
More refactoring: for interest of unit testing, default doctypes were moved to an initialize() method which could optionally be omitted. Disable collection aliases in favor of doctype aliases.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@761 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
0d0173eb6e
commit
7eb751b5f5
@ -36,6 +36,22 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
*/
|
*/
|
||||||
var $modules = array();
|
var $modules = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String doctype we will validate against. See $validModules for use.
|
||||||
|
*/
|
||||||
|
var $doctype;
|
||||||
|
var $doctypeAliases = array(); /**< Lookup array of strings to real doctypes */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associative array: $collections[$type][$doctype] = list of modules.
|
||||||
|
* This is used to logically separate types of functionality so that
|
||||||
|
* based on the doctype and other configuration settings they may
|
||||||
|
* be easily switched and on and off. Custom setups may not need
|
||||||
|
* to use this abstraction, opting to have only one big collection
|
||||||
|
* with one valid doctype.
|
||||||
|
*/
|
||||||
|
var $collections = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Modules that may be used in a valid doctype of this kind.
|
* Modules that may be used in a valid doctype of this kind.
|
||||||
* Correctional and leniency modules should not be placed in this
|
* Correctional and leniency modules should not be placed in this
|
||||||
@ -43,89 +59,22 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
* module for this doctype in here.
|
* module for this doctype in here.
|
||||||
*/
|
*/
|
||||||
var $validModules = array();
|
var $validModules = array();
|
||||||
|
var $validCollections = array(); /**< Collections to merge into $validModules */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Collections that should be merged into $validModules
|
* Modules that we will allow in input, subset of $validModules. Single
|
||||||
*/
|
|
||||||
var $validModulesCollections = array('Safe', 'Unsafe', 'Lenient', 'Correctional');
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Modules that we will use broadly, subset of $validModules. Single
|
|
||||||
* element definitions may result in us consulting validModules.
|
* element definitions may result in us consulting validModules.
|
||||||
*/
|
*/
|
||||||
var $activeModules = array();
|
var $activeModules = array();
|
||||||
|
var $activeCollections = array(); /**< Collections to merge into $activeModules */
|
||||||
/**
|
|
||||||
* Collections that should be merged into $activeModules
|
|
||||||
*/
|
|
||||||
var $activeModulesCollections = array('Safe', 'Lenient', 'Correctional');
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Current doctype for which $validModules is based
|
|
||||||
*/
|
|
||||||
var $doctype;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Designates next available integer order for modules.
|
* Designates next available integer order for modules.
|
||||||
*/
|
*/
|
||||||
var $moduleCounter = 0;
|
var $counter = 0;
|
||||||
|
|
||||||
/**
|
|
||||||
* List of suffixes of collections to process
|
|
||||||
*/
|
|
||||||
var $collections = array('Safe', 'Unsafe', 'Lenient', 'Correctional');
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Associative array of module setup names to the corresponding safe
|
|
||||||
* (as in no XSS, no full document markup) modules. These are
|
|
||||||
* included in both valid and active module lists by default.
|
|
||||||
*/
|
|
||||||
var $collectionsSafe = array(
|
|
||||||
'_Common' => array( // leading _ indicates private
|
|
||||||
'CommonAttributes', 'Text', 'Hypertext', 'List',
|
|
||||||
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
|
||||||
'StyleAttribute'
|
|
||||||
),
|
|
||||||
// HTML definitions, defer completely to XHTML definitions
|
|
||||||
'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional',
|
|
||||||
'HTML 4.01 Strict' => 'XHTML 1.0 Strict',
|
|
||||||
// XHTML definitions
|
|
||||||
'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
|
|
||||||
'XHTML 1.0 Strict' => array(array('_Common')),
|
|
||||||
'XHTML 1.1' => array(array('_Common')),
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Modules that specify elements that are unsafe from untrusted
|
|
||||||
* third-parties. These should be registered in $validModules but
|
|
||||||
* almost never $activeModules unless you really know what you're
|
|
||||||
* doing.
|
|
||||||
*/
|
|
||||||
var $collectionsUnsafe = array( );
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Modules to import if lenient mode (attempt to convert everything
|
|
||||||
* to a valid representation) is on. These must not be in $activeModules
|
|
||||||
* unless specified so.
|
|
||||||
*/
|
|
||||||
var $collectionsLenient = array(
|
|
||||||
'HTML 4.01 Strict' => 'XHTML 1.0 Strict',
|
|
||||||
'XHTML 1.0 Strict' => array('TransformToStrict'),
|
|
||||||
'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Modules to import if correctional mode (correct everything that
|
|
||||||
* is feasible to strict mode) is on. These must not be in $activeModules
|
|
||||||
* unless specified so.
|
|
||||||
*/
|
|
||||||
var $collectionsCorrectional = array(
|
|
||||||
'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional',
|
|
||||||
'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
|
|
||||||
);
|
|
||||||
|
|
||||||
/** Associative array of element name to defining modules (always array) */
|
/** Associative array of element name to defining modules (always array) */
|
||||||
var $elementModuleLookup = array();
|
var $elementLookup = array();
|
||||||
|
|
||||||
/** List of prefixes we should use for resolving small names */
|
/** List of prefixes we should use for resolving small names */
|
||||||
var $prefixes = array('HTMLPurifier_HTMLModule_');
|
var $prefixes = array('HTMLPurifier_HTMLModule_');
|
||||||
@ -134,9 +83,21 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */
|
var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */
|
||||||
var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
|
var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModuleManager() {
|
/**
|
||||||
|
* @param $blank If true, don't do any initializing
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_HTMLModuleManager($blank = false) {
|
||||||
|
|
||||||
// modules
|
// the only editable internal object. The rest need to
|
||||||
|
// be manipulated through modules
|
||||||
|
$this->attrTypes = new HTMLPurifier_AttrTypes();
|
||||||
|
|
||||||
|
if (!$blank) $this->initialize();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function initialize() {
|
||||||
|
// load default modules to the recognized modules list (not active)
|
||||||
$modules = array(
|
$modules = array(
|
||||||
// define
|
// define
|
||||||
'CommonAttributes',
|
'CommonAttributes',
|
||||||
@ -147,19 +108,63 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
// redefine
|
// redefine
|
||||||
'TransformToStrict', 'TransformToXHTML11'
|
'TransformToStrict', 'TransformToXHTML11'
|
||||||
);
|
);
|
||||||
|
|
||||||
foreach ($modules as $module) {
|
foreach ($modules as $module) {
|
||||||
$this->addModule($module);
|
$this->addModule($module);
|
||||||
}
|
}
|
||||||
|
|
||||||
// the only editable internal object. The rest need to
|
// Safe modules for supported doctypes. These are included
|
||||||
// be manipulated through modules
|
// in the valid and active module lists by default
|
||||||
$this->attrTypes = new HTMLPurifier_AttrTypes();
|
$this->collections['Safe'] = array(
|
||||||
|
'_Common' => array( // leading _ indicates private
|
||||||
|
'CommonAttributes', 'Text', 'Hypertext', 'List',
|
||||||
|
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
||||||
|
'StyleAttribute'
|
||||||
|
),
|
||||||
|
// HTML definitions, defer to XHTML definitions
|
||||||
|
'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
|
||||||
|
'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
|
||||||
|
// XHTML definitions
|
||||||
|
'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
|
||||||
|
'XHTML 1.0 Strict' => array(array('_Common')),
|
||||||
|
'XHTML 1.1' => array(array('_Common')),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Modules that specify elements that are unsafe from untrusted
|
||||||
|
// third-parties. These should be registered in $validModules but
|
||||||
|
// almost never $activeModules unless you really know what you're
|
||||||
|
// doing.
|
||||||
|
$this->collections['Unsafe'] = array();
|
||||||
|
|
||||||
|
// Modules to import if lenient mode (attempt to convert everything
|
||||||
|
// to a valid representation) is on. These must not be in $activeModules
|
||||||
|
// unless specified so.
|
||||||
|
$this->collections['Lenient'] = array(
|
||||||
|
'HTML 4.01 Strict' => 'XHTML 1.0 Strict',
|
||||||
|
'XHTML 1.0 Strict' => array('TransformToStrict'),
|
||||||
|
'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
|
||||||
|
);
|
||||||
|
|
||||||
|
// Modules to import if correctional mode (correct everything that
|
||||||
|
// is feasible to strict mode) is on. These must not be in $activeModules
|
||||||
|
// unless specified so.
|
||||||
|
$this->collections['Correctional'] = array(
|
||||||
|
'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional',
|
||||||
|
'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
|
||||||
|
);
|
||||||
|
|
||||||
|
// setup active versus valid modules. ORDER IS IMPORTANT!
|
||||||
|
// definition modules
|
||||||
|
$this->makeCollectionActive('Safe');
|
||||||
|
$this->makeCollectionValid('Unsafe');
|
||||||
|
// redefinition modules
|
||||||
|
$this->makeCollectionActive('Lenient');
|
||||||
|
$this->makeCollectionActive('Correctional');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a module to the ordered list.
|
* Adds a module to the recognized module list. This does not
|
||||||
|
* do anything else: the module must be added to a corresponding
|
||||||
|
* collection to be "activated".
|
||||||
* @param $module Mixed: string module name, with or without
|
* @param $module Mixed: string module name, with or without
|
||||||
* HTMLPurifier_HTMLModule prefix, or instance of
|
* HTMLPurifier_HTMLModule prefix, or instance of
|
||||||
* subclass of HTMLPurifier_HTMLModule.
|
* subclass of HTMLPurifier_HTMLModule.
|
||||||
@ -180,10 +185,29 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
}
|
}
|
||||||
$module = new $module();
|
$module = new $module();
|
||||||
}
|
}
|
||||||
$module->order = $this->moduleCounter++; // assign then increment
|
$module->order = $this->counter++; // assign then increment
|
||||||
$this->modules[$module->name] = $module;
|
$this->modules[$module->name] = $module;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes a collection active, while also making it valid if not
|
||||||
|
* already done so. See $activeModules for the semantics of "active".
|
||||||
|
* @param $collection_name Name of collection to activate
|
||||||
|
*/
|
||||||
|
function makeCollectionActive($collection_name) {
|
||||||
|
if (!in_array($collection_name, $this->validCollections)) {
|
||||||
|
$this->makeCollectionValid($collection_name);
|
||||||
|
}
|
||||||
|
$this->activeCollections[] = $collection_name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes a collection valid. See $validModules for the semantics of "valid"
|
||||||
|
*/
|
||||||
|
function makeCollectionValid($collection_name) {
|
||||||
|
$this->validCollections[] = $collection_name;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a class prefix that addModule() will use to resolve a
|
* Adds a class prefix that addModule() will use to resolve a
|
||||||
* string name to a concrete class
|
* string name to a concrete class
|
||||||
@ -195,23 +219,25 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
function setup($config) {
|
function setup($config) {
|
||||||
// retrieve the doctype
|
// retrieve the doctype
|
||||||
$this->doctype = $this->getDoctype($config);
|
$this->doctype = $this->getDoctype($config);
|
||||||
|
if (isset($this->doctypeAliases[$this->doctype])) {
|
||||||
// process module collections to module name => module instance form
|
$this->doctype = $this->doctypeAliases[$this->doctype];
|
||||||
foreach ($this->collections as $suffix) {
|
|
||||||
$varname = 'collections' . $suffix;
|
|
||||||
$this->processCollections($this->$varname);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->validModules = $this->assembleModules($this->validModulesCollections);
|
// process module collections to module name => module instance form
|
||||||
$this->activeModules = $this->assembleModules($this->activeModulesCollections);
|
foreach ($this->collections as $col_i => $x) {
|
||||||
|
$this->processCollections($this->collections[$col_i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->validModules = $this->assembleModules($this->validCollections);
|
||||||
|
$this->activeModules = $this->assembleModules($this->activeCollections);
|
||||||
|
|
||||||
// setup lookup table based on all valid modules
|
// setup lookup table based on all valid modules
|
||||||
foreach ($this->validModules as $module) {
|
foreach ($this->validModules as $module) {
|
||||||
foreach ($module->elements as $name) {
|
foreach ($module->elements as $name) {
|
||||||
if (!isset($this->elementModuleLookup[$name])) {
|
if (!isset($this->elementLookup[$name])) {
|
||||||
$this->elementModuleLookup[$name] = array();
|
$this->elementLookup[$name] = array();
|
||||||
}
|
}
|
||||||
$this->elementModuleLookup[$name][] = $module->name;
|
$this->elementLookup[$name][] = $module->name;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -252,13 +278,21 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
*/
|
*/
|
||||||
function assembleModules($collections) {
|
function assembleModules($collections) {
|
||||||
$modules = array();
|
$modules = array();
|
||||||
foreach ($collections as $suffix) {
|
$numOfCollectionsUsed = 0;
|
||||||
$varname = 'collections' . $suffix;
|
foreach ($collections as $name) {
|
||||||
$cols = $this->$varname;
|
$cols = $this->collections[$name];
|
||||||
if (!empty($cols[$this->doctype])) {
|
if (isset($cols[$this->doctype])) {
|
||||||
$modules += $cols[$this->doctype];
|
$modules += $cols[$this->doctype];
|
||||||
|
$numOfCollectionsUsed++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if ($numOfCollectionsUsed < 1) {
|
||||||
|
// possible XSS injection if user-specified doctypes
|
||||||
|
// are allowed
|
||||||
|
trigger_error("Doctype {$this->doctype} does not exist, ".
|
||||||
|
"check for typos (if you desire a doctype that allows ".
|
||||||
|
"no elements, use an empty array collection)", E_USER_ERROR);
|
||||||
|
}
|
||||||
return $modules;
|
return $modules;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -337,29 +371,6 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// hook up aliases
|
|
||||||
foreach ($cols as $col_i => $col) {
|
|
||||||
if (!is_string($col)) continue;
|
|
||||||
if (!isset($cols[$col])) {
|
|
||||||
trigger_error(
|
|
||||||
"$col_i collection is alias to undefined $col collection",
|
|
||||||
E_USER_ERROR
|
|
||||||
);
|
|
||||||
unset($cols[$col_i]);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// recursion guard
|
|
||||||
if (is_string($cols[$col])) {
|
|
||||||
trigger_error(
|
|
||||||
"Cannot alias $col_i collection to alias",
|
|
||||||
E_USER_ERROR
|
|
||||||
);
|
|
||||||
unset($cols[$col_i]);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$cols[$col_i] = $cols[$col];
|
|
||||||
}
|
|
||||||
|
|
||||||
// delete pseudo-collections
|
// delete pseudo-collections
|
||||||
foreach ($cols as $col_i => $col) {
|
foreach ($cols as $col_i => $col) {
|
||||||
if ($col_i[0] == '_') unset($cols[$col_i]);
|
if ($col_i[0] == '_') unset($cols[$col_i]);
|
||||||
@ -371,6 +382,7 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
* Retrieves the doctype from the configuration object
|
* Retrieves the doctype from the configuration object
|
||||||
*/
|
*/
|
||||||
function getDoctype($config) {
|
function getDoctype($config) {
|
||||||
|
// this is BC
|
||||||
if ($config->get('Core', 'XHTML')) {
|
if ($config->get('Core', 'XHTML')) {
|
||||||
$doctype = 'XHTML 1.0';
|
$doctype = 'XHTML 1.0';
|
||||||
} else {
|
} else {
|
||||||
@ -418,11 +430,11 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
|
|
||||||
$modules = $this->validModules;
|
$modules = $this->validModules;
|
||||||
|
|
||||||
if (!isset($this->elementModuleLookup[$name])) {
|
if (!isset($this->elementLookup[$name])) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach($this->elementModuleLookup[$name] as $module_name) {
|
foreach($this->elementLookup[$name] as $module_name) {
|
||||||
|
|
||||||
$module = $modules[$module_name];
|
$module = $modules[$module_name];
|
||||||
$new_def = $module->info[$name];
|
$new_def = $module->info[$name];
|
||||||
@ -432,7 +444,7 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
} elseif ($def) {
|
} elseif ($def) {
|
||||||
$def->mergeIn($new_def);
|
$def->mergeIn($new_def);
|
||||||
} else {
|
} else {
|
||||||
// could have save it for another day functionality:
|
// could "save it for another day":
|
||||||
// non-standalone definitions that don't have a standalone
|
// non-standalone definitions that don't have a standalone
|
||||||
// to merge into could be deferred to the end
|
// to merge into could be deferred to the end
|
||||||
continue;
|
continue;
|
||||||
|
@ -111,16 +111,6 @@ class HTMLPurifier_HTMLModuleManagerTest extends UnitTestCase
|
|||||||
'HTML' => array('Text', 'Hypertext', 'Legacy')
|
'HTML' => array('Text', 'Hypertext', 'Legacy')
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
$this->assertProcessCollections( // aliasing!
|
|
||||||
array(
|
|
||||||
'HTML' => 'XHTML',
|
|
||||||
'XHTML' => array('Text', 'Hypertext')
|
|
||||||
),
|
|
||||||
array(
|
|
||||||
'HTML' => array('Text', 'Hypertext'),
|
|
||||||
'XHTML' => array('Text', 'Hypertext')
|
|
||||||
)
|
|
||||||
);
|
|
||||||
$this->assertProcessCollections( // nested inclusions
|
$this->assertProcessCollections( // nested inclusions
|
||||||
array(
|
array(
|
||||||
'Full' => array(array('Minimal'), 'Hypertext'),
|
'Full' => array(array('Minimal'), 'Hypertext'),
|
||||||
@ -171,21 +161,6 @@ class HTMLPurifier_HTMLModuleManagerTest extends UnitTestCase
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
$this->expectError('XHTML collection is alias to undefined Foobar collection');
|
|
||||||
$this->manager->processCollections(
|
|
||||||
$c = array(
|
|
||||||
'XHTML' => 'Foobar'
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
$this->expectError('Cannot alias XHTML collection to alias');
|
|
||||||
$this->manager->processCollections(
|
|
||||||
$c = array(
|
|
||||||
'XHTML' => 'Foobar',
|
|
||||||
'Foobar' => 'HTML',
|
|
||||||
'HTML' => array()
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user