diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php index 2be7b067..bfda03b9 100644 --- a/library/HTMLPurifier/HTMLModule.php +++ b/library/HTMLPurifier/HTMLModule.php @@ -22,14 +22,9 @@ class HTMLPurifier_HTMLModule var $name; /** - * Type of module. Currently three supported values: define, - * define-redefine and redefine. Define means that the module solely - * creates new elements. Redefine means that the module solely - * redefines aspects of already existing elements. Define-Redefine - * is a combo of the two. This affects the order in which the module - * will be loaded, see HTMLPurifier_HTMLModuleManager for more details. + * Dynamically set integer that specifies when the module was loaded in. */ - var $type; + var $order; /** * List of elements that the module implements or substantially diff --git a/library/HTMLPurifier/HTMLModule/Bdo.php b/library/HTMLPurifier/HTMLModule/Bdo.php index 08a20aa7..17e5e987 100644 --- a/library/HTMLPurifier/HTMLModule/Bdo.php +++ b/library/HTMLPurifier/HTMLModule/Bdo.php @@ -11,7 +11,6 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule { var $name = 'Bdo'; - var $type = 'define'; var $elements = array('bdo'); var $info = array(); var $content_sets = array('Inline' => 'bdo'); diff --git a/library/HTMLPurifier/HTMLModule/CommonAttributes.php b/library/HTMLPurifier/HTMLModule/CommonAttributes.php index c8ba2fe2..8f17c2f0 100644 --- a/library/HTMLPurifier/HTMLModule/CommonAttributes.php +++ b/library/HTMLPurifier/HTMLModule/CommonAttributes.php @@ -3,7 +3,6 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule { var $name = 'CommonAttributes'; - var $type = 'define'; var $attr_collections = array( 'Core' => array( diff --git a/library/HTMLPurifier/HTMLModule/Edit.php b/library/HTMLPurifier/HTMLModule/Edit.php index 43441217..6a415906 100644 --- a/library/HTMLPurifier/HTMLModule/Edit.php +++ b/library/HTMLPurifier/HTMLModule/Edit.php @@ -11,7 +11,6 
@@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule { var $name = 'Edit'; - var $type = 'define'; var $elements = array('del', 'ins'); var $info = array(); var $content_sets = array('Inline' => 'del | ins'); diff --git a/library/HTMLPurifier/HTMLModule/Hypertext.php b/library/HTMLPurifier/HTMLModule/Hypertext.php index f0a8a652..0b8a2e98 100644 --- a/library/HTMLPurifier/HTMLModule/Hypertext.php +++ b/library/HTMLPurifier/HTMLModule/Hypertext.php @@ -9,7 +9,6 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule { var $name = 'Hypertext'; - var $type = 'define'; var $elements = array('a'); var $info = array(); var $content_sets = array('Inline' => 'a'); diff --git a/library/HTMLPurifier/HTMLModule/Image.php b/library/HTMLPurifier/HTMLModule/Image.php index 3b3e4041..3852836d 100644 --- a/library/HTMLPurifier/HTMLModule/Image.php +++ b/library/HTMLPurifier/HTMLModule/Image.php @@ -14,7 +14,6 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule { var $name = 'Image'; - var $type = 'define'; var $elements = array('img'); var $info = array(); var $content_sets = array('Inline' => 'img'); diff --git a/library/HTMLPurifier/HTMLModule/Legacy.php b/library/HTMLPurifier/HTMLModule/Legacy.php index f734aed2..a0613a2f 100644 --- a/library/HTMLPurifier/HTMLModule/Legacy.php +++ b/library/HTMLPurifier/HTMLModule/Legacy.php @@ -22,7 +22,6 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule // incomplete var $name = 'Legacy'; - var $type = 'define-redefine'; var $elements = array('u', 's', 'strike'); var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote'); diff --git a/library/HTMLPurifier/HTMLModule/List.php b/library/HTMLPurifier/HTMLModule/List.php index 73e57ef1..c74982df 100644 --- a/library/HTMLPurifier/HTMLModule/List.php +++ b/library/HTMLPurifier/HTMLModule/List.php @@ -9,7 +9,6 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule { var $name = 'List'; - var 
$type = 'define'; var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li'); var $info = array(); // According to the abstract schema, the List content set is a fully formed diff --git a/library/HTMLPurifier/HTMLModule/Presentation.php b/library/HTMLPurifier/HTMLModule/Presentation.php index f1b9f25d..42d9c11e 100644 --- a/library/HTMLPurifier/HTMLModule/Presentation.php +++ b/library/HTMLPurifier/HTMLModule/Presentation.php @@ -16,7 +16,6 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule { var $name = 'Presentation'; - var $type = 'define'; var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt'); var $info = array(); var $content_sets = array( diff --git a/library/HTMLPurifier/HTMLModule/StyleAttribute.php b/library/HTMLPurifier/HTMLModule/StyleAttribute.php index b06a337e..5ee5d1cf 100644 --- a/library/HTMLPurifier/HTMLModule/StyleAttribute.php +++ b/library/HTMLPurifier/HTMLModule/StyleAttribute.php @@ -11,7 +11,6 @@ class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule { var $name = 'StyleAttribute'; - var $type = 'define'; var $attr_collections = array( // The inclusion routine differs from the Abstract Modules but // is in line with the DTD and XML Schemas. 
diff --git a/library/HTMLPurifier/HTMLModule/Tables.php b/library/HTMLPurifier/HTMLModule/Tables.php index 9268be95..ea41f5b1 100644 --- a/library/HTMLPurifier/HTMLModule/Tables.php +++ b/library/HTMLPurifier/HTMLModule/Tables.php @@ -10,7 +10,6 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule { var $name = 'Tables'; - var $type = 'define'; var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col', 'colgroup', 'tbody', 'thead', 'tfoot'); var $info = array(); diff --git a/library/HTMLPurifier/HTMLModule/Text.php b/library/HTMLPurifier/HTMLModule/Text.php index 8a07d9c8..56361a39 100644 --- a/library/HTMLPurifier/HTMLModule/Text.php +++ b/library/HTMLPurifier/HTMLModule/Text.php @@ -16,7 +16,6 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule { var $name = 'Text'; - var $type = 'define'; var $elements = array('abbr', 'acronym', 'address', 'blockquote', 'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3', diff --git a/library/HTMLPurifier/HTMLModule/TransformToStrict.php b/library/HTMLPurifier/HTMLModule/TransformToStrict.php index b3e896af..5c75a0da 100644 --- a/library/HTMLPurifier/HTMLModule/TransformToStrict.php +++ b/library/HTMLPurifier/HTMLModule/TransformToStrict.php @@ -11,7 +11,6 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule { var $name = 'TransformToStrict'; - var $type = 'redefine'; // we're actually modifying these elements, not defining them var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote'); diff --git a/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php b/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php index e208aea7..0915f5b6 100644 --- a/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php +++ b/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php @@ -15,7 +15,6 @@ class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule { var $name = 'TransformToXHTML11'; - var $type = 'redefine'; var 
$attr_collections = array( 'Lang' => array( 'lang' => false // remove it diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php index 5283d884..ba085383 100644 --- a/library/HTMLPurifier/HTMLModuleManager.php +++ b/library/HTMLPurifier/HTMLModuleManager.php @@ -31,6 +31,9 @@ class HTMLPurifier_HTMLModuleManager /** * Modules that may be used in a valid doctype of this kind. + * Correctional and leniency modules should not be placed in this + * array unless the user said so: don't stuff every possible lenient + * module for this doctype in here. */ var $validModules = array(); @@ -46,11 +49,14 @@ class HTMLPurifier_HTMLModuleManager var $doctype; /** - * Associative array of module class name to module order keywords or - * numbers (keyword is preferred, all keywords are resolved at beginning - * of setup()) + * Designates next available integer order for modules. */ - var $order = array(); + var $moduleCounter = 0; + + /** + * List of suffixes of collections to process + */ + var $collections = array('Safe', 'Unsafe', 'Lenient', 'Correctional'); /** * Associative array of module setup names to the corresponding safe @@ -72,9 +78,17 @@ class HTMLPurifier_HTMLModuleManager 'XHTML 1.1' => array(array('_Common')), ); + /** + * Modules that specify elements that are unsafe from untrusted + * third-parties. These should be registered in $validModules but + * almost never $activeModules unless you really know what you're + * doing. + */ + var $collectionsUnsafe = array( ); + /** * Modules to import if lenient mode (attempt to convert everything - * to a valid representation) is on. These must not be in activeModules + * to a valid representation) is on. These must not be in $activeModules * unless specified so. */ var $collectionsLenient = array( @@ -85,7 +99,7 @@ class HTMLPurifier_HTMLModuleManager /** * Modules to import if correctional mode (correct everything that - * is feasible to strict mode) is on. 
These must not be in activeModules + * is feasible to strict mode) is on. These must not be in $activeModules * unless specified so. */ var $collectionsCorrectional = array( @@ -152,62 +166,36 @@ class HTMLPurifier_HTMLModuleManager } } if (!class_exists($module)) { - trigger_error($original_module . ' module does not exist', E_USER_ERROR); + trigger_error($original_module . ' module does not exist', + E_USER_ERROR); return; } $module = new $module(); } - $order = $module->type; - if (!isset($this->orderKeywords[$order])) { - trigger_error('Order keyword does not exist', E_USER_ERROR); - return; - } + $module->order = $this->moduleCounter++; // assign then increment $this->modules[$module->name] = $module; - $this->order[$module->name] = $order; } function setup($config) { // retrieve the doctype $this->doctype = $this->getDoctype($config); - // substitute out the order keywords - foreach ($this->order as $name => $order) { - if (empty($this->modules[$name])) { - trigger_error('Orphan module order definition for module: ' . $name, E_USER_ERROR); - return; - } - if (is_int($order)) continue; - if (empty($this->orderKeywords[$order])) { - trigger_error('Unknown order keyword: ' . $order, E_USER_ERROR); - return; - } - $this->order[$name] = $this->orderKeywords[$order]; + // process module collections to module name => module instance form + foreach ($this->collections as $suffix) { + $varname = 'collections' . 
$suffix; + $this->processCollections($this->$varname); } - // sort modules member variable - array_multisort( - $this->order, SORT_ASC, SORT_NUMERIC, - $this->modules - ); - - // process module collections to module name => module instance form - $this->processCollections($this->collectionsSafe); - $this->processCollections($this->collectionsLenient); - $this->processCollections($this->collectionsCorrectional); + // $collections variable in following instances will be dynamically + // generated once we figure out some config variables // setup the validModules array - if (isset($this->collectionsSafe[$this->doctype])) { - $this->validModules += $this->collectionsSafe[$this->doctype]; - } - if (isset($this->collectionsLenient[$this->doctype])) { - $this->validModules += $this->collectionsLenient[$this->doctype]; - } - if (isset($this->collectionsCorrectional[$this->doctype])) { - $this->validModules += $this->collectionsCorrectional[$this->doctype]; - } + $collections = array('Safe', 'Unsafe', 'Lenient', 'Correctional'); + $this->validModules = $this->assembleModules($collections); // setup the activeModules array - $this->activeModules = $this->validModules; // unimplemented! + $collections = array('Safe', 'Lenient', 'Correctional'); + $this->activeModules = $this->assembleModules($collections); // setup lookup table based on all valid modules foreach ($this->validModules as $module) { @@ -221,15 +209,55 @@ class HTMLPurifier_HTMLModuleManager // note the different choice $this->contentSets = new HTMLPurifier_ContentSets( + // content models that contain non-allowed elements are + // harmless because RemoveForeignElements will ensure + // they never get in anyway, and there is usually no + // reason why you should want to restrict a content + // model beyond what is mandated by the doctype. + // Note, however, that this means redefinitions of + // content models can't be tossed in $validModules willy-nilly: + // that stuff still is regulated by configuration. 
$this->validModules ); $this->attrCollections = new HTMLPurifier_AttrCollections( $this->attrTypes, + // only explicitly allowed modules are allowed to affect + // the global attribute collections. This means there's + // a distinction between loading the Bdo module, and the + // bdo element: Bdo will enable the dir attribute on all + // elements, while bdo will only define the bdo element, + // which will not have an editable directionality. This might + // catch people who are loading only elements by surprise, so + // we should consider loading an entire module if all the + // elements it defines are requested by the user, especially + // if it affects the global attribute collections. $this->activeModules ); } + /** + * Takes a list of collections and merges together all the defined + * modules for the current doctype from those collections. + * @param $collections List of collection suffixes we should grab + * modules from (like 'Safe' or 'Lenient') + */ + function assembleModules($collections) { + $modules = array(); + foreach ($collections as $suffix) { + $varname = 'collections' . $suffix; + $cols = $this->$varname; + if (!empty($cols[$this->doctype])) { + $modules += $cols[$this->doctype]; + } + } + return $modules; + } + + /** + * Takes a collection and performs inclusions and substitutions for it. 
+ * @param $cols Reference to collections class member variable + */ function processCollections(&$cols) { // $cols is the set of collections @@ -258,11 +286,16 @@ class HTMLPurifier_HTMLModuleManager // assoc array of module name to module instance foreach ($cols as $col_i => $col) { if (is_string($col)) continue; + $order = array(); foreach ($col as $module_i => $module) { unset($cols[$col_i][$module_i]); $module = $this->modules[$module]; $cols[$col_i][$module->name] = $module; + $order[$module->name] = $module->order; } + array_multisort( + $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i] + ); } // hook up aliases @@ -278,18 +311,30 @@ class HTMLPurifier_HTMLModuleManager } + /** + * Retrieves the doctype from the configuration object + */ function getDoctype($config) { - // get rid of this later - if ($config->get('HTML', 'Strict')) { - $doctype = 'XHTML 1.0 Strict'; + if ($config->get('Core', 'XHTML')) { + $doctype = 'XHTML 1.0'; } else { - $doctype = 'XHTML 1.0 Transitional'; + $doctype = 'HTML 4.01'; + } + if ($config->get('HTML', 'Strict')) { + $doctype .= ' Strict'; + } else { + $doctype .= ' Transitional'; } return $doctype; } /** - * @param $config + * Retrieves merged element definitions for all active elements. + * @note We may want to generate an elements array during setup + * and pass that on, because a specific combination of + * elements may trigger the loading of a module. + * @param $config Instance of HTMLPurifier_Config, for determining + * stray elements. */ function getElements($config) { @@ -300,10 +345,17 @@ class HTMLPurifier_HTMLModuleManager } } + // standalone elements now loaded + return $elements; } + /** + * Retrieves a single merged element definition + * @param $name Name of element + * @param $config Instance of HTMLPurifier_Config, may not be necessary. 
+ */ function getElement($name, $config) { $def = false; @@ -316,9 +368,6 @@ class HTMLPurifier_HTMLModuleManager foreach($this->elementModuleLookup[$name] as $module_name) { - // oops, we can't use that module at all - if (!isset($modules[$module_name])) continue; - $module = $modules[$module_name]; $new_def = $module->info[$name]; @@ -327,6 +376,9 @@ class HTMLPurifier_HTMLModuleManager } elseif ($def) { $def->mergeIn($new_def); } else { + // could add save-it-for-another-day functionality: + // non-standalone definitions that don't have a standalone + // to merge into could be deferred to the end continue; } @@ -350,20 +402,6 @@ class HTMLPurifier_HTMLModuleManager } - /** - * Retrieves full child definition for child, for the parent. Parent - * is a special case because it may not be allowed in the document. - */ - function getFullChildDef($element, $config) { - $def = $this->getElement($element, $config); - if ($def === false) { - trigger_error('Cannot get child def of element not available in doctype', - E_USER_ERROR); - return false; - } - return $def->child; - } - } ?> \ No newline at end of file