array( // leading _ indicates private
            'CommonAttributes',
            'Text',
            'Hypertext',
            'List',
            'Presentation',
            'Edit',
            'Bdo',
            'Tables',
            'Image',
            'StyleAttribute'
        ),
        // HTML definitions, defer completely to XHTML definitions
        'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional',
        'HTML 4.01 Strict' => 'XHTML 1.0 Strict',
        // XHTML definitions
        'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
        'XHTML 1.0 Strict' => array(array('_Common')),
        'XHTML 1.1' => array(array('_Common')),
    );

    /**
     * Modules that specify elements that are unsafe from untrusted
     * third-parties. These should be registered in $validModules but
     * almost never $activeModules unless you really know what you're
     * doing.
     */
    var $collectionsUnsafe = array( );

    /**
     * Modules to import if lenient mode (attempt to convert everything
     * to a valid representation) is on. These must not be in $activeModules
     * unless specified so.
     */
    var $collectionsLenient = array(
        'HTML 4.01 Strict' => 'XHTML 1.0 Strict',
        'XHTML 1.0 Strict' => array('TransformToStrict'),
        'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
    );

    /**
     * Modules to import if correctional mode (correct everything that
     * is feasible to strict mode) is on. These must not be in $activeModules
     * unless specified so.
     */
    var $collectionsCorrectional = array(
        'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional',
        'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
    );

    /** Associative array of element name to defining modules (always array) */
    var $elementModuleLookup = array();

    /** List of prefixes we should use for resolving small names */
    var $prefixes = array('HTMLPurifier_HTMLModule_');

    var $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
    var $attrTypes;       /**< Instance of HTMLPurifier_AttrTypes */
    var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */

    /**
     * Constructor: registers the built-in modules through addModule() (in
     * the order listed, which fixes their relative ordering) and creates
     * the attribute types object.
     */
    function HTMLPurifier_HTMLModuleManager() {

        // modules
        $modules = array(
            // define
            'CommonAttributes',
            'Text',
            'Hypertext',
            'List',
            'Presentation',
            'Edit',
            'Bdo',
            'Tables',
            'Image',
            'StyleAttribute',
            // define-redefine
            'Legacy',
            // redefine
            'TransformToStrict',
            'TransformToXHTML11'
        );

        foreach ($modules as $module) {
            $this->addModule($module);
        }

        // the only editable internal object. The rest need to
        // be manipulated through modules
        $this->attrTypes = new HTMLPurifier_AttrTypes();

    }

    /**
     * Adds a module to the ordered list.
     * @param $module Mixed: string module name, with or without
     *        HTMLPurifier_HTMLModule prefix, or instance of
     *        subclass of HTMLPurifier_HTMLModule.
     * @note A string that cannot be resolved to an existing class, either
     *       as given or with one of the registered prefixes, raises an
     *       E_USER_ERROR and nothing is registered.
     */
    function addModule($module) {
        if (is_string($module)) {
            $original_module = $module;
            if (!class_exists($module)) {
                // try each registered prefix until one yields a real class
                foreach ($this->prefixes as $prefix) {
                    $module = $prefix . $original_module;
                    if (class_exists($module)) break;
                }
            }
            if (!class_exists($module)) {
                trigger_error($original_module . ' module does not exist', E_USER_ERROR);
                return;
            }
            $module = new $module();
        }
        $module->order = $this->moduleCounter++; // assign then increment
        $this->modules[$module->name] = $module;
    }

    /**
     * Adds a class prefix that addModule() will use to resolve a
     * string name to a concrete class
     * @param $prefix Prefix to register; it is cast to string
     */
    function addPrefix($prefix) {
        $this->prefixes[] = (string) $prefix;
    }

    /**
     * Prepares the manager for the configured doctype: resolves every
     * collection member variable, assembles the valid and active module
     * sets, builds the element => module names lookup table and
     * instantiates the content sets and attribute collections.
     * @param $config Configuration object, passed on to getDoctype()
     */
    function setup($config) {

        // retrieve the doctype
        $this->doctype = $this->getDoctype($config);

        // process module collections to module name => module instance form
        foreach ($this->collections as $suffix) {
            $varname = 'collections' . $suffix;
            $this->processCollections($this->$varname);
        }

        $this->validModules  = $this->assembleModules($this->validModulesCollections);
        $this->activeModules = $this->assembleModules($this->activeModulesCollections);

        // setup lookup table based on all valid modules
        foreach ($this->validModules as $module) {
            foreach ($module->elements as $name) {
                if (!isset($this->elementModuleLookup[$name])) {
                    $this->elementModuleLookup[$name] = array();
                }
                $this->elementModuleLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content models that contain non-allowed elements are
            // harmless because RemoveForeignElements will ensure
            // they never get in anyway, and there is usually no
            // reason why you should want to restrict a content
            // model beyond what is mandated by the doctype.
            // Note, however, that this means redefinitions of
            // content models can't be tossed in validModules willy-nilly:
            // that stuff still is regulated by configuration.
            $this->validModules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // only explicitly allowed modules are allowed to affect
            // the global attribute collections. This means there's
            // a distinction between loading the Bdo module, and the
            // bdo element: Bdo will enable the dir attribute on all
            // elements, while bdo will only define the bdo element,
            // which will not have an editable directionality. This might
            // catch people who are loading only elements by surprise, so
            // we should consider loading an entire module if all the
            // elements it defines are requested by the user, especially
            // if it affects the global attribute collections.
            $this->activeModules
        );

    }

    /**
     * Takes a list of collections and merges together all the defined
     * modules for the current doctype from those collections.
     * @param $collections List of collection suffixes we should grab
     *        modules from (like 'Safe' or 'Lenient')
     * @return Array of modules, merged with the + operator, so an entry
     *         from an earlier collection wins over a later one that has
     *         the same key
     */
    function assembleModules($collections) {
        $modules = array();
        foreach ($collections as $suffix) {
            $varname = 'collections' . $suffix;
            $cols = $this->$varname;
            if (!empty($cols[$this->doctype])) {
                $modules += $cols[$this->doctype];
            }
        }
        return $modules;
    }

    /**
     * Takes a collection and performs inclusions and substitutions for it.
     *
     * Work is done in place, in four passes:
     *  1. inclusions: an array at index 0 of a collection lists other
     *     collections whose modules are merged in (nested inclusions are
     *     followed; including an alias includes the collection the alias
     *     points at);
     *  2. module names are replaced by the registered module instances,
     *     keyed by module name and sorted by the modules' order;
     *  3. aliases (string values) are replaced by a copy of the collection
     *     they name;
     *  4. pseudo-collections (names starting with an underscore) are removed.
     *
     * Problems (circular or undefined inclusions, misplaced inclusion
     * arrays, undefined modules, bad aliases) are reported with
     * E_USER_ERROR.
     *
     * @param $cols Reference to collections class member variable
     */
    function processCollections(&$cols) {

        // $cols is the set of collections
        // $col_i is the name (index) of a collection
        // $col is a collection/list of modules

        // perform inclusions
        foreach ($cols as $col_i => $col) {
            if (is_string($col)) continue; // alias, save for later
            if (empty($col[0]) || !is_array($col[0])) continue; // no inclusions to do
            $seen = array($col_i => true); // recursion reporting
            $includes = $col[0];
            unset($cols[$col_i][0]); // remove inclusions value, recursion guard
            // $includes is used as a queue: nested inclusions are appended
            // to it while it is being walked
            for ($i = 0; isset($includes[$i]); $i++) {
                $inc = $includes[$i];
                if (isset($seen[$inc])) {
                    trigger_error(
                        "Circular inclusion detected in $col_i collection",
                        E_USER_ERROR
                    );
                    continue;
                } else {
                    $seen[$inc] = true;
                }
                if (!isset($cols[$inc])) {
                    trigger_error(
                        "Collection $col_i tried to include undefined ".
                        "collection $inc", E_USER_ERROR);
                    continue;
                }
                if (is_string($cols[$inc])) {
                    // the included collection is an alias, which cannot be
                    // iterated: queue the collection it points at instead,
                    // so the undefined/circular checks above apply to it
                    $includes[] = $cols[$inc];
                    continue;
                }
                foreach ($cols[$inc] as $module) {
                    if (is_array($module)) { // another inclusion!
                        foreach ($module as $inc2) $includes[] = $inc2;
                        continue;
                    }
                    $cols[$col_i][] = $module; // merge in the other modules
                }
            }
        }

        // replace with real modules, invert module from list to
        // assoc array of module name to module instance
        foreach ($cols as $col_i => $col) {
            if (is_string($col)) continue;
            $order = array();
            foreach ($col as $module_i => $module) {
                unset($cols[$col_i][$module_i]);
                if (is_array($module)) {
                    trigger_error("Illegal inclusion array at index".
                        " $module_i found collection $col_i, inclusion".
                        " arrays must be at start of collection (index 0)",
                        E_USER_ERROR);
                    continue;
                }
                if (!isset($this->modules[$module])) {
                    trigger_error(
                        "Collection $col_i references undefined ".
                        "module $module",
                        E_USER_ERROR
                    );
                    continue;
                }
                $module = $this->modules[$module];
                $cols[$col_i][$module->name] = $module;
                $order[$module->name] = $module->order;
            }
            // sort the collection by the modules' order values
            array_multisort(
                $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
            );
        }

        // hook up aliases
        foreach ($cols as $col_i => $col) {
            if (!is_string($col)) continue;
            if (!isset($cols[$col])) {
                trigger_error(
                    "$col_i collection is alias to undefined $col collection",
                    E_USER_ERROR
                );
                unset($cols[$col_i]);
                continue;
            }
            // recursion guard
            if (is_string($cols[$col])) {
                trigger_error(
                    "Cannot alias $col_i collection to alias",
                    E_USER_ERROR
                );
                unset($cols[$col_i]);
                continue;
            }
            $cols[$col_i] = $cols[$col];
        }

        // delete pseudo-collections
        foreach ($cols as $col_i => $col) {
            // strncmp() copes with empty and non-string keys, which plain
            // offset access ($col_i[0]) does not
            if (strncmp($col_i, '_', 1) === 0) unset($cols[$col_i]);
        }

    }

    /**
     * Retrieves the doctype from the configuration object
     * @param $config Configuration object; the Core.XHTML and HTML.Strict
     *        directives are read from it
     * @return String doctype name: 'XHTML 1.0' or 'HTML 4.01' followed by
     *         ' Strict' or ' Transitional'
     */
    function getDoctype($config) {
        if ($config->get('Core', 'XHTML')) {
            $doctype = 'XHTML 1.0';
        } else {
            $doctype = 'HTML 4.01';
        }
        if ($config->get('HTML', 'Strict')) {
            $doctype .= ' Strict';
        } else {
            $doctype .= ' Transitional';
        }
        return $doctype;
    }

    /**
     * Retrieves merged element definitions for all active elements.
     * @note We may want to generate an elements array during setup
     *       and pass that on, because a specific combination of
     *       elements may trigger the loading of a module.
     * @param $config Instance of HTMLPurifier_Config, for determining
     *        stray elements.
     * @return Associative array of element name to the value returned by
     *         getElement() for that name
     */
    function getElements($config) {

        $elements = array();
        foreach ($this->activeModules as $module) {
            foreach ($module->elements as $name) {
                $elements[$name] = $this->getElement($name, $config);
            }
        }

        // standalone elements now loaded

        return $elements;

    }

    /**
     * Retrieves a single merged element definition
     * @param $name Name of element
     * @param $config Instance of HTMLPurifier_Config, may not be necessary.
     * @return Merged element definition, or false if no valid module
     *         defines the element or none of the definitions is standalone
     */
    function getElement($name, $config) {

        $def = false;

        $modules = $this->validModules;

        if (!isset($this->elementModuleLookup[$name])) {
            return false;
        }

        foreach($this->elementModuleLookup[$name] as $module_name) {

            $module = $modules[$module_name];
            $new_def = $module->info[$name];

            // NOTE(review): under PHP 5+ object-handle semantics $def is the
            // same object as the module's own definition, so the merge and
            // expansions below presumably modify it too -- confirm whether a
            // copy is intended here.
            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                $def->mergeIn($new_def);
            } else {
                // could have save it for another day functionality:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers(
                $def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        return $def;

    }

}

?>