attrTypes = new HTMLPurifier_AttrTypes();
        
        if (!$blank) $this->initialize();
        
    }
    
    /**
     * Registers the default modules with addModule(), fills in the
     * default 'Safe', 'Unsafe', 'Lenient' and 'Correctional' collections
     * and marks those collections as active or valid.
     */
    function initialize() {
        
        // load default modules to the recognized modules list (not active)
        $modules = array(
            // define
            'CommonAttributes',
            'Text', 'Hypertext', 'List', 'Presentation',
            'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute',
            // define-redefine
            'Legacy',
            // redefine
            'TransformToStrict', 'TransformToXHTML11'
        );
        foreach ($modules as $module) {
            $this->addModule($module);
        }
        
        // Safe modules for supported doctypes. These are included
        // in the valid and active module lists by default
        $this->collections['Safe'] = array(
            '_Common' => array( // leading _ indicates private
                'CommonAttributes', 'Text', 'Hypertext', 'List',
                'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
                'StyleAttribute'
            ),
            // HTML definitions, defer to XHTML definitions
            'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
            'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
            // XHTML definitions
            'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
            'XHTML 1.0 Strict' => array(array('_Common')),
            'XHTML 1.1' => array(array('_Common')),
        );
        
        // Modules that specify elements that are unsafe from untrusted
        // third-parties. These should be registered in $validModules but
        // almost never $activeModules unless you really know what you're
        // doing.
        $this->collections['Unsafe'] = array();
        
        // Modules to import if lenient mode (attempt to convert everything
        // to a valid representation) is on. These must not be in $activeModules
        // unless specified so.
        $this->collections['Lenient'] = array(
            'HTML 4.01 Strict' => 'XHTML 1.0 Strict',
            'XHTML 1.0 Strict' => array('TransformToStrict'),
            'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
        );
        
        // Modules to import if correctional mode (correct everything that
        // is feasible to strict mode) is on. These must not be in $activeModules
        // unless specified so.
        $this->collections['Correctional'] = array(
            'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional',
            'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
        );
        
        // setup active versus valid modules. ORDER IS IMPORTANT!
        // definition modules
        $this->makeCollectionActive('Safe');
        $this->makeCollectionValid('Unsafe');
        // redefinition modules
        $this->makeCollectionActive('Lenient');
        $this->makeCollectionActive('Correctional');
        
    }
    
    /**
     * Adds a module to the recognized module list. This does not
     * do anything else: the module must be added to a corresponding
     * collection to be "activated".
     * @param $module Mixed: string module name, with or without
     *                HTMLPurifier_HTMLModule prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @note A string is first tried as a class name verbatim, then with
     *       each registered prefix (see addPrefix()) prepended, in
     *       registration order. If no class is found, an E_USER_ERROR
     *       is triggered and nothing is registered.
     */
    function addModule($module) {
        if (is_string($module)) {
            $original_module = $module;
            if (!class_exists($module)) {
                foreach ($this->prefixes as $prefix) {
                    $module = $prefix . $original_module;
                    if (class_exists($module)) break;
                }
            }
            if (!class_exists($module)) {
                trigger_error($original_module . ' module does not exist', E_USER_ERROR);
                return;
            }
            $module = new $module();
        }
        // registration order is remembered so that collections can later
        // be sorted into it (see processCollections())
        $module->order = $this->counter++; // assign then increment
        $this->modules[$module->name] = $module;
    }
    
    /**
     * Makes a collection active, while also making it valid if not
     * already done so. See $activeModules for the semantics of "active".
     * @param $collection_name Name of collection to activate
     */
    function makeCollectionActive($collection_name) {
        // strict comparison: collection names are strings, do not let
        // type juggling match an unrelated name
        if (!in_array($collection_name, $this->validCollections, true)) {
            $this->makeCollectionValid($collection_name);
        }
        $this->activeCollections[] = $collection_name;
    }
    
    /**
     * Makes a collection valid. See $validModules for the semantics of "valid"
     * @param $collection_name Name of collection to mark as valid
     */
    function makeCollectionValid($collection_name) {
        $this->validCollections[] = $collection_name;
    }
    
    /**
     * Adds a class prefix that addModule() will use to resolve a
     * string name to a concrete class
     * @param $prefix Prefix to register; it is cast to string
     */
    function addPrefix($prefix) {
        $this->prefixes[] = (string) $prefix;
    }
    
    /**
     * Resolves the doctype, converts the collections into module
     * instances, assembles the valid and active module lists, builds
     * the element lookup table and creates the content sets and
     * attribute collections.
     * @param $config Configuration object, handed to getDoctype()
     */
    function setup($config) {
        
        // retrieve the doctype
        $this->doctype = $this->getDoctype($config);
        if (isset($this->doctypeAliases[$this->doctype])) {
            $this->doctype = $this->doctypeAliases[$this->doctype];
        }
        
        // process module collections to module name => module instance form
        foreach ($this->collections as $col_i => $x) {
            $this->processCollections($this->collections[$col_i]);
        }
        
        $this->validModules  = $this->assembleModules($this->validCollections);
        $this->activeModules = $this->assembleModules($this->activeCollections);
        
        // setup lookup table based on all valid modules
        foreach ($this->validModules as $module) {
            foreach ($module->elements as $name) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }
        
        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content models that contain non-allowed elements are
            // harmless because RemoveForeignElements will ensure
            // they never get in anyway, and there is usually no
            // reason why you should want to restrict a content
            // model beyond what is mandated by the doctype.
            // Note, however, that this means redefinitions of
            // content models can't be tossed in validModules willy-nilly:
            // that stuff still is regulated by configuration.
            $this->validModules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // only explicitly allowed modules are allowed to affect
            // the global attribute collections. This means there's
            // a distinction between loading the Bdo module, and the
            // bdo element: Bdo will enable the dir attribute on all
            // elements, while bdo will only define the bdo element,
            // which will not have an editable directionality. This might
            // catch people who are loading only elements by surprise, so
            // we should consider loading an entire module if all the
            // elements it defines are requested by the user, especially
            // if it affects the global attribute collections.
            $this->activeModules
        );
        
    }
    
    /**
     * Takes a list of collections and merges together all the defined
     * modules for the current doctype from those collections.
     * @param $collections List of collection suffixes we should grab
     *                     modules from (like 'Safe' or 'Lenient')
     * @return Array of modules for the current doctype; when several
     *         collections define the same key, the first one wins
     * @note A doctype entry may be a string alias naming another
     *       doctype of the same collection (processCollections() leaves
     *       those untouched). Aliases are followed here; an alias that
     *       is circular or points at an undefined doctype triggers an
     *       E_USER_ERROR and that collection is skipped.
     */
    function assembleModules($collections) {
        $modules = array();
        $numOfCollectionsUsed = 0;
        foreach ($collections as $name) {
            $cols = $this->collections[$name];
            if (!isset($cols[$this->doctype])) continue;
            $col = $cols[$this->doctype];
            // resolve aliases: adding a string to $modules would be a
            // fatal "unsupported operand types" error
            $seen = array($this->doctype => true);
            while (is_string($col)) {
                if (isset($seen[$col]) || !isset($cols[$col])) {
                    trigger_error(
                        "Collection $name has an unresolvable alias ".
                        "for doctype {$this->doctype}",
                        E_USER_ERROR
                    );
                    $col = false;
                    break;
                }
                $seen[$col] = true;
                $col = $cols[$col];
            }
            if (!is_array($col)) continue;
            $modules += $col;
            $numOfCollectionsUsed++;
        }
        
        if ($numOfCollectionsUsed < 1) {
            // possible XSS injection if user-specified doctypes
            // are allowed
            trigger_error("Doctype {$this->doctype} does not exist, ".
                "check for typos (if you desire a doctype that allows ".
                "no elements, use an empty array collection)", E_USER_ERROR);
        }
        return $modules;
    }
    
    /**
     * Takes a collection and performs inclusions and substitutions for it.
     * @param $cols Reference to collections class member variable
     * @note Works in three passes: (1) an array at index 0 of a
     *       collection is treated as a list of other collections whose
     *       modules are merged in; (2) every module name is replaced by
     *       the registered module instance, keyed by module name and
     *       sorted by registration order; (3) collections whose name
     *       starts with an underscore are removed. String (alias)
     *       entries are left untouched by passes 1 and 2.
     */
    function processCollections(&$cols) {
        
        // $cols is the set of collections
        // $col_i is the name (index) of a collection
        // $col is a collection/list of modules
        
        // perform inclusions
        foreach ($cols as $col_i => $col) {
            if (is_string($col)) continue; // alias, save for later
            if (empty($col[0]) || !is_array($col[0])) continue; // no inclusions to do
            $seen = array($col_i => true); // recursion reporting
            $includes = $col[0];
            unset($cols[$col_i][0]); // remove inclusions value, recursion guard
            
            // $includes may grow while we iterate (nested inclusions)
            for ($i = 0; isset($includes[$i]); $i++) {
                $inc = $includes[$i];
                if (isset($seen[$inc])) {
                    trigger_error(
                        "Circular inclusion detected in $col_i collection",
                        E_USER_ERROR
                    );
                    continue;
                } else {
                    $seen[$inc] = true;
                }
                if (!isset($cols[$inc])) {
                    trigger_error(
                        "Collection $col_i tried to include undefined ".
                        "collection $inc", E_USER_ERROR);
                    continue;
                }
                foreach ($cols[$inc] as $module) {
                    if (is_array($module)) { // another inclusion!
                        foreach ($module as $inc2) $includes[] = $inc2;
                        continue;
                    }
                    $cols[$col_i][] = $module; // merge in the other modules
                }
            }
        }
        
        // replace with real modules, invert module from list to
        // assoc array of module name to module instance
        foreach ($cols as $col_i => $col) {
            if (is_string($col)) continue;
            $order = array();
            foreach ($col as $module_i => $module) {
                unset($cols[$col_i][$module_i]);
                if (is_array($module)) {
                    trigger_error("Illegal inclusion array at index".
                        " $module_i found collection $col_i, inclusion".
                        " arrays must be at start of collection (index 0)",
                        E_USER_ERROR);
                    continue;
                }
                if (!isset($this->modules[$module])) {
                    trigger_error(
                        "Collection $col_i references undefined ".
                        "module $module",
                        E_USER_ERROR
                    );
                    continue;
                }
                $module = $this->modules[$module];
                $cols[$col_i][$module->name] = $module;
                $order[$module->name] = $module->order;
            }
            // put the modules into the order they were registered in
            array_multisort(
                $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
            );
        }
        
        // delete pseudo-collections
        foreach ($cols as $col_i => $col) {
            // substr() instead of $col_i[0]: safe for empty or
            // non-string keys
            if (is_string($col_i) && substr($col_i, 0, 1) == '_') {
                unset($cols[$col_i]);
            }
        }
        
    }
    
    /**
     * Retrieves the doctype from the configuration object
     * @param $config Configuration object; the Core.XHTML and
     *                HTML.Strict directives are read from it
     * @return String such as 'XHTML 1.0 Transitional' or 'HTML 4.01 Strict'
     */
    function getDoctype($config) {
        // this is BC
        if ($config->get('Core', 'XHTML')) {
            $doctype = 'XHTML 1.0';
        } else {
            $doctype = 'HTML 4.01';
        }
        if ($config->get('HTML', 'Strict')) {
            $doctype .= ' Strict';
        } else {
            $doctype .= ' Transitional';
        }
        return $doctype;
    }
    
    /**
     * Retrieves merged element definitions for all active elements.
     * @note We may want to generate an elements array during setup
     *       and pass that on, because a specific combination of
     *       elements may trigger the loading of a module.
     * @param $config Instance of HTMLPurifier_Config, for determining
     *                stray elements.
     * @return Array of element name => result of getElement()
     */
    function getElements($config) {
        
        $elements = array();
        foreach ($this->activeModules as $module) {
            foreach ($module->elements as $name) {
                $elements[$name] = $this->getElement($name, $config);
            }
        }
        
        // standalone elements now loaded
        
        return $elements;
        
    }
    
    /**
     * Retrieves a single merged element definition
     * @param $name Name of element
     * @param $config Instance of HTMLPurifier_Config, may not be necessary.
     * @return Merged element definition, or false if no valid module
     *         declares the element or none of them supplies a
     *         standalone definition to start from
     */
    function getElement($name, $config) {
        
        $def = false;
        
        $modules = $this->validModules;
        
        if (!isset($this->elementLookup[$name])) {
            return false;
        }
        
        foreach($this->elementLookup[$name] as $module_name) {
            
            $module  = $modules[$module_name];
            $new_def = $module->info[$name];
            
            if (!$def && $new_def->standalone) {
                // first standalone definition becomes the base
                $def = $new_def;
            } elseif ($def) {
                $def->mergeIn($new_def);
            } else {
                // could "save it for another day":
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                continue;
            }
            
            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
            
            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }
            
            $this->contentSets->generateChildDef($def, $module);
        }
        
        return $def;
        
    }
    
}

?>