diff --git a/NEWS b/NEWS index d55dcf17..6cad4514 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . AttrTypes now has accessor functions that should be used instead of directly manipulating info . TagTransform_Center deprecated in favor of generic TagTransform_Simple +. Add extra protection in AttrDef_URI against phantom Schemes +. Doctype object added to HTMLDefinition which describes certain aspects + of the operational document type 1.6.1, released 2007-05-05 ! Support for more deprecated attributes via transformations: diff --git a/TODO b/TODO index f65c0e23..acf2e7d8 100644 --- a/TODO +++ b/TODO @@ -11,14 +11,15 @@ TODO List # Complete advanced API, and fully document it - Add framework for unsafe attributes - Reorganize configuration directives - - Set up doctype object inside configuration object - - Set up anonymous module management by HTMLDefinition - # Implement HTMLDefinition caching using serialize + - Set up anonymous module management by HTMLDefinition (Advanced API) + - Get all AttrTypes into string form + # Clean up HTMLDefinition caching, need easy cache invalidation, + versioning of caches, etc. # Implement all deprecated tags and attributes # Create parsing/standards compliance smoketest # Reorganize Unit Tests - - Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly - do this earlier) + - Refactor loop tests (esp. AttrDef_URI) + - Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists ? HTML interface for tweaking configuration to see changes 1.8 release [Refactor, refactor!] diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index fdaa35bb..ca214980 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -158,6 +158,14 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef ); } + // something funky weird happened in the registry, abort! 
+ if (!$scheme_obj) { + trigger_error( + 'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable', + E_USER_WARNING + ); + return false; + } // the URI we're processing embeds_resource a resource in the page, but the URI // it references cannot be located diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 373123e8..10db8245 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -53,11 +53,6 @@ class HTMLPurifier_Config */ var $autoFinalize = true; - /** - * Instance of HTMLPurifier_Doctype, representing current doctype - */ - var $doctype; - /** * @param $definition HTMLPurifier_ConfigSchema that defines what directives * are allowed. @@ -201,10 +196,17 @@ class HTMLPurifier_Config empty($this->html_definition) || // hasn't ever been setup ($raw && $this->html_definition->setup) // requesting new one ) { + if (!$raw) { + $this->html_definition = HTMLPurifier_HTMLDefinition::getCache($this); + if ($this->html_definition) return $this->html_definition; + } $this->html_definition = new HTMLPurifier_HTMLDefinition($this); if ($raw) return $this->html_definition; // no setup! 
} - if (!$this->html_definition->setup) $this->html_definition->setup(); + if (!$this->html_definition->setup) { + $this->html_definition->setup(); + $this->html_definition->saveCache($this); + } return $this->html_definition; } @@ -243,16 +245,6 @@ class HTMLPurifier_Config } } - /** - * Returns the current doctype object - */ - function getDoctype() { - if (!$this->doctype) { - $this->getHTMLDefinition(); - } - return $this->doctype; - } - /** * Loads configuration values from an ini file * @param $filename Name of ini file diff --git a/library/HTMLPurifier/DoctypeRegistry.php b/library/HTMLPurifier/DoctypeRegistry.php index 40dc4cc3..57ccd506 100644 --- a/library/HTMLPurifier/DoctypeRegistry.php +++ b/library/HTMLPurifier/DoctypeRegistry.php @@ -77,7 +77,8 @@ class HTMLPurifier_DoctypeRegistry if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; if (!isset($this->doctypes[$doctype])) { trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist'); - $null = null; return $null; + $anon = new HTMLPurifier_Doctype($doctype); + return $anon; } return $this->doctypes[$doctype]; } @@ -93,7 +94,6 @@ class HTMLPurifier_DoctypeRegistry function make($config) { $original_doctype = $this->get($this->getDoctypeFromConfig($config)); $doctype = $original_doctype->copy(); - $config->doctype = $doctype; return $doctype; } diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php index c2a723d5..63750e9c 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/HTMLPurifier/Generator.php @@ -71,10 +71,8 @@ class HTMLPurifier_Generator if (!$config) $config = HTMLPurifier_Config::createDefault(); $this->_scriptFix = $config->get('Output', 'CommentScriptContents'); - $doctype = $config->getDoctype(); - $this->_xhtml = $doctype->xml; - $this->_def = $config->getHTMLDefinition(); + $this->_xhtml = $this->_def->doctype->xml; if (!$tokens) return ''; for ($i = 0, $size = count($tokens); $i < $size; $i++) { diff --git 
a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index 4a1d5a04..d75be453 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -147,6 +147,10 @@ class HTMLPurifier_HTMLDefinition
      */
     var $info_content_sets = array();
 
+    /**
+     * Doctype object
+     */
+    var $doctype;
 
     /** PUBLIC BUT INTERNAL VARIABLES */
 
@@ -160,11 +164,74 @@ class HTMLPurifier_HTMLDefinition
      * Performs low-cost, preliminary initialization.
      * @param $config Instance of HTMLPurifier_Config
      */
-    function HTMLPurifier_HTMLDefinition(&$config) {
-        $this->config =& $config;
+    function HTMLPurifier_HTMLDefinition($config) {
+        $this->config = $config;
         $this->manager = new HTMLPurifier_HTMLModuleManager();
     }
 
+    /**
+     * Retrieve definition object from cache
+     * @param $config Instance of HTMLPurifier_Config used to derive the cache file
+     * @return Cached HTMLPurifier_HTMLDefinition, or false when no usable
+     *         cache entry exists (missing, unreadable or corrupt file)
+     */
+    function getCache($config) {
+        static $cache = array();
+        $file = HTMLPurifier_HTMLDefinition::getCacheFile($config);
+        if (isset($cache[$file])) return $cache[$file]; // unit test optimization
+        if (!file_exists($file)) return false;
+        $contents = file_get_contents($file);
+        if ($contents === false) return false;
+        $definition = unserialize($contents);
+        // only memoize real definitions: remembering a failed read would hide
+        // a cache file written later in the same request by saveCache()
+        if (!is_a($definition, 'HTMLPurifier_HTMLDefinition')) return false;
+        $cache[$file] = $definition;
+        return $cache[$file];
+    }
+
+    /**
+     * Determines a cache key identifier for a particular configuration
+     * @param $config Instance of HTMLPurifier_Config
+     * @return md5 hash of the serialized HTML and Attr directive batches
+     */
+    function getCacheKey($config) {
+        return md5(serialize(array($config->getBatch('HTML'), $config->getBatch('Attr'))));
+    }
+
+    /**
+     * Determines file a particular configuration's definition is stored in
+     * @param $config Instance of HTMLPurifier_Config
+     * @return Path of the .ser file inside the HTMLDefinition/ directory
+     *         that sits next to this source file
+     */
+    function getCacheFile($config) {
+        $key = HTMLPurifier_HTMLDefinition::getCacheKey($config);
+        return dirname(__FILE__) . '/HTMLDefinition/' . $key . '.ser';
+    }
+
+    /**
+     * Saves HTMLDefinition to cache
+     * @param $config Instance of HTMLPurifier_Config used to derive the cache file
+     * @return True if the whole serialized definition was written, false otherwise
+     */
+    function saveCache($config) {
+        $file = $this->getCacheFile($config);
+        $contents = serialize($this);
+        $fh = fopen($file, 'w');
+        // fopen() has already raised a warning; without a handle there is
+        // nothing to write to or close
+        if (!$fh) return false;
+        $written = fwrite($fh, $contents);
+        fclose($fh);
+        if ($written !== strlen($contents)) {
+            // never leave a truncated file behind for getCache() to read
+            unlink($file);
+            return false;
+        }
+        return true;
+    }
+
     /**
      * Processes internals into form usable by HTMLPurifier internals.
* Modifying the definition after calling this function should not @@ -190,6 +232,7 @@ class HTMLPurifier_HTMLDefinition function processModules() { $this->manager->setup($this->config); + $this->doctype = $this->manager->doctype; foreach ($this->manager->modules as $module) { foreach($module->info_tag_transform as $k => $v) { diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php index cad46136..4bfc741a 100644 --- a/library/HTMLPurifier/HTMLModuleManager.php +++ b/library/HTMLPurifier/HTMLModuleManager.php @@ -99,6 +99,12 @@ class HTMLPurifier_HTMLModuleManager */ var $doctypes; + /** + * Instance of current doctype + * @public + */ + var $doctype; + /** * Instance of HTMLPurifier_AttrTypes * @public @@ -288,8 +294,8 @@ class HTMLPurifier_HTMLModuleManager $this->trusted = $config->get('HTML', 'Trusted'); // generate - $doctype = $this->doctypes->make($config); - $modules = $doctype->modules; + $this->doctype = $this->doctypes->make($config); + $modules = $this->doctype->modules; // take out the default modules that aren't allowed $lookup = $config->get('HTML', 'AllowedModules'); @@ -309,7 +315,7 @@ class HTMLPurifier_HTMLModuleManager $this->processModule($module); } - foreach ($doctype->tidyModules as $module) { + foreach ($this->doctype->tidyModules as $module) { $this->processModule($module); if (method_exists($this->modules[$module], 'construct')) { $this->modules[$module]->construct($config); diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index 8f035e32..e6aab057 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -209,17 +209,17 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness $this->scheme = new HTMLPurifier_URISchemeMock($this); // here are the schemes we will support with overloaded mocks - $registry->setReturnReference('getScheme', $this->scheme, array('http', $this->config, 
$this->context)); - $registry->setReturnReference('getScheme', $this->scheme, array('mailto', $this->config, $this->context)); + $registry->setReturnReference('getScheme', $this->scheme, array('http', '*', '*')); + $registry->setReturnReference('getScheme', $this->scheme, array('mailto', '*', '*')); // default return value is false (meaning no scheme defined: reject) - $registry->setReturnValue('getScheme', false, array('*', $this->config, $this->context)); + $registry->setReturnValue('getScheme', false, array('*', '*', '*')); if ($this->components === false) { $this->scheme->expectNever('validateComponents'); } else { - $this->components[] = $this->config; // append the configuration - $this->components[] =& $this->context; // append context + $this->components[] = '*'; // append the configuration + $this->components[] = '*'; // append context $this->scheme->setReturnValue( 'validateComponents', $this->return_components, $this->components); $this->scheme->expectOnce('validateComponents', $this->components); @@ -325,4 +325,4 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness } -?> \ No newline at end of file +?>