0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-23 00:41:52 +00:00

[1.7.0] Implement HTMLDefinition cache (very hacked together, but long unit test times were driving me crazy!)

- Add extra protection in AttrDef_URI against phantom Schemes
- Doctype moved from config to HTMLDefinition
- AttrDef_URITest mocks have more generic object parameters to deal with PHP4's copy-happy behavior

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1089 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-05-23 03:27:36 +00:00
parent 7579932948
commit e180b7689e
9 changed files with 88 additions and 37 deletions

3
NEWS
View File

@ -27,6 +27,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. AttrTypes now has accessor functions that should be used instead . AttrTypes now has accessor functions that should be used instead
of directly manipulating info of directly manipulating info
. TagTransform_Center deprecated in favor of generic TagTransform_Simple . TagTransform_Center deprecated in favor of generic TagTransform_Simple
. Add extra protection in AttrDef_URI against phantom Schemes
. Doctype object added to HTMLDefinition which describes certain aspects
of the operational document type
1.6.1, released 2007-05-05 1.6.1, released 2007-05-05
! Support for more deprecated attributes via transformations: ! Support for more deprecated attributes via transformations:

11
TODO
View File

@ -11,14 +11,15 @@ TODO List
# Complete advanced API, and fully document it # Complete advanced API, and fully document it
- Add framework for unsafe attributes - Add framework for unsafe attributes
- Reorganize configuration directives - Reorganize configuration directives
- Set up doctype object inside configuration object - Set up anonymous module management by HTMLDefinition (Advanced API)
- Set up anonymous module management by HTMLDefinition - Get all AttrTypes into string form
# Implement HTMLDefinition caching using serialize # Clean up HTMLDefinition caching, need easy cache invalidation,
versioning of caches, etc.
# Implement all deprecated tags and attributes # Implement all deprecated tags and attributes
# Create parsing/standards compliance smoketest # Create parsing/standards compliance smoketest
# Reorganize Unit Tests # Reorganize Unit Tests
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly - Refactor loop tests (esp. AttrDef_URI)
do this earlier) - Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
? HTML interface for tweaking configuration to see changes ? HTML interface for tweaking configuration to see changes
1.8 release [Refactor, refactor!] 1.8 release [Refactor, refactor!]

View File

@ -158,6 +158,14 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
); );
} }
// something funky weird happened in the registry, abort!
if (!$scheme_obj) {
trigger_error(
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
E_USER_WARNING
);
return false;
}
// the URI we're processing embeds_resource a resource in the page, but the URI // the URI we're processing embeds_resource a resource in the page, but the URI
// it references cannot be located // it references cannot be located

View File

@ -53,11 +53,6 @@ class HTMLPurifier_Config
*/ */
var $autoFinalize = true; var $autoFinalize = true;
/**
* Instance of HTMLPurifier_Doctype, representing current doctype
*/
var $doctype;
/** /**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives * @param $definition HTMLPurifier_ConfigSchema that defines what directives
* are allowed. * are allowed.
@ -201,10 +196,17 @@ class HTMLPurifier_Config
empty($this->html_definition) || // hasn't ever been setup empty($this->html_definition) || // hasn't ever been setup
($raw && $this->html_definition->setup) // requesting new one ($raw && $this->html_definition->setup) // requesting new one
) { ) {
if (!$raw) {
$this->html_definition = HTMLPurifier_HTMLDefinition::getCache($this);
if ($this->html_definition) return $this->html_definition;
}
$this->html_definition = new HTMLPurifier_HTMLDefinition($this); $this->html_definition = new HTMLPurifier_HTMLDefinition($this);
if ($raw) return $this->html_definition; // no setup! if ($raw) return $this->html_definition; // no setup!
} }
if (!$this->html_definition->setup) $this->html_definition->setup(); if (!$this->html_definition->setup) {
$this->html_definition->setup();
$this->html_definition->saveCache($this);
}
return $this->html_definition; return $this->html_definition;
} }
@ -243,16 +245,6 @@ class HTMLPurifier_Config
} }
} }
/**
* Returns the current doctype object
*/
function getDoctype() {
if (!$this->doctype) {
$this->getHTMLDefinition();
}
return $this->doctype;
}
/** /**
* Loads configuration values from an ini file * Loads configuration values from an ini file
* @param $filename Name of ini file * @param $filename Name of ini file

View File

@ -77,7 +77,8 @@ class HTMLPurifier_DoctypeRegistry
if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
if (!isset($this->doctypes[$doctype])) { if (!isset($this->doctypes[$doctype])) {
trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist'); trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist');
$null = null; return $null; $anon = new HTMLPurifier_Doctype($doctype);
return $anon;
} }
return $this->doctypes[$doctype]; return $this->doctypes[$doctype];
} }
@ -93,7 +94,6 @@ class HTMLPurifier_DoctypeRegistry
function make($config) { function make($config) {
$original_doctype = $this->get($this->getDoctypeFromConfig($config)); $original_doctype = $this->get($this->getDoctypeFromConfig($config));
$doctype = $original_doctype->copy(); $doctype = $original_doctype->copy();
$config->doctype = $doctype;
return $doctype; return $doctype;
} }

View File

@ -71,10 +71,8 @@ class HTMLPurifier_Generator
if (!$config) $config = HTMLPurifier_Config::createDefault(); if (!$config) $config = HTMLPurifier_Config::createDefault();
$this->_scriptFix = $config->get('Output', 'CommentScriptContents'); $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
$doctype = $config->getDoctype();
$this->_xhtml = $doctype->xml;
$this->_def = $config->getHTMLDefinition(); $this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
if (!$tokens) return ''; if (!$tokens) return '';
for ($i = 0, $size = count($tokens); $i < $size; $i++) { for ($i = 0, $size = count($tokens); $i < $size; $i++) {

View File

@ -147,6 +147,10 @@ class HTMLPurifier_HTMLDefinition
*/ */
var $info_content_sets = array(); var $info_content_sets = array();
/**
* Doctype object
*/
var $doctype;
/** PUBLIC BUT INTERNAL VARIABLES */ /** PUBLIC BUT INTERNAL VARIABLES */
@ -160,11 +164,49 @@ class HTMLPurifier_HTMLDefinition
* Performs low-cost, preliminary initialization. * Performs low-cost, preliminary initialization.
* @param $config Instance of HTMLPurifier_Config * @param $config Instance of HTMLPurifier_Config
*/ */
function HTMLPurifier_HTMLDefinition(&$config) { function HTMLPurifier_HTMLDefinition($config) {
$this->config =& $config; $this->config = $config;
$this->manager = new HTMLPurifier_HTMLModuleManager(); $this->manager = new HTMLPurifier_HTMLModuleManager();
} }
/**
 * Retrieve definition object from cache
 *
 * Looks up the serialized definition stored in the file returned by
 * getCacheFile() for this configuration. Successfully loaded definitions
 * are memoized in a function-static array keyed by file path, so repeated
 * lookups in one process do not touch the disk again.
 *
 * @param $config Instance of HTMLPurifier_Config
 * @return Cached definition object, or false if no usable cache exists
 *         (file missing, unreadable, or not unserializable to an object)
 * @note The file contents are passed to unserialize(); the cache directory
 *       must therefore only ever contain files written by saveCache().
 */
function getCache($config) {
    static $cache = array();
    $file = HTMLPurifier_HTMLDefinition::getCacheFile($config);
    if (isset($cache[$file])) return $cache[$file]; // unit test optimization
    if (!file_exists($file)) return false;
    $contents = file_get_contents($file);
    if ($contents === false) return false; // unreadable, treat as cache miss
    $definition = unserialize($contents);
    // Do not memoize failures: the caller rebuilds and re-saves the cache,
    // and a remembered false would hide the repaired file for the rest of
    // this process.
    if (!is_object($definition)) return false;
    $cache[$file] = $definition;
    return $definition;
}
/**
 * Determines a cache key identifier for a particular configuration
 *
 * The key is the MD5 hash of the serialized 'HTML' and 'Attr' directive
 * batches (in that order) as returned by $config->getBatch().
 *
 * @param $config Configuration object providing getBatch()
 * @return 32-character hexadecimal string
 */
function getCacheKey($config) {
    $fingerprint = array();
    foreach (array('HTML', 'Attr') as $namespace) {
        $fingerprint[] = $config->getBatch($namespace);
    }
    return md5(serialize($fingerprint));
}
/**
 * Determines file a particular configuration's definition is stored in
 *
 * The file lives in the HTMLDefinition/ directory next to this source
 * file and is named after the configuration's cache key with a .ser
 * extension.
 *
 * @param $config Configuration object, passed through to getCacheKey()
 * @return Path of the cache file as a string
 */
function getCacheFile($config) {
    $cache_dir = dirname(__FILE__) . '/HTMLDefinition/';
    $basename  = HTMLPurifier_HTMLDefinition::getCacheKey($config) . '.ser';
    return $cache_dir . $basename;
}
/**
 * Saves HTMLDefinition to cache
 *
 * Serializes this object and writes it to the file returned by
 * getCacheFile() for the given configuration, replacing any previous
 * contents.
 *
 * @param $config Configuration object used to determine the cache file
 * @return True if the complete serialized definition was written, false
 *         if the file could not be opened or the write was short
 */
function saveCache($config) {
    $file = $this->getCacheFile($config);
    $contents = serialize($this);
    // 'b' keeps the serialized bytes untranslated on every platform
    $fh = fopen($file, 'wb');
    // fopen() has already raised a warning; bail out instead of handing
    // a non-resource to fwrite()/fclose()
    if (!$fh) return false;
    $written = fwrite($fh, $contents);
    fclose($fh);
    return $written === strlen($contents);
}
/** /**
* Processes internals into form usable by HTMLPurifier internals. * Processes internals into form usable by HTMLPurifier internals.
* Modifying the definition after calling this function should not * Modifying the definition after calling this function should not
@ -190,6 +232,7 @@ class HTMLPurifier_HTMLDefinition
function processModules() { function processModules() {
$this->manager->setup($this->config); $this->manager->setup($this->config);
$this->doctype = $this->manager->doctype;
foreach ($this->manager->modules as $module) { foreach ($this->manager->modules as $module) {
foreach($module->info_tag_transform as $k => $v) { foreach($module->info_tag_transform as $k => $v) {

View File

@ -99,6 +99,12 @@ class HTMLPurifier_HTMLModuleManager
*/ */
var $doctypes; var $doctypes;
/**
* Instance of current doctype
* @public
*/
var $doctype;
/** /**
* Instance of HTMLPurifier_AttrTypes * Instance of HTMLPurifier_AttrTypes
* @public * @public
@ -288,8 +294,8 @@ class HTMLPurifier_HTMLModuleManager
$this->trusted = $config->get('HTML', 'Trusted'); $this->trusted = $config->get('HTML', 'Trusted');
// generate // generate
$doctype = $this->doctypes->make($config); $this->doctype = $this->doctypes->make($config);
$modules = $doctype->modules; $modules = $this->doctype->modules;
// take out the default modules that aren't allowed // take out the default modules that aren't allowed
$lookup = $config->get('HTML', 'AllowedModules'); $lookup = $config->get('HTML', 'AllowedModules');
@ -309,7 +315,7 @@ class HTMLPurifier_HTMLModuleManager
$this->processModule($module); $this->processModule($module);
} }
foreach ($doctype->tidyModules as $module) { foreach ($this->doctype->tidyModules as $module) {
$this->processModule($module); $this->processModule($module);
if (method_exists($this->modules[$module], 'construct')) { if (method_exists($this->modules[$module], 'construct')) {
$this->modules[$module]->construct($config); $this->modules[$module]->construct($config);

View File

@ -209,17 +209,17 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$this->scheme = new HTMLPurifier_URISchemeMock($this); $this->scheme = new HTMLPurifier_URISchemeMock($this);
// here are the schemes we will support with overloaded mocks // here are the schemes we will support with overloaded mocks
$registry->setReturnReference('getScheme', $this->scheme, array('http', $this->config, $this->context)); $registry->setReturnReference('getScheme', $this->scheme, array('http', '*', '*'));
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', $this->config, $this->context)); $registry->setReturnReference('getScheme', $this->scheme, array('mailto', '*', '*'));
// default return value is false (meaning no scheme defined: reject) // default return value is false (meaning no scheme defined: reject)
$registry->setReturnValue('getScheme', false, array('*', $this->config, $this->context)); $registry->setReturnValue('getScheme', false, array('*', '*', '*'));
if ($this->components === false) { if ($this->components === false) {
$this->scheme->expectNever('validateComponents'); $this->scheme->expectNever('validateComponents');
} else { } else {
$this->components[] = $this->config; // append the configuration $this->components[] = '*'; // append the configuration
$this->components[] =& $this->context; // append context $this->components[] = '*'; // append context
$this->scheme->setReturnValue( $this->scheme->setReturnValue(
'validateComponents', $this->return_components, $this->components); 'validateComponents', $this->return_components, $this->components);
$this->scheme->expectOnce('validateComponents', $this->components); $this->scheme->expectOnce('validateComponents', $this->components);