0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 16:31:53 +00:00

[1.7.0] Implement HTMLDefinition cache (very hacked together, but long unit test times were driving me crazy!)

- Add extra protection in AttrDef_URI against phantom Schemes
- Doctype moved from config to HTMLDefinition
- AttrDef_URITest mocks have more generic object parameters to deal with PHP4's copy-happy behavior

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1089 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-05-23 03:27:36 +00:00
parent 7579932948
commit e180b7689e
9 changed files with 88 additions and 37 deletions

3
NEWS
View File

@ -27,6 +27,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. AttrTypes now has accessor functions that should be used instead
of directly manipulating info
. TagTransform_Center deprecated in favor of generic TagTransform_Simple
. Add extra protection in AttrDef_URI against phantom Schemes
. Doctype object added to HTMLDefinition which describes certain aspects
of the operational document type
1.6.1, released 2007-05-05
! Support for more deprecated attributes via transformations:

11
TODO
View File

@ -11,14 +11,15 @@ TODO List
# Complete advanced API, and fully document it
- Add framework for unsafe attributes
- Reorganize configuration directives
- Set up doctype object inside configuration object
- Set up anonymous module management by HTMLDefinition
# Implement HTMLDefinition caching using serialize
- Set up anonymous module management by HTMLDefinition (Advanced API)
- Get all AttrTypes into string form
# Clean up HTMLDefinition caching, need easy cache invalidation,
versioning of caches, etc.
# Implement all deprecated tags and attributes
# Create parsing/standards compliance smoketest
# Reorganize Unit Tests
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly
do this earlier)
- Refactor loop tests (esp. AttrDef_URI)
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
? HTML interface for tweaking configuration to see changes
1.8 release [Refactor, refactor!]

View File

@ -158,6 +158,14 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
);
}
// something funky weird happened in the registry, abort!
if (!$scheme_obj) {
trigger_error(
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
E_USER_WARNING
);
return false;
}
// the URI we're processing embeds a resource in the page, but the URI
// it references cannot be located

View File

@ -53,11 +53,6 @@ class HTMLPurifier_Config
*/
var $autoFinalize = true;
/**
* Instance of HTMLPurifier_Doctype, representing current doctype
*/
var $doctype;
/**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
* are allowed.
@ -201,10 +196,17 @@ class HTMLPurifier_Config
empty($this->html_definition) || // hasn't ever been setup
($raw && $this->html_definition->setup) // requesting new one
) {
if (!$raw) {
$this->html_definition = HTMLPurifier_HTMLDefinition::getCache($this);
if ($this->html_definition) return $this->html_definition;
}
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
if ($raw) return $this->html_definition; // no setup!
}
if (!$this->html_definition->setup) $this->html_definition->setup();
if (!$this->html_definition->setup) {
$this->html_definition->setup();
$this->html_definition->saveCache($this);
}
return $this->html_definition;
}
@ -243,16 +245,6 @@ class HTMLPurifier_Config
}
}
/**
 * Returns the current doctype object.
 * When $this->doctype is still unset, getHTMLDefinition() is called first
 * and the property is read afterwards.
 * NOTE(review): presumably getHTMLDefinition() assigns $this->doctype as a
 * side effect -- confirm against the doctype registry's make().
 */
function getDoctype() {
// nothing stored yet: run getHTMLDefinition() before reading the property
if (!$this->doctype) {
$this->getHTMLDefinition();
}
return $this->doctype;
}
/**
* Loads configuration values from an ini file
* @param $filename Name of ini file

View File

@ -77,7 +77,8 @@ class HTMLPurifier_DoctypeRegistry
if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
if (!isset($this->doctypes[$doctype])) {
trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist');
$null = null; return $null;
$anon = new HTMLPurifier_Doctype($doctype);
return $anon;
}
return $this->doctypes[$doctype];
}
@ -93,7 +94,6 @@ class HTMLPurifier_DoctypeRegistry
// Builds the doctype object for $config: resolves the doctype name with
// getDoctypeFromConfig(), fetches that doctype with get(), and returns a
// copy of it.
function make($config) {
$original_doctype = $this->get($this->getDoctypeFromConfig($config));
// hand out a copy() of the fetched object rather than the object itself
$doctype = $original_doctype->copy();
// the copy is also stored on the config object that was passed in
$config->doctype = $doctype;
return $doctype;
}

View File

@ -71,10 +71,8 @@ class HTMLPurifier_Generator
if (!$config) $config = HTMLPurifier_Config::createDefault();
$this->_scriptFix = $config->get('Output', 'CommentScriptContents');
$doctype = $config->getDoctype();
$this->_xhtml = $doctype->xml;
$this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
if (!$tokens) return '';
for ($i = 0, $size = count($tokens); $i < $size; $i++) {

View File

@ -147,6 +147,10 @@ class HTMLPurifier_HTMLDefinition
*/
var $info_content_sets = array();
/**
* Doctype object
*/
var $doctype;
/** PUBLIC BUT INTERNAL VARIABLES */
@ -160,11 +164,49 @@ class HTMLPurifier_HTMLDefinition
* Performs low-cost, preliminary initialization.
* @param $config Instance of HTMLPurifier_Config
*/
function HTMLPurifier_HTMLDefinition(&$config) {
$this->config =& $config;
function HTMLPurifier_HTMLDefinition($config) {
$this->config = $config;
$this->manager = new HTMLPurifier_HTMLModuleManager();
}
/**
 * Retrieve definition object from cache.
 *
 * Successful loads are memoized per cache file in a function-static array,
 * so later calls for the same file skip the disk read and unserialize step
 * (unit test optimization).
 *
 * @param $config Instance of HTMLPurifier_Config used to locate the cache file
 * @return Cached definition object, or false when there is no usable cache
 *         entry: the file is missing, cannot be read, or its contents do
 *         not unserialize to an HTMLPurifier_HTMLDefinition
 */
function getCache($config) {
    static $cache = array();
    $file = HTMLPurifier_HTMLDefinition::getCacheFile($config);
    if (isset($cache[$file])) return $cache[$file]; // unit test optimization
    if (!file_exists($file)) return false;
    $contents = file_get_contents($file);
    if ($contents === false) return false; // file exists but could not be read
    $definition = unserialize($contents);
    // A truncated or foreign file must never be handed out as a definition.
    // Reporting a miss (and not memoizing it) lets the caller rebuild the
    // definition and overwrite the bad file.
    if (!is_object($definition) || !is_a($definition, 'HTMLPurifier_HTMLDefinition')) {
        return false;
    }
    $cache[$file] = $definition;
    return $definition;
}
/**
 * Derives the cache identifier for a configuration.
 *
 * The key is the MD5 hash of the serialized 'HTML' and 'Attr' directive
 * batches (in that order), as returned by $config->getBatch().
 *
 * @param $config Configuration object providing getBatch()
 * @return 32-character hexadecimal string
 */
function getCacheKey($config) {
    $batches = array();
    foreach (array('HTML', 'Attr') as $namespace) {
        $batches[] = $config->getBatch($namespace);
    }
    $fingerprint = serialize($batches);
    return md5($fingerprint);
}
/**
 * Works out which file holds the cached definition for a configuration.
 *
 * The file lives in the HTMLDefinition/ directory next to this source file
 * and is named after the configuration's cache key, with a .ser extension.
 *
 * @param $config Configuration object, passed through to getCacheKey()
 * @return Path of the cache file as a string
 */
function getCacheFile($config) {
    $directory = dirname(__FILE__) . '/HTMLDefinition/';
    return $directory . HTMLPurifier_HTMLDefinition::getCacheKey($config) . '.ser';
}
/**
 * Saves HTMLDefinition to cache.
 *
 * Serializes this object and writes it to the file named by
 * getCacheFile($config). A failure to open or fully write the file is
 * reported through the return value instead of being passed on to
 * fwrite()/fclose() as an invalid handle.
 *
 * @param $config Configuration object used to determine the cache file
 * @return true if the complete serialized definition was written,
 *         false otherwise
 */
function saveCache($config) {
    $file = $this->getCacheFile($config);
    $contents = serialize($this);
    // binary mode: the serialized string must reach disk byte-for-byte
    $fh = fopen($file, 'wb');
    if (!$fh) return false; // directory missing or not writable
    $written = fwrite($fh, $contents);
    fclose($fh);
    if ($written !== strlen($contents)) {
        // do not leave a truncated file behind for a later read to pick up
        unlink($file);
        return false;
    }
    return true;
}
/**
* Processes internals into form usable by HTMLPurifier internals.
* Modifying the definition after calling this function should not
@ -190,6 +232,7 @@ class HTMLPurifier_HTMLDefinition
function processModules() {
$this->manager->setup($this->config);
$this->doctype = $this->manager->doctype;
foreach ($this->manager->modules as $module) {
foreach($module->info_tag_transform as $k => $v) {

View File

@ -99,6 +99,12 @@ class HTMLPurifier_HTMLModuleManager
*/
var $doctypes;
/**
* Instance of current doctype
* @public
*/
var $doctype;
/**
* Instance of HTMLPurifier_AttrTypes
* @public
@ -288,8 +294,8 @@ class HTMLPurifier_HTMLModuleManager
$this->trusted = $config->get('HTML', 'Trusted');
// generate
$doctype = $this->doctypes->make($config);
$modules = $doctype->modules;
$this->doctype = $this->doctypes->make($config);
$modules = $this->doctype->modules;
// take out the default modules that aren't allowed
$lookup = $config->get('HTML', 'AllowedModules');
@ -309,7 +315,7 @@ class HTMLPurifier_HTMLModuleManager
$this->processModule($module);
}
foreach ($doctype->tidyModules as $module) {
foreach ($this->doctype->tidyModules as $module) {
$this->processModule($module);
if (method_exists($this->modules[$module], 'construct')) {
$this->modules[$module]->construct($config);

View File

@ -209,17 +209,17 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
$this->scheme = new HTMLPurifier_URISchemeMock($this);
// here are the schemes we will support with overloaded mocks
$registry->setReturnReference('getScheme', $this->scheme, array('http', $this->config, $this->context));
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', $this->config, $this->context));
$registry->setReturnReference('getScheme', $this->scheme, array('http', '*', '*'));
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', '*', '*'));
// default return value is false (meaning no scheme defined: reject)
$registry->setReturnValue('getScheme', false, array('*', $this->config, $this->context));
$registry->setReturnValue('getScheme', false, array('*', '*', '*'));
if ($this->components === false) {
$this->scheme->expectNever('validateComponents');
} else {
$this->components[] = $this->config; // append the configuration
$this->components[] =& $this->context; // append context
$this->components[] = '*'; // append the configuration
$this->components[] = '*'; // append context
$this->scheme->setReturnValue(
'validateComponents', $this->return_components, $this->components);
$this->scheme->expectOnce('validateComponents', $this->components);