mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-23 00:41:52 +00:00
[1.7.0] Implement HTMLDefinition cache (very hacked together, but long unit test times were driving me crazy!)
- Add extra protection in AttrDef_URI against phantom Schemes
- Doctype moved from config to HTMLDefinition
- AttrDef_URITest mocks have more generic object parameters to deal with PHP4's copy-happy behavior

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1089 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
7579932948
commit
e180b7689e
3
NEWS
3
NEWS
@ -27,6 +27,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. AttrTypes now has accessor functions that should be used instead
|
. AttrTypes now has accessor functions that should be used instead
|
||||||
of directly manipulating info
|
of directly manipulating info
|
||||||
. TagTransform_Center deprecated in favor of generic TagTransform_Simple
|
. TagTransform_Center deprecated in favor of generic TagTransform_Simple
|
||||||
|
. Add extra protection in AttrDef_URI against phantom Schemes
|
||||||
|
. Doctype object added to HTMLDefinition which describes certain aspects
|
||||||
|
of the operational document type
|
||||||
|
|
||||||
1.6.1, released 2007-05-05
|
1.6.1, released 2007-05-05
|
||||||
! Support for more deprecated attributes via transformations:
|
! Support for more deprecated attributes via transformations:
|
||||||
|
11
TODO
11
TODO
@ -11,14 +11,15 @@ TODO List
|
|||||||
# Complete advanced API, and fully document it
|
# Complete advanced API, and fully document it
|
||||||
- Add framework for unsafe attributes
|
- Add framework for unsafe attributes
|
||||||
- Reorganize configuration directives
|
- Reorganize configuration directives
|
||||||
- Set up doctype object inside configuration object
|
- Set up anonymous module management by HTMLDefinition (Advanced API)
|
||||||
- Set up anonymous module management by HTMLDefinition
|
- Get all AttrTypes into string form
|
||||||
# Implement HTMLDefinition caching using serialize
|
# Clean up HTMLDefinition caching, need easy cache invalidation,
|
||||||
|
versioning of caches, etc.
|
||||||
# Implement all deprecated tags and attributes
|
# Implement all deprecated tags and attributes
|
||||||
# Create parsing/standards compliance smoketest
|
# Create parsing/standards compliance smoketest
|
||||||
# Reorganize Unit Tests
|
# Reorganize Unit Tests
|
||||||
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly
|
- Refactor loop tests (esp. AttrDef_URI)
|
||||||
do this earlier)
|
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
|
||||||
? HTML interface for tweaking configuration to see changes
|
? HTML interface for tweaking configuration to see changes
|
||||||
|
|
||||||
1.8 release [Refactor, refactor!]
|
1.8 release [Refactor, refactor!]
|
||||||
|
@ -158,6 +158,14 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// something funky weird happened in the registry, abort!
|
||||||
|
if (!$scheme_obj) {
|
||||||
|
trigger_error(
|
||||||
|
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
||||||
|
E_USER_WARNING
|
||||||
|
);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// the URI we're processing embeds_resource a resource in the page, but the URI
|
// the URI we're processing embeds_resource a resource in the page, but the URI
|
||||||
// it references cannot be located
|
// it references cannot be located
|
||||||
|
@ -53,11 +53,6 @@ class HTMLPurifier_Config
|
|||||||
*/
|
*/
|
||||||
var $autoFinalize = true;
|
var $autoFinalize = true;
|
||||||
|
|
||||||
/**
|
|
||||||
* Instance of HTMLPurifier_Doctype, representing current doctype
|
|
||||||
*/
|
|
||||||
var $doctype;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||||
* are allowed.
|
* are allowed.
|
||||||
@ -201,10 +196,17 @@ class HTMLPurifier_Config
|
|||||||
empty($this->html_definition) || // hasn't ever been setup
|
empty($this->html_definition) || // hasn't ever been setup
|
||||||
($raw && $this->html_definition->setup) // requesting new one
|
($raw && $this->html_definition->setup) // requesting new one
|
||||||
) {
|
) {
|
||||||
|
if (!$raw) {
|
||||||
|
$this->html_definition = HTMLPurifier_HTMLDefinition::getCache($this);
|
||||||
|
if ($this->html_definition) return $this->html_definition;
|
||||||
|
}
|
||||||
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
|
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
|
||||||
if ($raw) return $this->html_definition; // no setup!
|
if ($raw) return $this->html_definition; // no setup!
|
||||||
}
|
}
|
||||||
if (!$this->html_definition->setup) $this->html_definition->setup();
|
if (!$this->html_definition->setup) {
|
||||||
|
$this->html_definition->setup();
|
||||||
|
$this->html_definition->saveCache($this);
|
||||||
|
}
|
||||||
return $this->html_definition;
|
return $this->html_definition;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -243,16 +245,6 @@ class HTMLPurifier_Config
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the current doctype object
|
|
||||||
*/
|
|
||||||
function getDoctype() {
|
|
||||||
if (!$this->doctype) {
|
|
||||||
$this->getHTMLDefinition();
|
|
||||||
}
|
|
||||||
return $this->doctype;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads configuration values from an ini file
|
* Loads configuration values from an ini file
|
||||||
* @param $filename Name of ini file
|
* @param $filename Name of ini file
|
||||||
|
@ -77,7 +77,8 @@ class HTMLPurifier_DoctypeRegistry
|
|||||||
if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
|
if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
|
||||||
if (!isset($this->doctypes[$doctype])) {
|
if (!isset($this->doctypes[$doctype])) {
|
||||||
trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist');
|
trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist');
|
||||||
$null = null; return $null;
|
$anon = new HTMLPurifier_Doctype($doctype);
|
||||||
|
return $anon;
|
||||||
}
|
}
|
||||||
return $this->doctypes[$doctype];
|
return $this->doctypes[$doctype];
|
||||||
}
|
}
|
||||||
@ -93,7 +94,6 @@ class HTMLPurifier_DoctypeRegistry
|
|||||||
function make($config) {
|
function make($config) {
|
||||||
$original_doctype = $this->get($this->getDoctypeFromConfig($config));
|
$original_doctype = $this->get($this->getDoctypeFromConfig($config));
|
||||||
$doctype = $original_doctype->copy();
|
$doctype = $original_doctype->copy();
|
||||||
$config->doctype = $doctype;
|
|
||||||
return $doctype;
|
return $doctype;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,10 +71,8 @@ class HTMLPurifier_Generator
|
|||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||||
$this->_scriptFix = $config->get('Output', 'CommentScriptContents');
|
$this->_scriptFix = $config->get('Output', 'CommentScriptContents');
|
||||||
|
|
||||||
$doctype = $config->getDoctype();
|
|
||||||
$this->_xhtml = $doctype->xml;
|
|
||||||
|
|
||||||
$this->_def = $config->getHTMLDefinition();
|
$this->_def = $config->getHTMLDefinition();
|
||||||
|
$this->_xhtml = $this->_def->doctype->xml;
|
||||||
|
|
||||||
if (!$tokens) return '';
|
if (!$tokens) return '';
|
||||||
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
|
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
|
||||||
|
@ -147,6 +147,10 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
*/
|
*/
|
||||||
var $info_content_sets = array();
|
var $info_content_sets = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Doctype object
|
||||||
|
*/
|
||||||
|
var $doctype;
|
||||||
|
|
||||||
|
|
||||||
/** PUBLIC BUT INTERNAL VARIABLES */
|
/** PUBLIC BUT INTERNAL VARIABLES */
|
||||||
@ -160,11 +164,49 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
* Performs low-cost, preliminary initialization.
|
* Performs low-cost, preliminary initialization.
|
||||||
* @param $config Instance of HTMLPurifier_Config
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_HTMLDefinition(&$config) {
|
function HTMLPurifier_HTMLDefinition($config) {
|
||||||
$this->config =& $config;
|
$this->config = $config;
|
||||||
$this->manager = new HTMLPurifier_HTMLModuleManager();
|
$this->manager = new HTMLPurifier_HTMLModuleManager();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve definition object from cache
|
||||||
|
*/
|
||||||
|
function getCache($config) {
|
||||||
|
static $cache = array();
|
||||||
|
$file = HTMLPurifier_HTMLDefinition::getCacheFile($config);
|
||||||
|
if (isset($cache[$file])) return $cache[$file]; // unit test optimization
|
||||||
|
if (!file_exists($file)) return false;
|
||||||
|
$cache[$file] = unserialize(file_get_contents($file));
|
||||||
|
return $cache[$file];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines a cache key identifier for a particular configuration
|
||||||
|
*/
|
||||||
|
function getCacheKey($config) {
|
||||||
|
return md5(serialize(array($config->getBatch('HTML'), $config->getBatch('Attr'))));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines file a particular configuration's definition is stored in
|
||||||
|
*/
|
||||||
|
function getCacheFile($config) {
|
||||||
|
$key = HTMLPurifier_HTMLDefinition::getCacheKey($config);
|
||||||
|
return dirname(__FILE__) . '/HTMLDefinition/' . $key . '.ser';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Saves HTMLDefinition to cache
|
||||||
|
*/
|
||||||
|
function saveCache($config) {
|
||||||
|
$file = $this->getCacheFile($config);
|
||||||
|
$contents = serialize($this);
|
||||||
|
$fh = fopen($file, 'w');
|
||||||
|
fwrite($fh, $contents);
|
||||||
|
fclose($fh);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Processes internals into form usable by HTMLPurifier internals.
|
* Processes internals into form usable by HTMLPurifier internals.
|
||||||
* Modifying the definition after calling this function should not
|
* Modifying the definition after calling this function should not
|
||||||
@ -190,6 +232,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
function processModules() {
|
function processModules() {
|
||||||
|
|
||||||
$this->manager->setup($this->config);
|
$this->manager->setup($this->config);
|
||||||
|
$this->doctype = $this->manager->doctype;
|
||||||
|
|
||||||
foreach ($this->manager->modules as $module) {
|
foreach ($this->manager->modules as $module) {
|
||||||
foreach($module->info_tag_transform as $k => $v) {
|
foreach($module->info_tag_transform as $k => $v) {
|
||||||
|
@ -99,6 +99,12 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
*/
|
*/
|
||||||
var $doctypes;
|
var $doctypes;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instance of current doctype
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $doctype;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instance of HTMLPurifier_AttrTypes
|
* Instance of HTMLPurifier_AttrTypes
|
||||||
* @public
|
* @public
|
||||||
@ -288,8 +294,8 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
$this->trusted = $config->get('HTML', 'Trusted');
|
$this->trusted = $config->get('HTML', 'Trusted');
|
||||||
|
|
||||||
// generate
|
// generate
|
||||||
$doctype = $this->doctypes->make($config);
|
$this->doctype = $this->doctypes->make($config);
|
||||||
$modules = $doctype->modules;
|
$modules = $this->doctype->modules;
|
||||||
|
|
||||||
// take out the default modules that aren't allowed
|
// take out the default modules that aren't allowed
|
||||||
$lookup = $config->get('HTML', 'AllowedModules');
|
$lookup = $config->get('HTML', 'AllowedModules');
|
||||||
@ -309,7 +315,7 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
$this->processModule($module);
|
$this->processModule($module);
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($doctype->tidyModules as $module) {
|
foreach ($this->doctype->tidyModules as $module) {
|
||||||
$this->processModule($module);
|
$this->processModule($module);
|
||||||
if (method_exists($this->modules[$module], 'construct')) {
|
if (method_exists($this->modules[$module], 'construct')) {
|
||||||
$this->modules[$module]->construct($config);
|
$this->modules[$module]->construct($config);
|
||||||
|
@ -209,17 +209,17 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
$this->scheme = new HTMLPurifier_URISchemeMock($this);
|
$this->scheme = new HTMLPurifier_URISchemeMock($this);
|
||||||
|
|
||||||
// here are the schemes we will support with overloaded mocks
|
// here are the schemes we will support with overloaded mocks
|
||||||
$registry->setReturnReference('getScheme', $this->scheme, array('http', $this->config, $this->context));
|
$registry->setReturnReference('getScheme', $this->scheme, array('http', '*', '*'));
|
||||||
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', $this->config, $this->context));
|
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', '*', '*'));
|
||||||
|
|
||||||
// default return value is false (meaning no scheme defined: reject)
|
// default return value is false (meaning no scheme defined: reject)
|
||||||
$registry->setReturnValue('getScheme', false, array('*', $this->config, $this->context));
|
$registry->setReturnValue('getScheme', false, array('*', '*', '*'));
|
||||||
|
|
||||||
if ($this->components === false) {
|
if ($this->components === false) {
|
||||||
$this->scheme->expectNever('validateComponents');
|
$this->scheme->expectNever('validateComponents');
|
||||||
} else {
|
} else {
|
||||||
$this->components[] = $this->config; // append the configuration
|
$this->components[] = '*'; // append the configuration
|
||||||
$this->components[] =& $this->context; // append context
|
$this->components[] = '*'; // append context
|
||||||
$this->scheme->setReturnValue(
|
$this->scheme->setReturnValue(
|
||||||
'validateComponents', $this->return_components, $this->components);
|
'validateComponents', $this->return_components, $this->components);
|
||||||
$this->scheme->expectOnce('validateComponents', $this->components);
|
$this->scheme->expectOnce('validateComponents', $this->components);
|
||||||
|
Loading…
Reference in New Issue
Block a user