mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 16:31:53 +00:00
[1.7.0] Implement HTMLDefinition cache (very hacked together, but long unit test times were driving me crazy!)
- Add extra protection in AttrDef_URI against phantom Schemes - Doctype moved from config to HTMLDefinition - AttrDef_URITest mocks have more generic object parameters to deal with PHP4's copy-happy behavior git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1089 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
7579932948
commit
e180b7689e
3
NEWS
3
NEWS
@ -27,6 +27,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. AttrTypes now has accessor functions that should be used instead
|
||||
of directly manipulating info
|
||||
. TagTransform_Center deprecated in favor of generic TagTransform_Simple
|
||||
. Add extra protection in AttrDef_URI against phantom Schemes
|
||||
. Doctype object added to HTMLDefinition which describes certain aspects
|
||||
of the operational document type
|
||||
|
||||
1.6.1, released 2007-05-05
|
||||
! Support for more deprecated attributes via transformations:
|
||||
|
11
TODO
11
TODO
@ -11,14 +11,15 @@ TODO List
|
||||
# Complete advanced API, and fully document it
|
||||
- Add framework for unsafe attributes
|
||||
- Reorganize configuration directives
|
||||
- Set up doctype object inside configuration object
|
||||
- Set up anonymous module management by HTMLDefinition
|
||||
# Implement HTMLDefinition caching using serialize
|
||||
- Set up anonymous module management by HTMLDefinition (Advanced API)
|
||||
- Get all AttrTypes into string form
|
||||
# Clean up HTMLDefinition caching, need easy cache invalidation,
|
||||
versioning of caches, etc.
|
||||
# Implement all deprecated tags and attributes
|
||||
# Create parsing/standards compliance smoketest
|
||||
# Reorganize Unit Tests
|
||||
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly
|
||||
do this earlier)
|
||||
- Refactor loop tests (esp. AttrDef_URI)
|
||||
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists
|
||||
? HTML interface for tweaking configuration to see changes
|
||||
|
||||
1.8 release [Refactor, refactor!]
|
||||
|
@ -158,6 +158,14 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
);
|
||||
}
|
||||
|
||||
// something funky weird happened in the registry, abort!
|
||||
if (!$scheme_obj) {
|
||||
trigger_error(
|
||||
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
||||
E_USER_WARNING
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// it references cannot be located
|
||||
|
@ -53,11 +53,6 @@ class HTMLPurifier_Config
|
||||
*/
|
||||
var $autoFinalize = true;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_Doctype, representing current doctype
|
||||
*/
|
||||
var $doctype;
|
||||
|
||||
/**
|
||||
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||
* are allowed.
|
||||
@ -201,10 +196,17 @@ class HTMLPurifier_Config
|
||||
empty($this->html_definition) || // hasn't ever been setup
|
||||
($raw && $this->html_definition->setup) // requesting new one
|
||||
) {
|
||||
if (!$raw) {
|
||||
$this->html_definition = HTMLPurifier_HTMLDefinition::getCache($this);
|
||||
if ($this->html_definition) return $this->html_definition;
|
||||
}
|
||||
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
|
||||
if ($raw) return $this->html_definition; // no setup!
|
||||
}
|
||||
if (!$this->html_definition->setup) $this->html_definition->setup();
|
||||
if (!$this->html_definition->setup) {
|
||||
$this->html_definition->setup();
|
||||
$this->html_definition->saveCache($this);
|
||||
}
|
||||
return $this->html_definition;
|
||||
}
|
||||
|
||||
@ -243,16 +245,6 @@ class HTMLPurifier_Config
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current doctype object
|
||||
*/
|
||||
function getDoctype() {
|
||||
if (!$this->doctype) {
|
||||
$this->getHTMLDefinition();
|
||||
}
|
||||
return $this->doctype;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads configuration values from an ini file
|
||||
* @param $filename Name of ini file
|
||||
|
@ -77,7 +77,8 @@ class HTMLPurifier_DoctypeRegistry
|
||||
if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
|
||||
if (!isset($this->doctypes[$doctype])) {
|
||||
trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist');
|
||||
$null = null; return $null;
|
||||
$anon = new HTMLPurifier_Doctype($doctype);
|
||||
return $anon;
|
||||
}
|
||||
return $this->doctypes[$doctype];
|
||||
}
|
||||
@ -93,7 +94,6 @@ class HTMLPurifier_DoctypeRegistry
|
||||
function make($config) {
|
||||
$original_doctype = $this->get($this->getDoctypeFromConfig($config));
|
||||
$doctype = $original_doctype->copy();
|
||||
$config->doctype = $doctype;
|
||||
return $doctype;
|
||||
}
|
||||
|
||||
|
@ -71,10 +71,8 @@ class HTMLPurifier_Generator
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$this->_scriptFix = $config->get('Output', 'CommentScriptContents');
|
||||
|
||||
$doctype = $config->getDoctype();
|
||||
$this->_xhtml = $doctype->xml;
|
||||
|
||||
$this->_def = $config->getHTMLDefinition();
|
||||
$this->_xhtml = $this->_def->doctype->xml;
|
||||
|
||||
if (!$tokens) return '';
|
||||
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
|
||||
|
@ -147,6 +147,10 @@ class HTMLPurifier_HTMLDefinition
|
||||
*/
|
||||
var $info_content_sets = array();
|
||||
|
||||
/**
|
||||
* Doctype object
|
||||
*/
|
||||
var $doctype;
|
||||
|
||||
|
||||
/** PUBLIC BUT INTERNAL VARIABLES */
|
||||
@ -160,11 +164,49 @@ class HTMLPurifier_HTMLDefinition
|
||||
* Performs low-cost, preliminary initialization.
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
*/
|
||||
function HTMLPurifier_HTMLDefinition(&$config) {
|
||||
$this->config =& $config;
|
||||
function HTMLPurifier_HTMLDefinition($config) {
|
||||
$this->config = $config;
|
||||
$this->manager = new HTMLPurifier_HTMLModuleManager();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve definition object from cache
|
||||
*/
|
||||
function getCache($config) {
|
||||
static $cache = array();
|
||||
$file = HTMLPurifier_HTMLDefinition::getCacheFile($config);
|
||||
if (isset($cache[$file])) return $cache[$file]; // unit test optimization
|
||||
if (!file_exists($file)) return false;
|
||||
$cache[$file] = unserialize(file_get_contents($file));
|
||||
return $cache[$file];
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines a cache key identifier for a particular configuration
|
||||
*/
|
||||
function getCacheKey($config) {
|
||||
return md5(serialize(array($config->getBatch('HTML'), $config->getBatch('Attr'))));
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the file in which a particular configuration's definition is stored
|
||||
*/
|
||||
function getCacheFile($config) {
|
||||
$key = HTMLPurifier_HTMLDefinition::getCacheKey($config);
|
||||
return dirname(__FILE__) . '/HTMLDefinition/' . $key . '.ser';
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves HTMLDefinition to cache
|
||||
*/
|
||||
function saveCache($config) {
|
||||
$file = $this->getCacheFile($config);
|
||||
$contents = serialize($this);
|
||||
$fh = fopen($file, 'w');
|
||||
fwrite($fh, $contents);
|
||||
fclose($fh);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes internals into form usable by HTMLPurifier internals.
|
||||
* Modifying the definition after calling this function should not
|
||||
@ -190,6 +232,7 @@ class HTMLPurifier_HTMLDefinition
|
||||
function processModules() {
|
||||
|
||||
$this->manager->setup($this->config);
|
||||
$this->doctype = $this->manager->doctype;
|
||||
|
||||
foreach ($this->manager->modules as $module) {
|
||||
foreach($module->info_tag_transform as $k => $v) {
|
||||
|
@ -99,6 +99,12 @@ class HTMLPurifier_HTMLModuleManager
|
||||
*/
|
||||
var $doctypes;
|
||||
|
||||
/**
|
||||
* Instance of current doctype
|
||||
* @public
|
||||
*/
|
||||
var $doctype;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrTypes
|
||||
* @public
|
||||
@ -288,8 +294,8 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$this->trusted = $config->get('HTML', 'Trusted');
|
||||
|
||||
// generate
|
||||
$doctype = $this->doctypes->make($config);
|
||||
$modules = $doctype->modules;
|
||||
$this->doctype = $this->doctypes->make($config);
|
||||
$modules = $this->doctype->modules;
|
||||
|
||||
// take out the default modules that aren't allowed
|
||||
$lookup = $config->get('HTML', 'AllowedModules');
|
||||
@ -309,7 +315,7 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$this->processModule($module);
|
||||
}
|
||||
|
||||
foreach ($doctype->tidyModules as $module) {
|
||||
foreach ($this->doctype->tidyModules as $module) {
|
||||
$this->processModule($module);
|
||||
if (method_exists($this->modules[$module], 'construct')) {
|
||||
$this->modules[$module]->construct($config);
|
||||
|
@ -209,17 +209,17 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
$this->scheme = new HTMLPurifier_URISchemeMock($this);
|
||||
|
||||
// here are the schemes we will support with overloaded mocks
|
||||
$registry->setReturnReference('getScheme', $this->scheme, array('http', $this->config, $this->context));
|
||||
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', $this->config, $this->context));
|
||||
$registry->setReturnReference('getScheme', $this->scheme, array('http', '*', '*'));
|
||||
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', '*', '*'));
|
||||
|
||||
// default return value is false (meaning no scheme defined: reject)
|
||||
$registry->setReturnValue('getScheme', false, array('*', $this->config, $this->context));
|
||||
$registry->setReturnValue('getScheme', false, array('*', '*', '*'));
|
||||
|
||||
if ($this->components === false) {
|
||||
$this->scheme->expectNever('validateComponents');
|
||||
} else {
|
||||
$this->components[] = $this->config; // append the configuration
|
||||
$this->components[] =& $this->context; // append context
|
||||
$this->components[] = '*'; // append the configuration
|
||||
$this->components[] = '*'; // append context
|
||||
$this->scheme->setReturnValue(
|
||||
'validateComponents', $this->return_components, $this->components);
|
||||
$this->scheme->expectOnce('validateComponents', $this->components);
|
||||
|
Loading…
Reference in New Issue
Block a user