From f80de908bd100664dc451a880875cada88ee20d6 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 31 Jul 2007 01:04:38 +0000 Subject: [PATCH] [2.1.0] Optimize ConfigSchema to only perform safety checks when HTMLPURIFIER_SCHEMA_STRICT is true - Remove useless ->revision check in Config.php - Add simple trace file to benchmarks folder git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1319 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 2 + benchmarks/Trace.php | 12 ++ configdoc/generate.php | 2 + library/HTMLPurifier/Config.php | 1 - library/HTMLPurifier/ConfigSchema.php | 185 ++++++++++++++------------ tests/index.php | 1 + 6 files changed, 120 insertions(+), 83 deletions(-) create mode 100644 benchmarks/Trace.php diff --git a/NEWS b/NEWS index 7e48c753..f5f89d32 100644 --- a/NEWS +++ b/NEWS @@ -41,6 +41,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Custom ChildDef added to default include list . URIScheme reflection improved: will not attempt to include file if class already exists. May clobber autoload, so I need to keep an eye on it +. ConfigSchema heavily optimized, will only collect information and validate + definitions when HTMLPURIFIER_SCHEMA_STRICT is true. 2.0.1, released 2007-06-27 ! Tag auto-closing now based on a ChildDef heuristic rather than a diff --git a/benchmarks/Trace.php b/benchmarks/Trace.php new file mode 100644 index 00000000..fa98ffac --- /dev/null +++ b/benchmarks/Trace.php @@ -0,0 +1,12 @@ +purify(file_get_contents('samples/Lexer/4.html')); +xdebug_stop_trace(); diff --git a/configdoc/generate.php b/configdoc/generate.php index 97e96433..9e73f4c7 100644 --- a/configdoc/generate.php +++ b/configdoc/generate.php @@ -18,6 +18,8 @@ TODO: if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.'); error_reporting(E_ALL); // probably not possible to use E_STRICT +define('HTMLPURIFIER_SCHEMA_STRICT', true); // description data needs to be collected + // load dual-libraries require_once '../library/HTMLPurifier.auto.php'; require_once 'library/ConfigDoc.auto.php'; diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 02594db4..2b811fa9 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -106,7 +106,6 @@ class HTMLPurifier_Config $ret = HTMLPurifier_Config::createDefault(); if (is_string($config)) $ret->loadIni($config); elseif (is_array($config)) $ret->loadArray($config); - if (isset($revision)) $ret->revision = $revision; return $ret; } diff --git a/library/HTMLPurifier/ConfigSchema.php b/library/HTMLPurifier/ConfigSchema.php index e17a3f07..d6700e6e 100644 --- a/library/HTMLPurifier/ConfigSchema.php +++ b/library/HTMLPurifier/ConfigSchema.php @@ -6,6 +6,8 @@ require_once 'HTMLPurifier/ConfigDef/Namespace.php'; require_once 'HTMLPurifier/ConfigDef/Directive.php'; require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php'; +if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false); + /** * Configuration definition, defines directives and their defaults. * @note If you update this, please update Printer_ConfigForm @@ -102,27 +104,30 @@ class HTMLPurifier_ConfigSchema { * HTMLPurifier_DirectiveDef::$type for allowed values * @param $description Description of directive for documentation */ - function define( - $namespace, $name, $default, $type, - $description - ) { + function define($namespace, $name, $default, $type, $description) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (!isset($def->info[$namespace])) { - trigger_error('Cannot define directive for undefined namespace', - E_USER_ERROR); - return; - } - if (!ctype_alnum($name)) { - trigger_error('Directive name must be alphanumeric', - E_USER_ERROR); - return; - } - if (empty($description)) { - trigger_error('Description must be non-empty', - E_USER_ERROR); - return; + + // basic sanity checks + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (!isset($def->info[$namespace])) { + trigger_error('Cannot define directive for undefined namespace', + E_USER_ERROR); + return; + } + if (!ctype_alnum($name)) { + trigger_error('Directive name must be alphanumeric', + E_USER_ERROR); + return; + } + if (empty($description)) { + trigger_error('Description must be non-empty', + E_USER_ERROR); + return; + } } + if (isset($def->info[$namespace][$name])) { + // already defined if ( $def->info[$namespace][$name]->type !== $type || $def->defaults[$namespace][$name] !== $default @@ -131,29 +136,35 @@ class HTMLPurifier_ConfigSchema { return; } } else { - // process modifiers + // needs defining + + // process modifiers (OPTIMIZE!) $type_values = explode('/', $type, 2); $type = $type_values[0]; $modifier = isset($type_values[1]) ? $type_values[1] : false; $allow_null = ($modifier === 'null'); - if (!isset($def->types[$type])) { - trigger_error('Invalid type for configuration directive', - E_USER_ERROR); - return; - } - $default = $def->validate($default, $type, $allow_null); - if ($def->isError($default)) { - trigger_error('Default value does not match directive type', - E_USER_ERROR); - return; + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (!isset($def->types[$type])) { + trigger_error('Invalid type for configuration directive', + E_USER_ERROR); + return; + } + $default = $def->validate($default, $type, $allow_null); + if ($def->isError($default)) { + trigger_error('Default value does not match directive type', + E_USER_ERROR); + return; + } } + $def->info[$namespace][$name] = new HTMLPurifier_ConfigDef_Directive(); $def->info[$namespace][$name]->type = $type; $def->info[$namespace][$name]->allow_null = $allow_null; $def->defaults[$namespace][$name] = $default; } + if (!HTMLPURIFIER_SCHEMA_STRICT) return; $backtrace = debug_backtrace(); $file = $def->mungeFilename($backtrace[0]['file']); $line = $backtrace[0]['line']; @@ -168,19 +179,21 @@ class HTMLPurifier_ConfigSchema { */ function defineNamespace($namespace, $description) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (isset($def->info[$namespace])) { - trigger_error('Cannot redefine namespace', E_USER_ERROR); - return; - } - if (!ctype_alnum($namespace)) { - trigger_error('Namespace name must be alphanumeric', - E_USER_ERROR); - return; - } - if (empty($description)) { - trigger_error('Description must be non-empty', - E_USER_ERROR); - return; + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (isset($def->info[$namespace])) { + trigger_error('Cannot redefine namespace', E_USER_ERROR); + return; + } + if (!ctype_alnum($namespace)) { + trigger_error('Namespace name must be alphanumeric', + E_USER_ERROR); + return; + } + if (empty($description)) { + trigger_error('Description must be non-empty', + E_USER_ERROR); + return; + } } $def->info[$namespace] = array(); $def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace(); @@ -201,23 +214,25 @@ class HTMLPurifier_ConfigSchema { */ function defineValueAliases($namespace, $name, $aliases) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (!isset($def->info[$namespace][$name])) { + if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) { trigger_error('Cannot set value alias for non-existant directive', E_USER_ERROR); return; } foreach ($aliases as $alias => $real) { - if (!$def->info[$namespace][$name] !== true && - !isset($def->info[$namespace][$name]->allowed[$real]) - ) { - trigger_error('Cannot define alias to value that is not allowed', - E_USER_ERROR); - return; - } - if (isset($def->info[$namespace][$name]->allowed[$alias])) { - trigger_error('Cannot define alias over allowed value', - E_USER_ERROR); - return; + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (!$def->info[$namespace][$name] !== true && + !isset($def->info[$namespace][$name]->allowed[$real]) + ) { + trigger_error('Cannot define alias to value that is not allowed', + E_USER_ERROR); + return; + } + if (isset($def->info[$namespace][$name]->allowed[$alias])) { + trigger_error('Cannot define alias over allowed value', + E_USER_ERROR); + return; + } } $def->info[$namespace][$name]->aliases[$alias] = $real; } @@ -232,14 +247,14 @@ class HTMLPurifier_ConfigSchema { */ function defineAllowedValues($namespace, $name, $allowed_values) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (!isset($def->info[$namespace][$name])) { + if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) { trigger_error('Cannot define allowed values for undefined directive', E_USER_ERROR); return; } $directive =& $def->info[$namespace][$name]; $type = $directive->type; - if ($type != 'string' && $type != 'istring') { + if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') { trigger_error('Cannot define allowed values for directive whose type is not string', E_USER_ERROR); return; @@ -250,8 +265,11 @@ class HTMLPurifier_ConfigSchema { foreach ($allowed_values as $value) { $directive->allowed[$value] = true; } - if ($def->defaults[$namespace][$name] !== null && - !isset($directive->allowed[$def->defaults[$namespace][$name]])) { + if ( + HTMLPURIFIER_SCHEMA_STRICT && + $def->defaults[$namespace][$name] !== null && + !isset($directive->allowed[$def->defaults[$namespace][$name]]) + ) { trigger_error('Default value must be in allowed range of variables', E_USER_ERROR); $directive->allowed = true; // undo undo! @@ -269,30 +287,32 @@ class HTMLPurifier_ConfigSchema { */ function defineAlias($namespace, $name, $new_namespace, $new_name) { $def =& HTMLPurifier_ConfigSchema::instance(); - if (!isset($def->info[$namespace])) { - trigger_error('Cannot define directive alias in undefined namespace', - E_USER_ERROR); - return; - } - if (!ctype_alnum($name)) { - trigger_error('Directive name must be alphanumeric', - E_USER_ERROR); - return; - } - if (isset($def->info[$namespace][$name])) { - trigger_error('Cannot define alias over directive', - E_USER_ERROR); - return; - } - if (!isset($def->info[$new_namespace][$new_name])) { - trigger_error('Cannot define alias to undefined directive', - E_USER_ERROR); - return; - } - if ($def->info[$new_namespace][$new_name]->class == 'alias') { - trigger_error('Cannot define alias to alias', - E_USER_ERROR); - return; + if (HTMLPURIFIER_SCHEMA_STRICT) { + if (!isset($def->info[$namespace])) { + trigger_error('Cannot define directive alias in undefined namespace', + E_USER_ERROR); + return; + } + if (!ctype_alnum($name)) { + trigger_error('Directive name must be alphanumeric', + E_USER_ERROR); + return; + } + if (isset($def->info[$namespace][$name])) { + trigger_error('Cannot define alias over directive', + E_USER_ERROR); + return; + } + if (!isset($def->info[$new_namespace][$new_name])) { + trigger_error('Cannot define alias to undefined directive', + E_USER_ERROR); + return; + } + if ($def->info[$new_namespace][$new_name]->class == 'alias') { + trigger_error('Cannot define alias to alias', + E_USER_ERROR); + return; + } } $def->info[$namespace][$name] = new HTMLPurifier_ConfigDef_DirectiveAlias( @@ -396,6 +416,7 @@ class HTMLPurifier_ConfigSchema { * Takes an absolute path and munges it into a more manageable relative path */ function mungeFilename($filename) { + if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename; $offset = strrpos($filename, 'HTMLPurifier'); $filename = substr($filename, $offset); $filename = str_replace('\\', '/', $filename); diff --git a/tests/index.php b/tests/index.php index d5841c50..1034e44f 100644 --- a/tests/index.php +++ b/tests/index.php @@ -5,6 +5,7 @@ error_reporting(E_ALL); define('HTMLPurifierTest', 1); +define('HTMLPURIFIER_SCHEMA_STRICT', true); // wishlist: automated calling of this file from multiple PHP versions so we // don't have to constantly switch around