0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-18 11:41:52 +00:00

[2.1.0] Optimize ConfigSchema to only perform safety checks when HTMLPURIFIER_SCHEMA_STRICT is true

- Remove useless ->revision check in Config.php
- Add simple trace file to benchmarks folder

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1319 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-07-31 01:04:38 +00:00
parent 349c4de75b
commit f80de908bd
6 changed files with 120 additions and 83 deletions

2
NEWS
View File

@ -41,6 +41,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Custom ChildDef added to default include list . Custom ChildDef added to default include list
. URIScheme reflection improved: will not attempt to include file if class . URIScheme reflection improved: will not attempt to include file if class
already exists. May clobber autoload, so I need to keep an eye on it already exists. May clobber autoload, so I need to keep an eye on it
. ConfigSchema heavily optimized: it will only collect information and validate
definitions when HTMLPURIFIER_SCHEMA_STRICT is true.
2.0.1, released 2007-06-27 2.0.1, released 2007-06-27
! Tag auto-closing now based on a ChildDef heuristic rather than a ! Tag auto-closing now based on a ChildDef heuristic rather than a

12
benchmarks/Trace.php Normal file
View File

@ -0,0 +1,12 @@
<?php
// Benchmark helper: records an Xdebug function trace of one
// HTMLPurifier::purify() call on a sample document.
// Requires the Xdebug extension, which provides xdebug_start_trace() and
// xdebug_stop_trace().
//
// Every path is anchored to this file's directory so the script works no
// matter which working directory it is launched from. Previously only the
// trace output path was anchored; the library include and the sample file
// were resolved against the current working directory.

// Trace format 1 and memory deltas; see the Xdebug function-trace settings
// for the exact meaning of these values.
ini_set('xdebug.trace_format', 1);
ini_set('xdebug.show_mem_delta', true);

// The trace file is written next to this script, using 'Trace' as base name.
xdebug_start_trace(dirname(__FILE__) . '/Trace');

// The library is loaded after tracing starts, so include-time work is part
// of the recorded trace as well.
require_once dirname(__FILE__) . '/../library/HTMLPurifier.auto.php';
$purifier = new HTMLPurifier();
$data = $purifier->purify(
    file_get_contents(dirname(__FILE__) . '/samples/Lexer/4.html')
);

xdebug_stop_trace();

View File

@ -18,6 +18,8 @@ TODO:
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.'); if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
error_reporting(E_ALL); // probably not possible to use E_STRICT error_reporting(E_ALL); // probably not possible to use E_STRICT
define('HTMLPURIFIER_SCHEMA_STRICT', true); // description data needs to be collected
// load dual-libraries // load dual-libraries
require_once '../library/HTMLPurifier.auto.php'; require_once '../library/HTMLPurifier.auto.php';
require_once 'library/ConfigDoc.auto.php'; require_once 'library/ConfigDoc.auto.php';

View File

@ -106,7 +106,6 @@ class HTMLPurifier_Config
$ret = HTMLPurifier_Config::createDefault(); $ret = HTMLPurifier_Config::createDefault();
if (is_string($config)) $ret->loadIni($config); if (is_string($config)) $ret->loadIni($config);
elseif (is_array($config)) $ret->loadArray($config); elseif (is_array($config)) $ret->loadArray($config);
if (isset($revision)) $ret->revision = $revision;
return $ret; return $ret;
} }

View File

@ -6,6 +6,8 @@ require_once 'HTMLPurifier/ConfigDef/Namespace.php';
require_once 'HTMLPurifier/ConfigDef/Directive.php'; require_once 'HTMLPurifier/ConfigDef/Directive.php';
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php'; require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false);
/** /**
* Configuration definition, defines directives and their defaults. * Configuration definition, defines directives and their defaults.
* @note If you update this, please update Printer_ConfigForm * @note If you update this, please update Printer_ConfigForm
@ -102,27 +104,30 @@ class HTMLPurifier_ConfigSchema {
* HTMLPurifier_DirectiveDef::$type for allowed values * HTMLPurifier_DirectiveDef::$type for allowed values
* @param $description Description of directive for documentation * @param $description Description of directive for documentation
*/ */
function define( function define($namespace, $name, $default, $type, $description) {
$namespace, $name, $default, $type,
$description
) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace])) {
trigger_error('Cannot define directive for undefined namespace', // basic sanity checks
E_USER_ERROR); if (HTMLPURIFIER_SCHEMA_STRICT) {
return; if (!isset($def->info[$namespace])) {
} trigger_error('Cannot define directive for undefined namespace',
if (!ctype_alnum($name)) { E_USER_ERROR);
trigger_error('Directive name must be alphanumeric', return;
E_USER_ERROR); }
return; if (!ctype_alnum($name)) {
} trigger_error('Directive name must be alphanumeric',
if (empty($description)) { E_USER_ERROR);
trigger_error('Description must be non-empty', return;
E_USER_ERROR); }
return; if (empty($description)) {
trigger_error('Description must be non-empty',
E_USER_ERROR);
return;
}
} }
if (isset($def->info[$namespace][$name])) { if (isset($def->info[$namespace][$name])) {
// already defined
if ( if (
$def->info[$namespace][$name]->type !== $type || $def->info[$namespace][$name]->type !== $type ||
$def->defaults[$namespace][$name] !== $default $def->defaults[$namespace][$name] !== $default
@ -131,29 +136,35 @@ class HTMLPurifier_ConfigSchema {
return; return;
} }
} else { } else {
// process modifiers // needs defining
// process modifiers (OPTIMIZE!)
$type_values = explode('/', $type, 2); $type_values = explode('/', $type, 2);
$type = $type_values[0]; $type = $type_values[0];
$modifier = isset($type_values[1]) ? $type_values[1] : false; $modifier = isset($type_values[1]) ? $type_values[1] : false;
$allow_null = ($modifier === 'null'); $allow_null = ($modifier === 'null');
if (!isset($def->types[$type])) { if (HTMLPURIFIER_SCHEMA_STRICT) {
trigger_error('Invalid type for configuration directive', if (!isset($def->types[$type])) {
E_USER_ERROR); trigger_error('Invalid type for configuration directive',
return; E_USER_ERROR);
} return;
$default = $def->validate($default, $type, $allow_null); }
if ($def->isError($default)) { $default = $def->validate($default, $type, $allow_null);
trigger_error('Default value does not match directive type', if ($def->isError($default)) {
E_USER_ERROR); trigger_error('Default value does not match directive type',
return; E_USER_ERROR);
return;
}
} }
$def->info[$namespace][$name] = $def->info[$namespace][$name] =
new HTMLPurifier_ConfigDef_Directive(); new HTMLPurifier_ConfigDef_Directive();
$def->info[$namespace][$name]->type = $type; $def->info[$namespace][$name]->type = $type;
$def->info[$namespace][$name]->allow_null = $allow_null; $def->info[$namespace][$name]->allow_null = $allow_null;
$def->defaults[$namespace][$name] = $default; $def->defaults[$namespace][$name] = $default;
} }
if (!HTMLPURIFIER_SCHEMA_STRICT) return;
$backtrace = debug_backtrace(); $backtrace = debug_backtrace();
$file = $def->mungeFilename($backtrace[0]['file']); $file = $def->mungeFilename($backtrace[0]['file']);
$line = $backtrace[0]['line']; $line = $backtrace[0]['line'];
@ -168,19 +179,21 @@ class HTMLPurifier_ConfigSchema {
*/ */
function defineNamespace($namespace, $description) { function defineNamespace($namespace, $description) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (isset($def->info[$namespace])) { if (HTMLPURIFIER_SCHEMA_STRICT) {
trigger_error('Cannot redefine namespace', E_USER_ERROR); if (isset($def->info[$namespace])) {
return; trigger_error('Cannot redefine namespace', E_USER_ERROR);
} return;
if (!ctype_alnum($namespace)) { }
trigger_error('Namespace name must be alphanumeric', if (!ctype_alnum($namespace)) {
E_USER_ERROR); trigger_error('Namespace name must be alphanumeric',
return; E_USER_ERROR);
} return;
if (empty($description)) { }
trigger_error('Description must be non-empty', if (empty($description)) {
E_USER_ERROR); trigger_error('Description must be non-empty',
return; E_USER_ERROR);
return;
}
} }
$def->info[$namespace] = array(); $def->info[$namespace] = array();
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace(); $def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
@ -201,23 +214,25 @@ class HTMLPurifier_ConfigSchema {
*/ */
function defineValueAliases($namespace, $name, $aliases) { function defineValueAliases($namespace, $name, $aliases) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace][$name])) { if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
trigger_error('Cannot set value alias for non-existant directive', trigger_error('Cannot set value alias for non-existant directive',
E_USER_ERROR); E_USER_ERROR);
return; return;
} }
foreach ($aliases as $alias => $real) { foreach ($aliases as $alias => $real) {
if (!$def->info[$namespace][$name] !== true && if (HTMLPURIFIER_SCHEMA_STRICT) {
!isset($def->info[$namespace][$name]->allowed[$real]) if (!$def->info[$namespace][$name] !== true &&
) { !isset($def->info[$namespace][$name]->allowed[$real])
trigger_error('Cannot define alias to value that is not allowed', ) {
E_USER_ERROR); trigger_error('Cannot define alias to value that is not allowed',
return; E_USER_ERROR);
} return;
if (isset($def->info[$namespace][$name]->allowed[$alias])) { }
trigger_error('Cannot define alias over allowed value', if (isset($def->info[$namespace][$name]->allowed[$alias])) {
E_USER_ERROR); trigger_error('Cannot define alias over allowed value',
return; E_USER_ERROR);
return;
}
} }
$def->info[$namespace][$name]->aliases[$alias] = $real; $def->info[$namespace][$name]->aliases[$alias] = $real;
} }
@ -232,14 +247,14 @@ class HTMLPurifier_ConfigSchema {
*/ */
function defineAllowedValues($namespace, $name, $allowed_values) { function defineAllowedValues($namespace, $name, $allowed_values) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace][$name])) { if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
trigger_error('Cannot define allowed values for undefined directive', trigger_error('Cannot define allowed values for undefined directive',
E_USER_ERROR); E_USER_ERROR);
return; return;
} }
$directive =& $def->info[$namespace][$name]; $directive =& $def->info[$namespace][$name];
$type = $directive->type; $type = $directive->type;
if ($type != 'string' && $type != 'istring') { if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') {
trigger_error('Cannot define allowed values for directive whose type is not string', trigger_error('Cannot define allowed values for directive whose type is not string',
E_USER_ERROR); E_USER_ERROR);
return; return;
@ -250,8 +265,11 @@ class HTMLPurifier_ConfigSchema {
foreach ($allowed_values as $value) { foreach ($allowed_values as $value) {
$directive->allowed[$value] = true; $directive->allowed[$value] = true;
} }
if ($def->defaults[$namespace][$name] !== null && if (
!isset($directive->allowed[$def->defaults[$namespace][$name]])) { HTMLPURIFIER_SCHEMA_STRICT &&
$def->defaults[$namespace][$name] !== null &&
!isset($directive->allowed[$def->defaults[$namespace][$name]])
) {
trigger_error('Default value must be in allowed range of variables', trigger_error('Default value must be in allowed range of variables',
E_USER_ERROR); E_USER_ERROR);
$directive->allowed = true; // undo undo! $directive->allowed = true; // undo undo!
@ -269,30 +287,32 @@ class HTMLPurifier_ConfigSchema {
*/ */
function defineAlias($namespace, $name, $new_namespace, $new_name) { function defineAlias($namespace, $name, $new_namespace, $new_name) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace])) { if (HTMLPURIFIER_SCHEMA_STRICT) {
trigger_error('Cannot define directive alias in undefined namespace', if (!isset($def->info[$namespace])) {
E_USER_ERROR); trigger_error('Cannot define directive alias in undefined namespace',
return; E_USER_ERROR);
} return;
if (!ctype_alnum($name)) { }
trigger_error('Directive name must be alphanumeric', if (!ctype_alnum($name)) {
E_USER_ERROR); trigger_error('Directive name must be alphanumeric',
return; E_USER_ERROR);
} return;
if (isset($def->info[$namespace][$name])) { }
trigger_error('Cannot define alias over directive', if (isset($def->info[$namespace][$name])) {
E_USER_ERROR); trigger_error('Cannot define alias over directive',
return; E_USER_ERROR);
} return;
if (!isset($def->info[$new_namespace][$new_name])) { }
trigger_error('Cannot define alias to undefined directive', if (!isset($def->info[$new_namespace][$new_name])) {
E_USER_ERROR); trigger_error('Cannot define alias to undefined directive',
return; E_USER_ERROR);
} return;
if ($def->info[$new_namespace][$new_name]->class == 'alias') { }
trigger_error('Cannot define alias to alias', if ($def->info[$new_namespace][$new_name]->class == 'alias') {
E_USER_ERROR); trigger_error('Cannot define alias to alias',
return; E_USER_ERROR);
return;
}
} }
$def->info[$namespace][$name] = $def->info[$namespace][$name] =
new HTMLPurifier_ConfigDef_DirectiveAlias( new HTMLPurifier_ConfigDef_DirectiveAlias(
@ -396,6 +416,7 @@ class HTMLPurifier_ConfigSchema {
* Takes an absolute path and munges it into a more manageable relative path * Takes an absolute path and munges it into a more manageable relative path
*/ */
function mungeFilename($filename) { function mungeFilename($filename) {
if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename;
$offset = strrpos($filename, 'HTMLPurifier'); $offset = strrpos($filename, 'HTMLPurifier');
$filename = substr($filename, $offset); $filename = substr($filename, $offset);
$filename = str_replace('\\', '/', $filename); $filename = str_replace('\\', '/', $filename);

View File

@ -5,6 +5,7 @@
error_reporting(E_ALL); error_reporting(E_ALL);
define('HTMLPurifierTest', 1); define('HTMLPurifierTest', 1);
define('HTMLPURIFIER_SCHEMA_STRICT', true);
// wishlist: automated calling of this file from multiple PHP versions so we // wishlist: automated calling of this file from multiple PHP versions so we
// don't have to constantly switch around // don't have to constantly switch around