From cac22f01cfc2ebba5fa049f1e5c5cc4950c1cbd8 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Wed, 14 Feb 2007 01:44:06 +0000 Subject: [PATCH] [1.5.0] - More framework work (modules now are treated first class) - Config will regenerate definitions when appropriate entries are set - Add HTMLModule->setup for pre-processing stuff - Constructor receives $definition not $config - Config rolled inside $definition->config until end of setup() git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@741 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/Config.php | 25 +-- library/HTMLPurifier/HTMLDefinition.php | 177 +++++++++++++----- library/HTMLPurifier/HTMLModule.php | 13 +- .../HTMLModule/TransformToStrict.php | 4 +- tests/HTMLPurifier/ConfigTest.php | 35 +++- 5 files changed, 183 insertions(+), 71 deletions(-) diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 3e2b09a7..c94e01f6 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -149,6 +149,13 @@ class HTMLPurifier_Config return; } $this->conf[$namespace][$key] = $value; + if ($namespace == 'HTML' || $namespace == 'Attr') { + // reset HTML definition if relevant attributes changed + $this->html_definition = null; + } + if ($namespace == 'CSS') { + $this->css_definition = null; + } } /** @@ -157,20 +164,14 @@ class HTMLPurifier_Config * called before it's been setup, otherwise won't work. */ function &getHTMLDefinition($raw = false) { - if ($this->html_definition === null) { + if ( + empty($this->html_definition) || // hasn't ever been setup + ($raw && $this->html_definition->setup) // requesting new one + ) { $this->html_definition = new HTMLPurifier_HTMLDefinition($this); - if ($raw) { - return $this->html_definition; // no setup! - } - $this->html_definition->setup($this); - } - if ($raw && $this->html_definition->setup) { - trigger_error('HTMLDefinition already setup, overwriting old '. - 'definition (set $config->definition manually to null '. - 'if this is desired behavior).', E_USER_NOTICE); - $this->html_definition = new HTMLPurifier_HTMLDefinition($this); - return $this->html_definition; + if ($raw) return $this->html_definition; // no setup! } + if (!$this->html_definition->setup) $this->html_definition->setup(); return $this->html_definition; } diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index a14e6e9b..f39e43ff 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -38,6 +38,8 @@ require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php'; require_once 'HTMLPurifier/HTMLModule/Legacy.php'; +// this definition and its modules MUST NOT define configuration directives +// outside of the HTML or Attr namespaces HTMLPurifier_ConfigSchema::define( 'HTML', 'EnableAttrID', false, 'bool', 'Allows the ID attribute in HTML. This is disabled by default '. @@ -112,9 +114,6 @@ HTMLPurifier_ConfigSchema::define( * Definition of the purified HTML that describes allowed children, * attributes, and many other things. * - * @note This is the next-gen definition that will be renamed to - * HTMLDefinition soon! - * * Conventions: * * All member variables that are prefixed with info @@ -127,7 +126,6 @@ HTMLPurifier_ConfigSchema::define( * internally by the HTMLDefinition and MUST NOT be used by other HTML * Purifier internals. Many of them, however, are public, and may be * edited by userspace code to tweak the behavior of HTMLDefinition. - * In practice, there will not be too many of them. * * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this * rule: in the interest of comprehensiveness, it will sniff everything. @@ -205,11 +203,25 @@ class HTMLPurifier_HTMLDefinition var $strict; /** - * Array of HTMLPurifier_Module instances, indexed by module name + * Array of HTMLPurifier_Module instances, indexed by module's class name * @public */ var $modules = array(); + /** + * Associative array of module class name to module order keywords or + * numbers (keyword is preferred, all keywords are resolved at beginning + * of setup()) + * @public + */ + var $modules_order = array(); + + /** + * List of prefixes HTML Purifier should try to resolve short names to. + * @public + */ + var $module_prefixes = array('HTMLPurifier_HTMLModule_'); + /** * Instance of HTMLPurifier_AttrTypes * @public @@ -223,7 +235,7 @@ class HTMLPurifier_HTMLDefinition var $attr_collections; /** - * Is setup? + * Has setup() been called yet? * @public */ var $setup = false; @@ -234,45 +246,89 @@ class HTMLPurifier_HTMLDefinition */ var $content_sets; + /** + * Lookup table of module order "names" and an integer index + * @public + */ + var $order_keywords = array( + 'setup' => 10, + 'early' => 20, + 'main' => 30, + 'late' => 40, + 'cleanup' => 50, + ); + + /** + * Temporary instance of HTMLPurifier_Config for convenience reasons, + * is removed after setup(). + * @public + */ + var $config; + /** * Performs low-cost, preliminary initialization. * @param $config Instance of HTMLPurifier_Config */ - function HTMLPurifier_HTMLDefinition($config) { + function HTMLPurifier_HTMLDefinition(&$config) { - // setup some cached config variables - // this will eventually influence module loading - $this->strict = $config->get('HTML', 'Strict'); - - // order is important! - $this->modules['Text'] = new HTMLPurifier_HTMLModule_Text(); - $this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext(); - $this->modules['List'] = new HTMLPurifier_HTMLModule_List(); - $this->modules['Presentation'] = new HTMLPurifier_HTMLModule_Presentation(); - $this->modules['Edit'] = new HTMLPurifier_HTMLModule_Edit(); - $this->modules['Bdo'] = new HTMLPurifier_HTMLModule_Bdo(); - $this->modules['Tables'] = new HTMLPurifier_HTMLModule_Tables(); - $this->modules['Image'] = new HTMLPurifier_HTMLModule_Image(); - $this->modules['StyleAttribute']= new HTMLPurifier_HTMLModule_StyleAttribute(); - - $this->modules['TransformToStrict'] = new HTMLPurifier_HTMLModule_TransformToStrict($config); - if (!$this->strict) $this->modules['Legacy'] = new HTMLPurifier_HTMLModule_Legacy($config); + $this->config =& $config; + // set up public internals + $this->strict = $config->get('HTML', 'Strict'); $this->attr_types = new HTMLPurifier_AttrTypes(); $this->attr_collections = new HTMLPurifier_AttrCollections(); $this->content_sets = new HTMLPurifier_ContentSets(); - // some compat stuff, will be factored to modules + // modules - // remove ID module + // main + $main_modules = array('Text', 'Hypertext', 'List', 'Presentation', + 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute'); + foreach ($main_modules as $module) $this->addModule($module, 'main'); + + // late + if (!$this->strict) $this->addModule('Legacy', 'late'); + + // cleanup + $this->addModule('TransformToStrict', 'cleanup'); + + // remove ID module (refactor to module) if (!$config->get('HTML', 'EnableAttrID')) { $this->attr_collections->info['Core']['id'] = false; } } - + /** + * Adds a module to the ordered list. + * @param $module Mixed: string module name, with or without + * HTMLPurifier_HTMLModule prefix, or instance of + * subclass of HTMLPurifier_HTMLModule. + */ + function addModule($module, $order = 'main') { + if (is_string($module)) { + $original_module = $module; + if (!class_exists($module)) { + foreach ($this->module_prefixes as $prefix) { + $module = $prefix . $original_module; + if (class_exists($module)) break; + } + } + if (!class_exists($module)) { + trigger_error($original_module . ' module does not exist', E_USER_ERROR); + return; + } + $module = new $module($this); + } + if (!isset($this->order_keywords[$order])) { + trigger_error('Order keyword does not exist', E_USER_ERROR); + return; + } + $name = strtolower(get_class($module)); + $this->modules[$name] = $module; + $this->modules_order[$name] = $order; + } /** * Processes internals into form usable by HTMLPurifier internals. @@ -280,33 +336,55 @@ class HTMLPurifier_HTMLDefinition * be done. * @param $config Instance of HTMLPurifier_Config */ - function setup($config) { + function setup() { // multiple call guard if ($this->setup) {return;} else {$this->setup = true;} - // would be nice if we could put each of these in their - // own object, would make this hookable too! - $this->processModules($config); - $this->setupAttrTransform($config); - $this->setupBlockWrapper($config); - $this->setupParent($config); - $this->setupCompat($config); + $this->processModules(); + $this->setupAttrTransform(); + $this->setupBlockWrapper(); + $this->setupParent(); + $this->setupCompat(); + + unset($this->config); } /** * Processes the modules, setting up related info variables - * @param $config Instance of HTMLPurifier_Config */ - function processModules($config) { + function processModules() { + + // substitute out the order keywords + foreach ($this->modules_order as $name => $order) { + if (empty($this->modules[$name])) { + trigger_error('Orphan module order definition for module: ' . $name, E_USER_ERROR); + return; + } + if (is_int($order)) continue; + if (empty($this->order_keywords[$order])) { + trigger_error('Unknown order keyword: ' . $order, E_USER_ERROR); + return; + } + $this->modules_order[$name] = $this->order_keywords[$order]; + } + + // sort modules member variable + array_multisort( + $this->modules_order, SORT_ASC, SORT_NUMERIC, + $this->modules + ); + + // setup the global registries $this->attr_collections->setup($this->attr_types, $this->modules); $this->content_sets->setup($this->modules); $this->info_content_sets = $this->content_sets->lookup; + // process the modules foreach ($this->modules as $module_i => $module) { - $module->preProcess($this, $config); + $module->preProcess($this); // process element-wise definitions foreach ($module->info as $name => $def) { @@ -352,7 +430,7 @@ class HTMLPurifier_HTMLDefinition foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v; foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v; - $module->postProcess($this, $config); + $module->postProcess($this); } @@ -360,18 +438,16 @@ class HTMLPurifier_HTMLDefinition /** * Sets up attribute transformations - * @param $config Instance of HTMLPurifier_Config */ - function setupAttrTransform($config) { + function setupAttrTransform() { $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang(); } /** * Sets up block wrapper based on config - * @param $config Instance of HTMLPurifier_Config */ - function setupBlockWrapper($config) { - $block_wrapper = $config->get('HTML', 'BlockWrapper'); + function setupBlockWrapper() { + $block_wrapper = $this->config->get('HTML', 'BlockWrapper'); if (isset($this->info_content_sets['Block'][$block_wrapper])) { $this->info_block_wrapper = $block_wrapper; } else { @@ -382,10 +458,9 @@ class HTMLPurifier_HTMLDefinition /** * Sets up parent of fragment based on config - * @param $config Instance of HTMLPurifier_Config */ - function setupParent($config) { - $parent = $config->get('HTML', 'Parent'); + function setupParent() { + $parent = $this->config->get('HTML', 'Parent'); if (isset($this->info[$parent])) { $this->info_parent = $parent; } else { @@ -399,10 +474,10 @@ class HTMLPurifier_HTMLDefinition * Sets up compat code from HTMLDefinition that has not been * delegated to modules yet */ - function setupCompat($config) { + function setupCompat() { // deprecated config setting, implement in DisableURI module - if ($config->get('Attr', 'DisableURI')) { + if ($this->config->get('Attr', 'DisableURI')) { $this->info['a']->attr['href'] = $this->info['img']->attr['longdesc'] = $this->info['del']->attr['cite'] = @@ -413,13 +488,13 @@ class HTMLPurifier_HTMLDefinition } // setup allowed elements, SubtractiveWhitelist module - $allowed_elements = $config->get('HTML', 'AllowedElements'); + $allowed_elements = $this->config->get('HTML', 'AllowedElements'); if (is_array($allowed_elements)) { foreach ($this->info as $name => $d) { if(!isset($allowed_elements[$name])) unset($this->info[$name]); } } - $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); + $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes'); if (is_array($allowed_attributes)) { foreach ($this->info_global_attr as $attr_key => $info) { if (!isset($allowed_attributes["*.$attr_key"])) { diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php index 6538952b..a8de3f11 100644 --- a/library/HTMLPurifier/HTMLModule.php +++ b/library/HTMLPurifier/HTMLModule.php @@ -95,17 +95,22 @@ class HTMLPurifier_HTMLModule * Hook method that lets module perform arbitrary operations on * HTMLPurifier_HTMLDefinition before the module gets processed. * @param $definition Reference to HTMLDefinition being setup - * @param $config Instance of HTMLPurifier_Config */ - function preProcess(&$definition, $config) {} + function preProcess(&$definition) {} /** * Hook method that lets module perform arbitrary operations * on HTMLPurifier_HTMLDefinition after the module gets processed. * @param $definition Reference to HTMLDefinition being setup - * @param $config Instance of HTMLPurifier_Config */ - function postProcess(&$definition, $config) {} + function postProcess(&$definition) {} + + /** + * Hook method that is called when a module gets registered to + * the definition. + * @param $definition Reference to HTMLDefinition being setup + */ + function setup(&$definition) {} } diff --git a/library/HTMLPurifier/HTMLModule/TransformToStrict.php b/library/HTMLPurifier/HTMLModule/TransformToStrict.php index 6623b6fe..933f77fd 100644 --- a/library/HTMLPurifier/HTMLModule/TransformToStrict.php +++ b/library/HTMLPurifier/HTMLModule/TransformToStrict.php @@ -31,7 +31,9 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule 'lang' => false // placeholder ); - function HTMLPurifier_HTMLModule_TransformToStrict($config) { + function HTMLPurifier_HTMLModule_TransformToStrict(&$definition) { + $config = $definition->config; + // deprecated tag transforms $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); diff --git a/tests/HTMLPurifier/ConfigTest.php b/tests/HTMLPurifier/ConfigTest.php index e04ac416..7283700f 100644 --- a/tests/HTMLPurifier/ConfigTest.php +++ b/tests/HTMLPurifier/ConfigTest.php @@ -216,7 +216,7 @@ class HTMLPurifier_ConfigTest extends UnitTestCase } - function test_getDefinition() { + function test_getHTMLDefinition() { // we actually want to use the old copy, because the definition // generation routines have dependencies on configuration values @@ -224,12 +224,41 @@ class HTMLPurifier_ConfigTest extends UnitTestCase $this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy); $config = HTMLPurifier_Config::createDefault(); - $def = $config->getHTMLDefinition(); - $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition'); $def = $config->getCSSDefinition(); $this->assertIsA($def, 'HTMLPurifier_CSSDefinition'); + $def = $config->getHTMLDefinition(); + $def2 = $config->getHTMLDefinition(); + $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition'); + $this->assertEqual($def, $def2); + $this->assertTrue($def->setup); + + // test re-calculation if HTML changes + $config->set('HTML', 'Strict', true); + $def = $config->getHTMLDefinition(); + $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition'); + $this->assertNotEqual($def, $def2); + $this->assertTrue($def->setup); + + // test retrieval of raw definition + $def =& $config->getHTMLDefinition(true); + $this->assertNotEqual($def, $def2); + $this->assertFalse($def->setup); + + // auto initialization + $config->getHTMLDefinition(); + $this->assertTrue($def->setup); + + } + + function test_getCSSDefinition() { + $this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy); + + $config = HTMLPurifier_Config::createDefault(); + + $def = $config->getCSSDefinition(); + $this->assertIsA($def, 'HTMLPurifier_CSSDefinition'); } function test_loadArray() {