0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-10 16:01:53 +00:00
- More framework work (modules are now treated as first-class)
- Config will regenerate definitions when appropriate entries are set
- Add HTMLModule->setup for pre-processing stuff
- Constructor receives $definition not $config
- Config is kept in $definition->config until the end of setup(), then removed

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@741 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-02-14 01:44:06 +00:00
parent 94d2dbaa74
commit cac22f01cf
5 changed files with 183 additions and 71 deletions

View File

@ -149,6 +149,13 @@ class HTMLPurifier_Config
return; return;
} }
$this->conf[$namespace][$key] = $value; $this->conf[$namespace][$key] = $value;
if ($namespace == 'HTML' || $namespace == 'Attr') {
// reset HTML definition if relevant attributes changed
$this->html_definition = null;
}
if ($namespace == 'CSS') {
$this->css_definition = null;
}
} }
/** /**
@ -157,20 +164,14 @@ class HTMLPurifier_Config
* called before it's been setup, otherwise won't work. * called before it's been setup, otherwise won't work.
*/ */
function &getHTMLDefinition($raw = false) { function &getHTMLDefinition($raw = false) {
if ($this->html_definition === null) { if (
empty($this->html_definition) || // hasn't ever been setup
($raw && $this->html_definition->setup) // requesting new one
) {
$this->html_definition = new HTMLPurifier_HTMLDefinition($this); $this->html_definition = new HTMLPurifier_HTMLDefinition($this);
if ($raw) { if ($raw) return $this->html_definition; // no setup!
return $this->html_definition; // no setup!
}
$this->html_definition->setup($this);
}
if ($raw && $this->html_definition->setup) {
trigger_error('HTMLDefinition already setup, overwriting old '.
'definition (set $config->definition manually to null '.
'if this is desired behavior).', E_USER_NOTICE);
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
return $this->html_definition;
} }
if (!$this->html_definition->setup) $this->html_definition->setup();
return $this->html_definition; return $this->html_definition;
} }

View File

@ -38,6 +38,8 @@ require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php'; require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
require_once 'HTMLPurifier/HTMLModule/Legacy.php'; require_once 'HTMLPurifier/HTMLModule/Legacy.php';
// this definition and its modules MUST NOT define configuration directives
// outside of the HTML or Attr namespaces
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'EnableAttrID', false, 'bool', 'HTML', 'EnableAttrID', false, 'bool',
'Allows the ID attribute in HTML. This is disabled by default '. 'Allows the ID attribute in HTML. This is disabled by default '.
@ -112,9 +114,6 @@ HTMLPurifier_ConfigSchema::define(
* Definition of the purified HTML that describes allowed children, * Definition of the purified HTML that describes allowed children,
* attributes, and many other things. * attributes, and many other things.
* *
* @note This is the next-gen definition that will be renamed to
* HTMLDefinition soon!
*
* Conventions: * Conventions:
* *
* All member variables that are prefixed with info * All member variables that are prefixed with info
@ -127,7 +126,6 @@ HTMLPurifier_ConfigSchema::define(
* internally by the HTMLDefinition and MUST NOT be used by other HTML * internally by the HTMLDefinition and MUST NOT be used by other HTML
* Purifier internals. Many of them, however, are public, and may be * Purifier internals. Many of them, however, are public, and may be
* edited by userspace code to tweak the behavior of HTMLDefinition. * edited by userspace code to tweak the behavior of HTMLDefinition.
* In practice, there will not be too many of them.
* *
* HTMLPurifier_Printer_HTMLDefinition is a notable exception to this * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
* rule: in the interest of comprehensiveness, it will sniff everything. * rule: in the interest of comprehensiveness, it will sniff everything.
@ -205,11 +203,25 @@ class HTMLPurifier_HTMLDefinition
var $strict; var $strict;
/** /**
* Array of HTMLPurifier_Module instances, indexed by module name * Array of HTMLPurifier_Module instances, indexed by module's class name
* @public * @public
*/ */
var $modules = array(); var $modules = array();
/**
* Associative array of module class name to module order keywords or
* numbers (keyword is preferred, all keywords are resolved at beginning
* of setup())
* @public
*/
var $modules_order = array();
/**
* List of prefixes HTML Purifier should try to resolve short names to.
* @public
*/
var $module_prefixes = array('HTMLPurifier_HTMLModule_');
/** /**
* Instance of HTMLPurifier_AttrTypes * Instance of HTMLPurifier_AttrTypes
* @public * @public
@ -223,7 +235,7 @@ class HTMLPurifier_HTMLDefinition
var $attr_collections; var $attr_collections;
/** /**
* Is setup? * Has setup() been called yet?
* @public * @public
*/ */
var $setup = false; var $setup = false;
@ -234,45 +246,89 @@ class HTMLPurifier_HTMLDefinition
*/ */
var $content_sets; var $content_sets;
/**
* Lookup table of module order "names" and an integer index
* @public
*/
var $order_keywords = array(
'setup' => 10,
'early' => 20,
'main' => 30,
'late' => 40,
'cleanup' => 50,
);
/**
* Temporary instance of HTMLPurifier_Config for convenience reasons,
* is removed after setup().
* @public
*/
var $config;
/** /**
* Performs low-cost, preliminary initialization. * Performs low-cost, preliminary initialization.
* @param $config Instance of HTMLPurifier_Config * @param $config Instance of HTMLPurifier_Config
*/ */
function HTMLPurifier_HTMLDefinition($config) { function HTMLPurifier_HTMLDefinition(&$config) {
// setup some cached config variables $this->config =& $config;
// this will eventually influence module loading
// set up public internals
$this->strict = $config->get('HTML', 'Strict'); $this->strict = $config->get('HTML', 'Strict');
// order is important!
$this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
$this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext();
$this->modules['List'] = new HTMLPurifier_HTMLModule_List();
$this->modules['Presentation'] = new HTMLPurifier_HTMLModule_Presentation();
$this->modules['Edit'] = new HTMLPurifier_HTMLModule_Edit();
$this->modules['Bdo'] = new HTMLPurifier_HTMLModule_Bdo();
$this->modules['Tables'] = new HTMLPurifier_HTMLModule_Tables();
$this->modules['Image'] = new HTMLPurifier_HTMLModule_Image();
$this->modules['StyleAttribute']= new HTMLPurifier_HTMLModule_StyleAttribute();
$this->modules['TransformToStrict'] = new HTMLPurifier_HTMLModule_TransformToStrict($config);
if (!$this->strict) $this->modules['Legacy'] = new HTMLPurifier_HTMLModule_Legacy($config);
$this->attr_types = new HTMLPurifier_AttrTypes(); $this->attr_types = new HTMLPurifier_AttrTypes();
$this->attr_collections = new HTMLPurifier_AttrCollections(); $this->attr_collections = new HTMLPurifier_AttrCollections();
$this->content_sets = new HTMLPurifier_ContentSets(); $this->content_sets = new HTMLPurifier_ContentSets();
// some compat stuff, will be factored to modules // modules
// remove ID module // main
$main_modules = array('Text', 'Hypertext', 'List', 'Presentation',
'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute');
foreach ($main_modules as $module) $this->addModule($module, 'main');
// late
if (!$this->strict) $this->addModule('Legacy', 'late');
// cleanup
$this->addModule('TransformToStrict', 'cleanup');
// remove ID module (refactor to module)
if (!$config->get('HTML', 'EnableAttrID')) { if (!$config->get('HTML', 'EnableAttrID')) {
$this->attr_collections->info['Core']['id'] = false; $this->attr_collections->info['Core']['id'] = false;
} }
} }
/**
 * Adds a module to the ordered list.
 *
 * A string module name is resolved to a class (first as given, then with
 * each entry of $this->module_prefixes prepended) and instantiated with
 * this definition as the sole constructor argument. The module is then
 * stored in $this->modules and its order in $this->modules_order, both
 * keyed by the lowercased class name.
 *
 * @param $module Mixed: string module name, with or without
 *                HTMLPurifier_HTMLModule prefix, or instance of
 *                subclass of HTMLPurifier_HTMLModule.
 * @param $order  Processing order: either a key of $this->order_keywords
 *                or an integer index. Defaults to 'main'.
 */
function addModule($module, $order = 'main') {
    // Validate the order up front so no module gets instantiated for a
    // call that is going to be rejected anyway. Integers are accepted
    // as-is because processModules() passes them through untouched.
    $order_ok = is_int($order) ||
        (is_string($order) && isset($this->order_keywords[$order]));
    if (!$order_ok) {
        trigger_error('Order keyword does not exist', E_USER_ERROR);
        return;
    }
    if (is_string($module)) {
        $original_module = $module;
        if (!class_exists($module)) {
            // not a full class name, try the registered prefixes
            foreach ($this->module_prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) break;
            }
        }
        if (!class_exists($module)) {
            trigger_error($original_module . ' module does not exist', E_USER_ERROR);
            return;
        }
        $module = new $module($this);
    }
    // get_class() on a non-object does not yield a usable module name,
    // so refuse anything that is neither a class name nor an instance
    if (!is_object($module)) {
        trigger_error('Module must be a class name or a module instance', E_USER_ERROR);
        return;
    }
    $name = strtolower(get_class($module));
    $this->modules[$name] = $module;
    $this->modules_order[$name] = $order;
}
/** /**
* Processes internals into form usable by HTMLPurifier internals. * Processes internals into form usable by HTMLPurifier internals.
@ -280,33 +336,55 @@ class HTMLPurifier_HTMLDefinition
* be done. * be done.
* @param $config Instance of HTMLPurifier_Config * @param $config Instance of HTMLPurifier_Config
*/ */
function setup($config) { function setup() {
// multiple call guard // multiple call guard
if ($this->setup) {return;} else {$this->setup = true;} if ($this->setup) {return;} else {$this->setup = true;}
// would be nice if we could put each of these in their $this->processModules();
// own object, would make this hookable too! $this->setupAttrTransform();
$this->processModules($config); $this->setupBlockWrapper();
$this->setupAttrTransform($config); $this->setupParent();
$this->setupBlockWrapper($config); $this->setupCompat();
$this->setupParent($config);
$this->setupCompat($config); unset($this->config);
} }
/** /**
* Processes the modules, setting up related info variables * Processes the modules, setting up related info variables
* @param $config Instance of HTMLPurifier_Config
*/ */
function processModules($config) { function processModules() {
// substitute out the order keywords
foreach ($this->modules_order as $name => $order) {
if (empty($this->modules[$name])) {
trigger_error('Orphan module order definition for module: ' . $name, E_USER_ERROR);
return;
}
if (is_int($order)) continue;
if (empty($this->order_keywords[$order])) {
trigger_error('Unknown order keyword: ' . $order, E_USER_ERROR);
return;
}
$this->modules_order[$name] = $this->order_keywords[$order];
}
// sort modules member variable
array_multisort(
$this->modules_order, SORT_ASC, SORT_NUMERIC,
$this->modules
);
// setup the global registries
$this->attr_collections->setup($this->attr_types, $this->modules); $this->attr_collections->setup($this->attr_types, $this->modules);
$this->content_sets->setup($this->modules); $this->content_sets->setup($this->modules);
$this->info_content_sets = $this->content_sets->lookup; $this->info_content_sets = $this->content_sets->lookup;
// process the modules
foreach ($this->modules as $module_i => $module) { foreach ($this->modules as $module_i => $module) {
$module->preProcess($this, $config); $module->preProcess($this);
// process element-wise definitions // process element-wise definitions
foreach ($module->info as $name => $def) { foreach ($module->info as $name => $def) {
@ -352,7 +430,7 @@ class HTMLPurifier_HTMLDefinition
foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v; foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v;
foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v; foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v;
$module->postProcess($this, $config); $module->postProcess($this);
} }
@ -360,18 +438,16 @@ class HTMLPurifier_HTMLDefinition
/** /**
* Sets up attribute transformations * Sets up attribute transformations
* @param $config Instance of HTMLPurifier_Config
*/ */
function setupAttrTransform($config) { function setupAttrTransform() {
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang(); $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
} }
/** /**
* Sets up block wrapper based on config * Sets up block wrapper based on config
* @param $config Instance of HTMLPurifier_Config
*/ */
function setupBlockWrapper($config) { function setupBlockWrapper() {
$block_wrapper = $config->get('HTML', 'BlockWrapper'); $block_wrapper = $this->config->get('HTML', 'BlockWrapper');
if (isset($this->info_content_sets['Block'][$block_wrapper])) { if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_block_wrapper = $block_wrapper; $this->info_block_wrapper = $block_wrapper;
} else { } else {
@ -382,10 +458,9 @@ class HTMLPurifier_HTMLDefinition
/** /**
* Sets up parent of fragment based on config * Sets up parent of fragment based on config
* @param $config Instance of HTMLPurifier_Config
*/ */
function setupParent($config) { function setupParent() {
$parent = $config->get('HTML', 'Parent'); $parent = $this->config->get('HTML', 'Parent');
if (isset($this->info[$parent])) { if (isset($this->info[$parent])) {
$this->info_parent = $parent; $this->info_parent = $parent;
} else { } else {
@ -399,10 +474,10 @@ class HTMLPurifier_HTMLDefinition
* Sets up compat code from HTMLDefinition that has not been * Sets up compat code from HTMLDefinition that has not been
* delegated to modules yet * delegated to modules yet
*/ */
function setupCompat($config) { function setupCompat() {
// deprecated config setting, implement in DisableURI module // deprecated config setting, implement in DisableURI module
if ($config->get('Attr', 'DisableURI')) { if ($this->config->get('Attr', 'DisableURI')) {
$this->info['a']->attr['href'] = $this->info['a']->attr['href'] =
$this->info['img']->attr['longdesc'] = $this->info['img']->attr['longdesc'] =
$this->info['del']->attr['cite'] = $this->info['del']->attr['cite'] =
@ -413,13 +488,13 @@ class HTMLPurifier_HTMLDefinition
} }
// setup allowed elements, SubtractiveWhitelist module // setup allowed elements, SubtractiveWhitelist module
$allowed_elements = $config->get('HTML', 'AllowedElements'); $allowed_elements = $this->config->get('HTML', 'AllowedElements');
if (is_array($allowed_elements)) { if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) { foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]); if(!isset($allowed_elements[$name])) unset($this->info[$name]);
} }
} }
$allowed_attributes = $config->get('HTML', 'AllowedAttributes'); $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
if (is_array($allowed_attributes)) { if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) { foreach ($this->info_global_attr as $attr_key => $info) {
if (!isset($allowed_attributes["*.$attr_key"])) { if (!isset($allowed_attributes["*.$attr_key"])) {

View File

@ -95,17 +95,22 @@ class HTMLPurifier_HTMLModule
* Hook method that lets module perform arbitrary operations on * Hook method that lets module perform arbitrary operations on
* HTMLPurifier_HTMLDefinition before the module gets processed. * HTMLPurifier_HTMLDefinition before the module gets processed.
* @param $definition Reference to HTMLDefinition being setup * @param $definition Reference to HTMLDefinition being setup
* @param $config Instance of HTMLPurifier_Config
*/ */
function preProcess(&$definition, $config) {} function preProcess(&$definition) {}
/** /**
* Hook method that lets module perform arbitrary operations * Hook method that lets module perform arbitrary operations
* on HTMLPurifier_HTMLDefinition after the module gets processed. * on HTMLPurifier_HTMLDefinition after the module gets processed.
* @param $definition Reference to HTMLDefinition being setup * @param $definition Reference to HTMLDefinition being setup
* @param $config Instance of HTMLPurifier_Config
*/ */
function postProcess(&$definition, $config) {} function postProcess(&$definition) {}
/**
 * Hook method that is called when a module gets registered to
 * the definition. The base implementation does nothing; it only
 * receives the definition by reference.
 * @param $definition Reference to HTMLDefinition being setup
 * @note NOTE(review): the call site of this hook is not visible in this
 *       change -- confirm that module registration actually invokes it.
 */
function setup(&$definition) {}
} }

View File

@ -31,7 +31,9 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
'lang' => false // placeholder 'lang' => false // placeholder
); );
function HTMLPurifier_HTMLModule_TransformToStrict($config) { function HTMLPurifier_HTMLModule_TransformToStrict(&$definition) {
$config = $definition->config;
// deprecated tag transforms // deprecated tag transforms
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');

View File

@ -216,7 +216,7 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
} }
function test_getDefinition() { function test_getHTMLDefinition() {
// we actually want to use the old copy, because the definition // we actually want to use the old copy, because the definition
// generation routines have dependencies on configuration values // generation routines have dependencies on configuration values
@ -224,12 +224,41 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
$this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy); $this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy);
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();
$def = $config->getHTMLDefinition();
$this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
$def = $config->getCSSDefinition(); $def = $config->getCSSDefinition();
$this->assertIsA($def, 'HTMLPurifier_CSSDefinition'); $this->assertIsA($def, 'HTMLPurifier_CSSDefinition');
$def = $config->getHTMLDefinition();
$def2 = $config->getHTMLDefinition();
$this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
$this->assertEqual($def, $def2);
$this->assertTrue($def->setup);
// test re-calculation if HTML changes
$config->set('HTML', 'Strict', true);
$def = $config->getHTMLDefinition();
$this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
$this->assertNotEqual($def, $def2);
$this->assertTrue($def->setup);
// test retrieval of raw definition
$def =& $config->getHTMLDefinition(true);
$this->assertNotEqual($def, $def2);
$this->assertFalse($def->setup);
// auto initialization
$config->getHTMLDefinition();
$this->assertTrue($def->setup);
}
function test_getCSSDefinition() {
$this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy);
$config = HTMLPurifier_Config::createDefault();
$def = $config->getCSSDefinition();
$this->assertIsA($def, 'HTMLPurifier_CSSDefinition');
} }
function test_loadArray() { function test_loadArray() {