From 32025a12e195a36a575f207d9184f4c7f72fa1f4 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 9 Jun 2008 01:23:05 +0000 Subject: [PATCH] [3.1.1] Allow injectors to be specified by modules. - Make method for URI implemented - Split out checkNeeded in Injector from prepare git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1779 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 4 ++ library/HTMLPurifier/AttrDef/URI.php | 5 ++ library/HTMLPurifier/HTMLDefinition.php | 23 +++++++- library/HTMLPurifier/HTMLModule.php | 8 +++ library/HTMLPurifier/HTMLModuleManager.php | 13 +++++ library/HTMLPurifier/Injector.php | 38 ++++++++---- .../HTMLPurifier/Strategy/MakeWellFormed.php | 5 ++ tests/HTMLPurifier/AttrDef/URITest.php | 11 ++++ tests/HTMLPurifier/HTMLDefinitionTest.php | 58 +++++++++++++++++++ tests/HTMLPurifier/HTMLModuleManagerTest.php | 16 ++++- 10 files changed, 165 insertions(+), 16 deletions(-) diff --git a/NEWS b/NEWS index c180fd17..e34a4acd 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier for sponsoring this feature. ! Implemented post URI filtering. Set member variable $post to true to set a URIFilter as such. +! Allow modules to define injectors via $info_injector. Injectors are + automatically disabled if injector's needed elements are not found. - Disable percent height/width attributes for img - AttrValidator operations are now atomic; updates to attributes are not manifest in token until end of operations. This prevents naughty internal @@ -61,6 +63,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . HTML/CSS Printers must be primed with prepareGenerator($gen_config), otherwise fatal errors will ensue. . URIFilter->prepare can return false in order to abort loading of the filter +. Factory for AttrDef_URI implemented, URI#embedded to indicate URI that embeds + an external resource. 
3.1.0, released 2008-05-18 # Unnecessary references to objects (vestiges of PHP4) removed from method diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 99bdbcc3..f63edcef 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -18,6 +18,11 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $this->embedsResource = (bool) $embeds_resource; } + public function make($string) { + $embeds = (bool) $string; + return new HTMLPurifier_AttrDef_URI($embeds); + } + public function validate($uri, $config, $context) { if ($config->get('URI', 'Disable')) return false; diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index ce299f79..e647228c 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -76,6 +76,11 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition */ public $info_content_sets = array(); + /** + * Indexed list of HTMLPurifier_Injector to be used. 
+ */ + public $info_injector = array(); + /** * Doctype object */ @@ -186,18 +191,22 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition $this->doctype = $this->manager->doctype; foreach ($this->manager->modules as $module) { - foreach($module->info_tag_transform as $k => $v) { + foreach($module->info_tag_transform as $k => $v) { if ($v === false) unset($this->info_tag_transform[$k]); else $this->info_tag_transform[$k] = $v; } - foreach($module->info_attr_transform_pre as $k => $v) { + foreach($module->info_attr_transform_pre as $k => $v) { if ($v === false) unset($this->info_attr_transform_pre[$k]); else $this->info_attr_transform_pre[$k] = $v; } - foreach($module->info_attr_transform_post as $k => $v) { + foreach($module->info_attr_transform_post as $k => $v) { if ($v === false) unset($this->info_attr_transform_post[$k]); else $this->info_attr_transform_post[$k] = $v; } + foreach ($module->info_injector as $k => $v) { + if ($v === false) unset($this->info_injector[$k]); + else $this->info_injector[$k] = $v; + } } $this->info = $this->manager->getElements(); @@ -356,6 +365,14 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition } } + // setup injectors ----------------------------------------------------- + foreach ($this->info_injector as $i => $injector) { + if ($injector->checkNeeded($config) !== false) { + // remove injector that does not have its required + // elements/attributes present, and is thus not needed. + unset($this->info_injector[$i]); + } + } } /** diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php index a6241f06..22af1546 100644 --- a/library/HTMLPurifier/HTMLModule.php +++ b/library/HTMLPurifier/HTMLModule.php @@ -71,6 +71,14 @@ class HTMLPurifier_HTMLModule */ public $info_attr_transform_post = array(); + /** + * List of HTMLPurifier_Injector to be performed during well-formedness fixing. 
+ * An injector will only be invoked if all of its pre-requisites are met; + * if an injector fails setup, there will be no error; it will simply be + * silently disabled. + */ + public $info_injector = array(); + /** * Boolean flag that indicates whether or not getChildDef is implemented. * For optimization reasons: may save a call to a function. Be sure diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php index 1a012fe5..d1620e37 100644 --- a/library/HTMLPurifier/HTMLModuleManager.php +++ b/library/HTMLPurifier/HTMLModuleManager.php @@ -231,6 +231,19 @@ class HTMLPurifier_HTMLModuleManager $this->modules[$module]->setup($config); } + // prepare any injectors + foreach ($this->modules as $module) { + $n = array(); + foreach ($module->info_injector as $i => $injector) { + if (!is_object($injector)) { + $class = "HTMLPurifier_Injector_$injector"; + $injector = new $class; + } + $n[$injector->name] = $injector; + } + $module->info_injector = $n; + } + // setup lookup table based on all valid modules foreach ($this->modules as $module) { foreach ($module->info as $name => $def) { diff --git a/library/HTMLPurifier/Injector.php b/library/HTMLPurifier/Injector.php index c9f9f2eb..1947c340 100644 --- a/library/HTMLPurifier/Injector.php +++ b/library/HTMLPurifier/Injector.php @@ -58,29 +58,45 @@ abstract class HTMLPurifier_Injector * Prepares the injector by giving it the config and context objects: * this allows references to important variables to be made within * the injector. This function also checks if the HTML environment - * will work with the Injector: if p tags are not allowed, the - * Auto-Paragraphing injector should not be enabled. + * will work with the Injector (see checkNeeded()). 
* @param $config Instance of HTMLPurifier_Config * @param $context Instance of HTMLPurifier_Context * @return Boolean false if success, string of missing needed element/attribute if failure */ public function prepare($config, $context) { $this->htmlDefinition = $config->getHTMLDefinition(); - // perform $needed checks - foreach ($this->needed as $element => $attributes) { - if (is_int($element)) $element = $attributes; - if (!isset($this->htmlDefinition->info[$element])) return $element; - if (!is_array($attributes)) continue; - foreach ($attributes as $name) { - if (!isset($this->htmlDefinition->info[$element]->attr[$name])) return "$element.$name"; - } - } + // Even though this might fail, some unit tests ignore this and + // still test checkNeeded, so be careful. Maybe get rid of that + // dependency. + $result = $this->checkNeeded($config); + if ($result !== false) return $result; $this->currentNesting =& $context->get('CurrentNesting'); $this->inputTokens =& $context->get('InputTokens'); $this->inputIndex =& $context->get('InputIndex'); return false; } + /** + * This function checks if the HTML environment + * will work with the Injector: if p tags are not allowed, the + * Auto-Paragraphing injector should not be enabled. 
+ * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return Boolean false if success, string of missing needed element/attribute if failure + */ + public function checkNeeded($config) { + $def = $config->getHTMLDefinition(); + foreach ($this->needed as $element => $attributes) { + if (is_int($element)) $element = $attributes; + if (!isset($def->info[$element])) return $element; + if (!is_array($attributes)) continue; + foreach ($attributes as $name) { + if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; + } + } + return false; + } + /** * Tests if the context node allows a certain element * @param $name Name of element to test for diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index fcbb75c0..1ca62711 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -38,6 +38,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $this->injectors = array(); $injectors = $config->getBatch('AutoFormat'); + $def_injectors = $definition->info_injector; $custom_injectors = $injectors['Custom']; unset($injectors['Custom']); // special case foreach ($injectors as $injector => $b) { @@ -45,6 +46,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if (!$b) continue; $this->injectors[] = new $injector; } + foreach ($def_injectors as $injector) { + // assumed to be objects + $this->injectors[] = $injector; + } foreach ($custom_injectors as $injector) { if (is_string($injector)) { $injector = "HTMLPurifier_Injector_$injector"; diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index 8e65c73b..9e83412a 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -107,6 +107,17 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness 
HTMLPurifier_DefinitionCacheFactory::instance($old); } + function test_make() { + $factory = new HTMLPurifier_AttrDef_URI(); + $def = $factory->make(''); + $def2 = new HTMLPurifier_AttrDef_URI(); + $this->assertIdentical($def, $def2); + + $def = $factory->make('embedded'); + $def2 = new HTMLPurifier_AttrDef_URI(true); + $this->assertIdentical($def, $def2); + } + /* function test_validate_configWhitelist() { diff --git a/tests/HTMLPurifier/HTMLDefinitionTest.php b/tests/HTMLPurifier/HTMLDefinitionTest.php index b64c60a4..fd33609a 100644 --- a/tests/HTMLPurifier/HTMLDefinitionTest.php +++ b/tests/HTMLPurifier/HTMLDefinitionTest.php @@ -287,7 +287,65 @@ a[href|title] } + function test_injector() { + $this->config->set('HTML', 'DefinitionID', 'HTMLPurifier_HTMLDefinitionTest->test_injector'); + + generate_mock_once('HTMLPurifier_Injector'); + $injector = new HTMLPurifier_InjectorMock(); + $injector->name = 'MyInjector'; + $injector->setReturnValue('checkNeeded', false); + + $module = $this->config->getHTMLDefinition(true)->getAnonymousModule(); + $module->info_injector[] = $injector; + + $this->assertIdentical($this->config->getHTMLDefinition()->info_injector, + array( + 'MyInjector' => $injector, + ) + ); + } + function test_injectorMissingNeeded() { + $this->config->set('HTML', 'DefinitionID', 'HTMLPurifier_HTMLDefinitionTest->test_injectorMissingNeeded'); + + generate_mock_once('HTMLPurifier_Injector'); + $injector = new HTMLPurifier_InjectorMock(); + $injector->name = 'MyInjector'; + $injector->setReturnValue('checkNeeded', 'a'); + + $module = $this->config->getHTMLDefinition(true)->getAnonymousModule(); + $module->info_injector[] = $injector; + + $this->assertIdentical($this->config->getHTMLDefinition()->info_injector, + array() + ); + } + + function test_injectorIntegration() { + $this->config->set('HTML', 'DefinitionID', 'HTMLPurifier_HTMLDefinitionTest->test_injectorIntegration'); + + $module = $this->config->getHTMLDefinition(true)->getAnonymousModule(); + 
$module->info_injector[] = 'Linkify'; + + $this->assertIdentical( + $this->config->getHTMLDefinition()->info_injector, + array('Linkify' => new HTMLPurifier_Injector_Linkify()) + ); + } + + function test_injectorIntegrationFail() { + $this->config->set('HTML', 'DefinitionID', 'HTMLPurifier_HTMLDefinitionTest->test_injectorIntegrationFail'); + + $this->config->set('HTML', 'Allowed', 'p'); + + $module = $this->config->getHTMLDefinition(true)->getAnonymousModule(); + $module->info_injector[] = 'Linkify'; + + $this->assertIdentical( + $this->config->getHTMLDefinition()->info_injector, + array() + ); + } } diff --git a/tests/HTMLPurifier/HTMLModuleManagerTest.php b/tests/HTMLPurifier/HTMLModuleManagerTest.php index 1aeed0b6..2daa734c 100644 --- a/tests/HTMLPurifier/HTMLModuleManagerTest.php +++ b/tests/HTMLPurifier/HTMLModuleManagerTest.php @@ -3,9 +3,11 @@ class HTMLPurifier_HTMLModuleManagerTest extends HTMLPurifier_Harness { - function test_addModule() { + protected function createManager() { $manager = new HTMLPurifier_HTMLModuleManager(); - $manager->doctypes->register('Blank'); // doctype normally is blank... 
+ + $this->config->set('HTML', 'CustomDoctype', 'Blank'); + $manager->doctypes->register('Blank'); $attrdef_nmtokens = new HTMLPurifier_AttrDef_HTML_Nmtokens(); @@ -13,6 +15,12 @@ class HTMLPurifier_HTMLModuleManagerTest extends HTMLPurifier_Harness $attrdef = new HTMLPurifier_AttrDefMock(); $attrdef->setReturnValue('make', $attrdef_nmtokens); $manager->attrTypes->set('NMTOKENS', $attrdef); + return $manager; + } + + function test_addModule() { + + $manager = $this->createManager(); // ...but we add user modules @@ -44,6 +52,8 @@ class HTMLPurifier_HTMLModuleManagerTest extends HTMLPurifier_Harness $manager->setup($config); + $attrdef_nmtokens = new HTMLPurifier_AttrDef_HTML_Nmtokens(); + $p = new HTMLPurifier_ElementDef(); $p->attr['class'] = $attrdef_nmtokens; $p->child = new HTMLPurifier_ChildDef_Optional(array('em', '#PCDATA')); @@ -103,5 +113,7 @@ class HTMLPurifier_HTMLModuleManagerTest extends HTMLPurifier_Harness } + + }