diff --git a/docs/enduser-customize.html b/docs/enduser-customize.html new file mode 100644 index 00000000..6821e561 --- /dev/null +++ b/docs/enduser-customize.html @@ -0,0 +1,403 @@ + + +
+ + + + ++ You may have heard of the Advanced API. + If you're interested in reading dry prose and boring functional + specifications, feel free to click that link to get a no-nonsense overview + on the Advanced API. For the rest of us, there's this tutorial. By the time + you're finished reading this, you should have a pretty good idea on + how to implement custom tags and attributes that HTML Purifier may not have. +
+ ++ Before we even write any code, it is paramount to consider whether or + not the code we're writing is necessary. HTML Purifier, by default, + contains a large set of elements and attributes: large enough so that + any element or attribute in XHTML 1.0 (and its HTML variant) + that can be safely used by the general public is implemented. +
+ ++ So what needs to be implemented? (Feel free to skip this section if + you know what you want). +
+ ++ All of the modules listed below are based off of the + modularization of + XHTML, which, while technically for XHTML 1.1, is quite a useful + resource. +
+ ++ If you don't recognize it, you probably don't need it. But the curious + can look all of these modules up in the above-mentioned document. Note + that inline scripting comes packaged with HTML Purifier (more on this + later). +
+ ++ We have not implemented the + Ruby module, + which defines a set of tags + for publishing short annotations for text, used mostly in Japanese + and Chinese school texts. +
+ ++ XHTML 2.0 is still a + working draft, so any elements introduced in the + specification have not been implemented and will not be implemented + until we get a recommendation or proposal. Because XHTML 2.0 is + an entirely new markup language, implementing rules for it will be + no easy task. +
+ +
+ HTML 5
+ is a fork of HTML 4.01 by WHATWG, who believed that XHTML 2.0 was headed
+ in the wrong direction. It too is a working draft, and may change
+ drastically before publication, but it should be noted that the
+ canvas
tag has been implemented by many browser vendors.
+
+ There are a number of proprietary tags still in the wild. Many of them + have been documented in ref-proprietary-tags.txt, + but there is currently no implementation for any of them. +
+ ++ There are also a number of other XML languages out there that can + be embedded in HTML documents: two of the most popular are MathML and + SVG, and I frequently get requests to implement these. But they are + expansive, comprehensive specifications, and it would take far too long + to implement them correctly (most systems I've seen go as far + as whitelisting tags and no further; come on, what about nesting!) +
+ ++ Word of warning: HTML Purifier is currently not namespace + aware. +
+ ++ As you may imagine from the details above (don't be abashed if you didn't + read it all: a glance over would have done), there's quite a bit that + HTML Purifier doesn't implement. Recent architectural changes have + allowed HTML Purifier to implement elements and attributes that are not + safe! Don't worry, they won't be activated unless you set %HTML.Trusted + to true, but they certainly help out users who need to put, say, forms + on their page and don't want to go through the trouble of reading this + and implementing it themselves. +
+ ++ So any of the above that you implement for your own application could + help out some other poor sap on the other side of the globe. Help us + out, and send back code so that it can be hammered into a module and + released with the core. Any code would be greatly appreciated! +
+ ++ Enough philosophical talk, time for some code: +
+ +$config = HTMLPurifier_Config::createDefault(); +$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); +$config->set('HTML', 'DefinitionRev', 1); +$def =& $config->getHTMLDefinition(true);+ +
+ Assuming that HTML Purifier has already been properly loaded (hint:
+ include HTMLPurifier.auto.php
), this code will set up
+ the environment that you need to start customizing the HTML definition.
+ What's going on?
+
HTMLPurifier_HTMLDefinition
+ object that we will be tweaking. If the parameter was removed, we
+ would be retrieving a fully formed definition object, which is somewhat
+ useless for customization purposes.
+ + If you have already been twiddling around with the raw + HTML definition object, you'll notice that you're getting an error + when you attempt to retrieve the raw definition object without specifying + a DefinitionID. It is vital to caching (see below) that you make a unique + name for your customized definition, so make up something right now and + things will operate again. +
+ ++ To make development easier, we're going to temporarily turn off + definition caching: +
+ +$config = HTMLPurifier_Config::createDefault(); +$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); +$config->set('HTML', 'DefinitionRev', 1); +$config->set('Core', 'DefinitionCache', null); // remove this later! +$def =& $config->getHTMLDefinition(true);+ +
+ A few things should be mentioned about the caching mechanism before
+ we move on. For performance reasons, HTML Purifier caches generated
+ HTMLPurifier_Definition
objects in serialized files
+ stored (by default) in library/HTMLPurifier/DefinitionCache/Serializer
.
+ A lot of processing is done in order to create these objects, so it
+ makes little sense to repeat the same processing over and over again
+ whenever HTML Purifier is called.
+
+ In order to identify a cache entry, HTML Purifier uses three variables: + the library's version number, the value of %HTML.DefinitionRev and + a serial of relevant configuration. Whenever any of these changes, + a new HTML definition is generated. Notice that there is no way + for the definition object to track changes to customizations: here, it + is up to you to supply appropriate information to DefinitionID and + DefinitionRev. +
+ +
+ For this example, we're going to implement the target
attribute found
+ on a
elements. To implement an attribute, we have to
+ ask a few questions:
+
+ The first two are easy: the element is a
and the attribute
+ is target
. The third question is a little trickier.
+ Let's allow the special values: _blank, _self, _target and _top.
+ The form of this is called an enumeration, a list of
+ valid values, although only one can be used at a time. To translate
+ this into code form, we write:
+
$config = HTMLPurifier_Config::createDefault(); +$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); +$config->set('HTML', 'DefinitionRev', 1); +$config->set('Core', 'DefinitionCache', null); // remove this later! +$def =& $config->getHTMLDefinition(true); +$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');+ +
+ The Enum#_blank,_self,_target,_top
does all the magic.
+ The string is split into two parts, separated by a hash mark (#):
+
AttrDef
AttrDef
+ If that sounds vague and generic, it's because it is! HTML Purifier defines + an assortment of different attribute types one can use, and each of these + has their own specialized parameter format. Here are some of the more useful + ones: +
+ +Type | +Format | +Description | +
---|---|---|
Enum | +[s:]value1,value2,... | ++ Attribute with a number of valid values, one of which may be used. When + s: is present, the enumeration is case sensitive. + | +
Bool | +attribute_name | ++ Boolean attribute, with only one valid value: the name + of the attribute. + | +
CDATA | ++ | + Attribute of arbitrary text. Can also be referred to as Text + (the specification makes a semantic distinction between the two). + | +
ID | ++ | + Attribute that specifies a unique ID + | +
Pixels | ++ | + Attribute that specifies an integer pixel length + | +
Length | ++ | + Attribute that specifies a pixel or percentage length + | +
NMTOKENS | ++ |
+ Attribute that specifies a number of name tokens, example: the
+ class attribute
+ |
+
URI | ++ |
+ Attribute that specifies a URI, example: the href
+ attribute
+ |
+
Number | ++ | + Attribute that specifies a positive integer number + | +
+ For a complete list, consult library/HTMLPurifier/AttrTypes.php
;
+ more information on attributes that accept parameters can be found on their
+ respective includes in library/HTMLPurifier/AttrDef
.
+
+ Sometimes, the restrictive list in AttrTypes just doesn't cut it. Don't + sweat: you can also use a fully instantiated object as the value. The + equivalent, verbose form of the above example is: +
+ +$config = HTMLPurifier_Config::createDefault(); +$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); +$config->set('HTML', 'DefinitionRev', 1); +$config->set('Core', 'DefinitionCache', null); // remove this later! +$def =& $config->getHTMLDefinition(true); +$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum( + array('_blank','_self','_target','_top') +));+ +
+ Trust me, you'll learn to love the shorthand. +
+ ++ To be written... +
+ ++ Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. This directive has been available + since 1.7.0. +
+'); + /** * Defines allowed CSS attributes and what their values are. * @see HTMLPurifier_HTMLDefinition diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 6e078eb7..cf5eb66f 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -43,16 +43,6 @@ class HTMLPurifier_Config */ var $version = '1.6.1'; - /** - * Integer key users can use to indicate they have manually - * overridden some internal behavior and would like the - * cache to invalidate itself. This is used in conjunction - * with the %NAMESPACE.DefinitionID, the semantic difference is - * that the configuration directive determines "semantic identity", - * while this integer indicates "chronological identity". - */ - var $revision = 1; - /** * Two-level associative array of configuration directives */ diff --git a/library/HTMLPurifier/DefinitionCache.php b/library/HTMLPurifier/DefinitionCache.php index bf3f9896..81cd7b33 100644 --- a/library/HTMLPurifier/DefinitionCache.php +++ b/library/HTMLPurifier/DefinitionCache.php @@ -36,7 +36,7 @@ class HTMLPurifier_DefinitionCache */ function generateKey($config) { return $config->version . '-' . // possibly replace with function calls - $config->revision . '-' . + $config->get($this->type, 'DefinitionRev') . '-' . 
$config->getBatchSerial($this->type); } @@ -50,7 +50,7 @@ class HTMLPurifier_DefinitionCache list($version, $revision, $hash) = explode('-', $key, 3); $compare = version_compare($version, $config->version); if ($compare > 0) return false; - if ($compare == 0 && $revision >= $config->revision) return false; + if ($compare == 0 && $revision >= $config->get($this->type, 'DefinitionRev')) return false; return true; } diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index b621fb6f..c998aed6 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -37,6 +37,19 @@ $def->addAttribute(\'a\', \'tabindex\', \'Number\'); '); +HTMLPurifier_ConfigSchema::define( + 'HTML', 'DefinitionRev', 1, 'int', ' ++ Revision identifier for your custom definition specified in + %HTML.DefinitionID. This serves the same purpose: uniquely identifying + your custom definition, but this one does so in a chronological + context: revision 3 is more up-to-date then revision 2. Thus, when + this gets incremented, the cache handling is smart enough to clean + up any older revisions of your definition as well as flush the + cache. This directive has been available since 1.7.0. +
+'); + HTMLPurifier_ConfigSchema::define( 'HTML', 'BlockWrapper', 'p', 'string', 'diff --git a/tests/HTMLPurifier/DefinitionCache/SerializerTest.php b/tests/HTMLPurifier/DefinitionCache/SerializerTest.php index 20d877d0..99867b03 100644 --- a/tests/HTMLPurifier/DefinitionCache/SerializerTest.php +++ b/tests/HTMLPurifier/DefinitionCache/SerializerTest.php @@ -11,10 +11,10 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio $cache = new HTMLPurifier_DefinitionCache_Serializer('Test'); $config = $this->generateConfigMock('serial'); + $config->setReturnValue('get', 2, array('Test', 'DefinitionRev')); $config->version = '1.0.0'; - $config->revision = 2; - $config_md5 = '1.0.0-' . $config->revision . '-serial'; + $config_md5 = '1.0.0-2-serial'; $file = realpath( $rel_file = dirname(__FILE__) . @@ -120,12 +120,12 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio $config1 = $this->generateConfigMock(); $config1->version = '0.9.0'; - $config1->revision = 574; + $config1->setReturnValue('get', 574, array('Test', 'DefinitionRev')); $def1 = $this->generateDefinition(array('info' => 1)); $config2 = $this->generateConfigMock(); $config2->version = '1.0.0beta'; - $config2->revision = 1; + $config2->setReturnValue('get', 1, array('Test', 'DefinitionRev')); $def2 = $this->generateDefinition(array('info' => 3)); $cache->set($def1, $config1); @@ -158,7 +158,7 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio $cache = new HTMLPurifier_DefinitionCache_Serializer('Test'); $config = $this->generateConfigMock('serial'); $config->version = '1.0.0'; - $config->revision = 1; + $config->setReturnValue('get', 1, array('Test', 'DefinitionRev')); $dir = dirname(__FILE__) . 
'/SerializerTest'; $config->setReturnValue('get', $dir, array('Cache', 'SerializerPath')); diff --git a/tests/HTMLPurifier/DefinitionCacheHarness.php b/tests/HTMLPurifier/DefinitionCacheHarness.php index ebc15d68..efcb49a4 100644 --- a/tests/HTMLPurifier/DefinitionCacheHarness.php +++ b/tests/HTMLPurifier/DefinitionCacheHarness.php @@ -13,7 +13,6 @@ class HTMLPurifier_DefinitionCacheHarness extends UnitTestCase $config = new HTMLPurifier_ConfigMock($this); $config->setReturnValue('getBatchSerial', $serial, array('Test')); $config->version = '1.0.0'; - $config->revision = 1; return $config; } diff --git a/tests/HTMLPurifier/DefinitionCacheTest.php b/tests/HTMLPurifier/DefinitionCacheTest.php index eb78a11c..702712ff 100644 --- a/tests/HTMLPurifier/DefinitionCacheTest.php +++ b/tests/HTMLPurifier/DefinitionCacheTest.php @@ -8,9 +8,16 @@ class HTMLPurifier_DefinitionCacheTest extends UnitTestCase function test_isOld() { $cache = new HTMLPurifier_DefinitionCache('Test'); // non-functional + $old_copy = HTMLPurifier_ConfigSchema::instance(); + $o = new HTMLPurifier_ConfigSchema(); + HTMLPurifier_ConfigSchema::instance($o); + + HTMLPurifier_ConfigSchema::defineNamespace('Test', 'Test namespace'); + HTMLPurifier_ConfigSchema::define('Test', 'DefinitionRev', 1, 'int', 'Definition revision.'); + $config = HTMLPurifier_Config::createDefault(); $config->version = '1.0.0'; - $config->revision = 10; + $config->set('Test', 'DefinitionRev', 10); $this->assertIdentical($cache->isOld('1.0.0-10-hashstuffhere', $config), false); $this->assertIdentical($cache->isOld('1.5.0-1-hashstuffhere', $config), false); @@ -18,6 +25,9 @@ class HTMLPurifier_DefinitionCacheTest extends UnitTestCase $this->assertIdentical($cache->isOld('0.9.0-1-hashstuffhere', $config), true); $this->assertIdentical($cache->isOld('1.0.0-1-hashstuffhere', $config), true); $this->assertIdentical($cache->isOld('1.0.0beta-11-hashstuffhere', $config), true); + + HTMLPurifier_ConfigSchema::instance($old_copy); + } }