diff --git a/docs/enduser-customize.html b/docs/enduser-customize.html new file mode 100644 index 00000000..6821e561 --- /dev/null +++ b/docs/enduser-customize.html @@ -0,0 +1,403 @@ + + + + + + + +Customize - HTML Purifier + + + +

Customize!

+
HTML Purifier is a Swiss-Army Knife
+ +
Filed under End-User
+
Return to the index.
+
HTML Purifier End-User Documentation
+ +
+ This document covers currently unreleased functionality and + only applies to recent SVN checkouts. +
+ +

+ You may have heard of the Advanced API. + If you're interested in reading dry prose and boring functional + specifications, feel free to click that link to get a no-nonsense overview + on the Advanced API. For the rest of us, there's this tutorial. By the time + you're finished reading this, you should have a pretty good idea of + how to implement custom tags and attributes that HTML Purifier may not have. +

+ +

Is it necessary?

+ +

+ Before we even write any code, it is paramount to consider whether + the code we're writing is actually necessary. HTML Purifier, by default, + contains a large set of elements and attributes: large enough so that + any element or attribute in XHTML 1.0 (and its HTML variant) + that can be safely used by the general public is implemented. +

+ +

+ So what needs to be implemented? (Feel free to skip this section if + you know what you want). +

+ +

XHTML 1.0

+ +

+ All of the modules listed below are based off of the + modularization of + XHTML, which, while technically for XHTML 1.1, is quite a useful + resource. +

+ + + +

+ If you don't recognize it, you probably don't need it. But the curious + can look all of these modules up in the above-mentioned document. Note + that inline scripting comes packaged with HTML Purifier (more on this + later). +

+ +

XHTML 1.1

+ +

+ We have not implemented the + Ruby module, + which defines a set of tags + for publishing short annotations for text, used mostly in Japanese + and Chinese school texts. +

+ +

XHTML 2.0

+ +

+ XHTML 2.0 is still a + working draft, so any elements introduced in the + specification have not been implemented and will not be implemented + until we get a recommendation or proposal. Because XHTML 2.0 is + an entirely new markup language, implementing rules for it will be + no easy task. +

+ +

HTML 5

+ +

+ HTML 5 + is a fork of HTML 4.01 by WHATWG, who believed that XHTML 2.0 was headed + in the wrong direction. It too is a working draft, and may change + drastically before publication, but it should be noted that the + canvas tag has been implemented by many browser vendors. +

+ +

Proprietary

+ +

+ There are a number of proprietary tags still in the wild. Many of them + have been documented in ref-proprietary-tags.txt, + but there is currently no implementation for any of them. +

+ +

Extensions

+ +

+ There are also a number of other XML languages out there that can + be embedded in HTML documents: two of the most popular are MathML and + SVG, and I frequently get requests to implement these. But they are + expansive, comprehensive specifications, and it would take far too long + to implement them correctly (most systems I've seen go as far + as whitelisting tags and no further; come on, what about nesting!) +

+ +

+ Word of warning: HTML Purifier is currently not namespace + aware. +

+ +

Giving back

+ +

+ As you may imagine from the details above (don't be abashed if you didn't + read it all: a glance over would have done), there's quite a bit that + HTML Purifier doesn't implement. Recent architectural changes have + allowed HTML Purifier to implement elements and attributes that are not + safe! Don't worry, they won't be activated unless you set %HTML.Trusted + to true, but they certainly help out users who need to put, say, forms + on their page and don't want to go through the trouble of reading this + and implementing it themselves. +

+ +

+ So any of the above that you implement for your own application could + help out some other poor sap on the other side of the globe. Help us + out, and send back code so that it can be hammered into a module and + released with the core. Any code would be greatly appreciated! +

+ +

And now...

+ +

+ Enough philosophical talk, time for some code: +

+ +
$config = HTMLPurifier_Config::createDefault();
+$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
+$config->set('HTML', 'DefinitionRev', 1);
+$def =& $config->getHTMLDefinition(true);
+ +

+ Assuming that HTML Purifier has already been properly loaded (hint: + include HTMLPurifier.auto.php), this code will set up + the environment that you need to start customizing the HTML definition. + What's going on? +

+ + + +

Broken backwards-compatibility

+ +

+ Those of you who have already been twiddling around with the raw + HTML definition object will notice that you now get an error + when you attempt to retrieve the raw definition object without specifying + a DefinitionID. It is vital to caching (see below) that you make a unique + name for your customized definition, so make up something right now and + things will operate again. +

+ +

Turn off caching

+ +

+ To make development easier, we're going to temporarily turn off + definition caching: +

+ +
$config = HTMLPurifier_Config::createDefault();
+$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
+$config->set('HTML', 'DefinitionRev', 1);
+$config->set('Core', 'DefinitionCache', null); // remove this later!
+$def =& $config->getHTMLDefinition(true);
+ +

+ A few things should be mentioned about the caching mechanism before + we move on. For performance reasons, HTML Purifier caches generated + HTMLPurifier_Definition objects in serialized files + stored (by default) in library/HTMLPurifier/DefinitionCache/Serializer. + A lot of processing is done in order to create these objects, so it + makes little sense to repeat the same processing over and over again + whenever HTML Purifier is called. +

+ +

+ In order to identify a cache entry, HTML Purifier uses three variables: + the library's version number, the value of %HTML.DefinitionRev and + a serial of relevant configuration. Whenever any of these changes, + a new HTML definition is generated. Notice that there is no way + for the definition object to track changes to customizations: here, it + is up to you to supply appropriate information to DefinitionID and + DefinitionRev. +

+ +

Add an attribute

+ +

+ For this example, we're going to implement the target attribute found + on a elements. To implement an attribute, we have to + ask a few questions: +

+ +
    +
  1. What element is it found on?
  2. +
  3. What is its name?
  4. +
  5. What are valid values for it?
  6. +
+ +

+ The first two are easy: the element is a and the attribute + is target. The third question is a little trickier. + Let's allow the special values: _blank, _self, _target and _top. + The form of this is called an enumeration, a list of + valid values, although only one can be used at a time. To translate + this into code form, we write: +

+ +
$config = HTMLPurifier_Config::createDefault();
+$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
+$config->set('HTML', 'DefinitionRev', 1);
+$config->set('Core', 'DefinitionCache', null); // remove this later!
+$def =& $config->getHTMLDefinition(true);
+$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
+ +

+ The Enum#_blank,_self,_target,_top does all the magic. + The string is split into two parts, separated by a hash mark (#): +

+ +
    +
  1. The first part is the name of what we call an AttrDef
  2. +
  3. The second part is the parameter of the above-mentioned AttrDef
  4. +
+ +

+ If that sounds vague and generic, it's because it is! HTML Purifier defines + an assortment of different attribute types one can use, and each of these + has its own specialized parameter format. Here are some of the more useful + ones: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeFormatDescription
Enum[s:]value1,value2,... + Attribute with a number of valid values, one of which may be used. When + s: is present, the enumeration is case sensitive. +
Boolattribute_name + Boolean attribute, with only one valid value: the name + of the attribute. +
CDATA + Attribute of arbitrary text. Can also be referred to as Text + (the specification makes a semantic distinction between the two). +
ID + Attribute that specifies a unique ID +
Pixels + Attribute that specifies an integer pixel length +
Length + Attribute that specifies a pixel or percentage length +
NMTOKENS + Attribute that specifies a number of name tokens, example: the + class attribute +
URI + Attribute that specifies a URI, example: the href + attribute +
Number + Attribute that specifies a positive integer number +
+ +

+ For a complete list, consult library/HTMLPurifier/AttrTypes.php; + more information on attributes that accept parameters can be found on their + respective includes in library/HTMLPurifier/AttrDef. +

+ +

+ Sometimes, the restrictive list in AttrTypes just doesn't cut it. Don't + sweat: you can also use a fully instantiated object as the value. The + equivalent, verbose form of the above example is: +

+ +
$config = HTMLPurifier_Config::createDefault();
+$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
+$config->set('HTML', 'DefinitionRev', 1);
+$config->set('Core', 'DefinitionCache', null); // remove this later!
+$def =& $config->getHTMLDefinition(true);
+$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
+  array('_blank','_self','_target','_top')
+));
+ +

+ Trust me, you'll learn to love the shorthand. +

+ +

Add an element

+ +

+ To be written... +

+ +
$Id: enduser-tidy.html 1158 2007-06-18 19:26:29Z Edward $
+ + \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index 4a8dabb1..69ee6207 100644 --- a/docs/index.html +++ b/docs/index.html @@ -37,6 +37,9 @@ information for casual developers using HTML Purifier.

Tidy
Tutorial for tweaking HTML Purifier's Tidy-like behavior.
+
Customize
+
Tutorial for customizing HTML Purifier's tag and attribute sets.
+

Development

diff --git a/library/HTMLPurifier/CSSDefinition.php b/library/HTMLPurifier/CSSDefinition.php index e5d963f3..af6c6058 100644 --- a/library/HTMLPurifier/CSSDefinition.php +++ b/library/HTMLPurifier/CSSDefinition.php @@ -17,6 +17,15 @@ require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php'; require_once 'HTMLPurifier/AttrDef/CSS/URI.php'; require_once 'HTMLPurifier/AttrDef/Enum.php'; +HTMLPurifier_ConfigSchema::define( + 'CSS', 'DefinitionRev', 1, 'int', ' +

+ Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. This directive has been available + since 1.7.0. +

+'); + /** * Defines allowed CSS attributes and what their values are. * @see HTMLPurifier_HTMLDefinition diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 6e078eb7..cf5eb66f 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -43,16 +43,6 @@ class HTMLPurifier_Config */ var $version = '1.6.1'; - /** - * Integer key users can use to indicate they have manually - * overridden some internal behavior and would like the - * cache to invalidate itself. This is used in conjunction - * with the %NAMESPACE.DefinitionID, the semantic difference is - * that the configuration directive determines "semantic identity", - * while this integer indicates "chronological identity". - */ - var $revision = 1; - /** * Two-level associative array of configuration directives */ diff --git a/library/HTMLPurifier/DefinitionCache.php b/library/HTMLPurifier/DefinitionCache.php index bf3f9896..81cd7b33 100644 --- a/library/HTMLPurifier/DefinitionCache.php +++ b/library/HTMLPurifier/DefinitionCache.php @@ -36,7 +36,7 @@ class HTMLPurifier_DefinitionCache */ function generateKey($config) { return $config->version . '-' . // possibly replace with function calls - $config->revision . '-' . + $config->get($this->type, 'DefinitionRev') . '-' . 
$config->getBatchSerial($this->type); } @@ -50,7 +50,7 @@ class HTMLPurifier_DefinitionCache list($version, $revision, $hash) = explode('-', $key, 3); $compare = version_compare($version, $config->version); if ($compare > 0) return false; - if ($compare == 0 && $revision >= $config->revision) return false; + if ($compare == 0 && $revision >= $config->get($this->type, 'DefinitionRev')) return false; return true; } diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index b621fb6f..c998aed6 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -37,6 +37,19 @@ $def->addAttribute(\'a\', \'tabindex\', \'Number\');

'); +HTMLPurifier_ConfigSchema::define( + 'HTML', 'DefinitionRev', 1, 'int', ' +

+ Revision identifier for your custom definition specified in + %HTML.DefinitionID. This serves the same purpose: uniquely identifying + your custom definition, but this one does so in a chronological + context: revision 3 is more up-to-date than revision 2. Thus, when + this gets incremented, the cache handling is smart enough to clean + up any older revisions of your definition as well as flush the + cache. This directive has been available since 1.7.0. +

+'); + HTMLPurifier_ConfigSchema::define( 'HTML', 'BlockWrapper', 'p', 'string', '

diff --git a/tests/HTMLPurifier/DefinitionCache/SerializerTest.php b/tests/HTMLPurifier/DefinitionCache/SerializerTest.php index 20d877d0..99867b03 100644 --- a/tests/HTMLPurifier/DefinitionCache/SerializerTest.php +++ b/tests/HTMLPurifier/DefinitionCache/SerializerTest.php @@ -11,10 +11,10 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio $cache = new HTMLPurifier_DefinitionCache_Serializer('Test'); $config = $this->generateConfigMock('serial'); + $config->setReturnValue('get', 2, array('Test', 'DefinitionRev')); $config->version = '1.0.0'; - $config->revision = 2; - $config_md5 = '1.0.0-' . $config->revision . '-serial'; + $config_md5 = '1.0.0-2-serial'; $file = realpath( $rel_file = dirname(__FILE__) . @@ -120,12 +120,12 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio $config1 = $this->generateConfigMock(); $config1->version = '0.9.0'; - $config1->revision = 574; + $config1->setReturnValue('get', 574, array('Test', 'DefinitionRev')); $def1 = $this->generateDefinition(array('info' => 1)); $config2 = $this->generateConfigMock(); $config2->version = '1.0.0beta'; - $config2->revision = 1; + $config2->setReturnValue('get', 1, array('Test', 'DefinitionRev')); $def2 = $this->generateDefinition(array('info' => 3)); $cache->set($def1, $config1); @@ -158,7 +158,7 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio $cache = new HTMLPurifier_DefinitionCache_Serializer('Test'); $config = $this->generateConfigMock('serial'); $config->version = '1.0.0'; - $config->revision = 1; + $config->setReturnValue('get', 1, array('Test', 'DefinitionRev')); $dir = dirname(__FILE__) . 
'/SerializerTest'; $config->setReturnValue('get', $dir, array('Cache', 'SerializerPath')); diff --git a/tests/HTMLPurifier/DefinitionCacheHarness.php b/tests/HTMLPurifier/DefinitionCacheHarness.php index ebc15d68..efcb49a4 100644 --- a/tests/HTMLPurifier/DefinitionCacheHarness.php +++ b/tests/HTMLPurifier/DefinitionCacheHarness.php @@ -13,7 +13,6 @@ class HTMLPurifier_DefinitionCacheHarness extends UnitTestCase $config = new HTMLPurifier_ConfigMock($this); $config->setReturnValue('getBatchSerial', $serial, array('Test')); $config->version = '1.0.0'; - $config->revision = 1; return $config; } diff --git a/tests/HTMLPurifier/DefinitionCacheTest.php b/tests/HTMLPurifier/DefinitionCacheTest.php index eb78a11c..702712ff 100644 --- a/tests/HTMLPurifier/DefinitionCacheTest.php +++ b/tests/HTMLPurifier/DefinitionCacheTest.php @@ -8,9 +8,16 @@ class HTMLPurifier_DefinitionCacheTest extends UnitTestCase function test_isOld() { $cache = new HTMLPurifier_DefinitionCache('Test'); // non-functional + $old_copy = HTMLPurifier_ConfigSchema::instance(); + $o = new HTMLPurifier_ConfigSchema(); + HTMLPurifier_ConfigSchema::instance($o); + + HTMLPurifier_ConfigSchema::defineNamespace('Test', 'Test namespace'); + HTMLPurifier_ConfigSchema::define('Test', 'DefinitionRev', 1, 'int', 'Definition revision.'); + $config = HTMLPurifier_Config::createDefault(); $config->version = '1.0.0'; - $config->revision = 10; + $config->set('Test', 'DefinitionRev', 10); $this->assertIdentical($cache->isOld('1.0.0-10-hashstuffhere', $config), false); $this->assertIdentical($cache->isOld('1.5.0-1-hashstuffhere', $config), false); @@ -18,6 +25,9 @@ class HTMLPurifier_DefinitionCacheTest extends UnitTestCase $this->assertIdentical($cache->isOld('0.9.0-1-hashstuffhere', $config), true); $this->assertIdentical($cache->isOld('1.0.0-1-hashstuffhere', $config), true); $this->assertIdentical($cache->isOld('1.0.0beta-11-hashstuffhere', $config), true); + + HTMLPurifier_ConfigSchema::instance($old_copy); + } }