From 79df79b2fd35ca76269d521e62777bcae1e6777d Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Thu, 2 Aug 2007 23:34:30 +0000 Subject: [PATCH] [2.1.0] Add tutorial for creating URI Filters - Update NEWS git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1348 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 3 + docs/enduser-uri-filter.html | 201 +++++++++++++++++++++++++ docs/index.html | 3 + docs/style.css | 3 + library/HTMLPurifier/URIDefinition.php | 5 + 5 files changed, 215 insertions(+) create mode 100644 docs/enduser-uri-filter.html diff --git a/NEWS b/NEWS index a6509bdd..6796ddd3 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier standalone folder) ! Relative URIs can now be transformed into their absolute equivalents using %URI.Base and %URI.MakeAbsolute +! Ruby implemented for XHTML 1.1 +! You can now define custom URI filtering behavior, see enduser-uri-filter.html + for more details - AutoFormatters emit friendly error messages if tags or attributes they need are not allowed - ConfigForm's compactification of directive names is now configurable diff --git a/docs/enduser-uri-filter.html b/docs/enduser-uri-filter.html new file mode 100644 index 00000000..1c6b733b --- /dev/null +++ b/docs/enduser-uri-filter.html @@ -0,0 +1,201 @@ + + + + + + + +URI Filters - HTML Purifier + + + +

URI Filters

+ +
Filed under End-User
+
Return to the index.
+
HTML Purifier End-User Documentation
+ +

+ This is a quick and dirty document to get you on your way to writing + custom URI filters for your own URL filtering needs. Why would you + want to write a URI filter? If you need URIs your users put into + HTML to magically change into a different URI, this is + exactly what you need! +

+ +

Creating the class

+ +

+ Any URI filter you make will be a subclass of HTMLPurifier_URIFilter. + The scaffolding is thus: +

+ +
class HTMLPurifier_URIFilter_NameOfFilter extends HTMLPurifier_URIFilter // skeleton for a custom URI filter
+{
+    var $name = 'NameOfFilter'; // name of your filter
+    function prepare($config) {} // initialization hook, called only once before any filtering is done
+    function filter(&$uri, $config, &$context) {} // modify $uri in place; return true on success, false to drop the URI
+}
+ +

+ Fill in the variable $name with the name of your filter, and + take a look at the two methods. prepare() is an initialization + method that is called only once, before any filtering of the + HTML has been done. Use it to perform any costly setup work that only needs to be done + once. filter() is the guts and innards of our filter: + it takes the URI and does whatever needs to be done to it.

+ +

+ If you've worked with HTML Purifier, you'll recognize the $config + and $context parameters. On the other hand, $uri + is something unique to this section of the application: it's a + HTMLPurifier_URI object. The interface is thus: +

+ +
class HTMLPurifier_URI // interface summary only; method bodies are omitted
+{
+    var $scheme, $userinfo, $host, $port, $path, $query, $fragment; // URI components, the members a filter works with
+    function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment); // constructor
+    function toString(); // serializer
+    function copy(); // cloner
+    function getSchemeObj($config, &$context); // returns the HTMLPurifier_URIScheme object corresponding to this URI
+    function validate($config, &$context); // general-purpose validation of the URI's internal components
+}
+ +

+ The first three methods are fairly self-explanatory: you have a constructor, + a serializer, and a cloner. Generally, you won't be using them when + you are manipulating the URI objects themselves. + getSchemeObj() is a special purpose method that returns + a HTMLPurifier_URIScheme object corresponding to the specific + URI at hand. validate() performs general-purpose validation + on the internal components of a URI. Once again, you don't need to + worry about these: they've already been handled for you. +

+ +

URI format

+ +

+ As a URIFilter, we're interested in the member variables of the URI object. +

+ + + + + + + + + +
Scheme The protocol for identifying (and possibly locating) a resource (http, ftp, https)
Userinfo User information such as a username (bob)
Host Domain name or IP address of the server (example.com, 127.0.0.1)
Port Network port number for the server (80, 12345)
Path Data that identifies the resource, possibly hierarchical (/path/to, ed@example.com)
Query String of information to be interpreted by the resource (?q=search-term)
Fragment Additional information for the resource after retrieval (#bookmark)
+ +

+ Because the URI is presented to us in this form, and not + http://bob@example.com:8080/foo.php?q=string#hash, it saves us + a lot of trouble in having to parse the URI every time we want to filter + it. For the record, the above URI has the following components: +

+ + + + + + + + + +
Scheme http
Userinfo bob
Host example.com
Port 8080
Path /foo.php
Query q=string
Fragment hash
+ +

+ Note that there is no question mark or octothorpe in the query or + fragment: these get removed during parsing. +

+ +

+ With this information, you can get straight to implementing your + filter() method. But one more thing... +

+ +

Return value: Boolean, not URI

+ +

+ You may have noticed that the URI is being passed in by reference. + This means that whatever changes you make to it, those changes will + be reflected in the URI object the caller had. Do not + return the URI object: it is unnecessary and will cause bugs. + Instead, return a boolean value, true if the filtering was successful, + or false if the URI is beyond repair and needs to be axed.

+ +

+ Let's suppose I wanted to write a filter that de-internationalized domain + names by converting them to Punycode. + Assuming that punycode_encode($input) converts $input to + Punycode and returns false on failure: +

+ +
class HTMLPurifier_URIFilter_ConvertIDNToPunycode extends HTMLPurifier_URIFilter
+{
+    var $name = 'ConvertIDNToPunycode';
+    /**
+     * Converts a non-ASCII host name to Punycode, modifying the URI in place.
+     * @param $uri URI object, passed by reference; only its host member is touched
+     * @param $config Configuration object (not used by this filter)
+     * @param $context Context object (not used by this filter)
+     * @return Boolean true if the URI can be kept, false if it must be dropped
+     */
+    function filter(&$uri, $config, &$context) {
+        // no host component, so there is nothing to convert
+        if (is_null($uri->host)) return true;
+        if ($uri->host == utf8_decode($uri->host)) {
+            // is ASCII, abort
+            return true;
+        }
+        // punycode_encode() is assumed to return false on failure
+        $host = punycode_encode($uri->host);
+        if ($host === false) return false;
+        $uri->host = $host;
+        return true;
+    }
+}
+ +

+ Notice I did not return $uri;. +

+ +

Activating your filter

+ +

+ Having a filter is all well and good, but you need to tell HTML Purifier + to use it. Fortunately, this part's simple: +

+ +
$uri =& $config->getDefinition('URI');
+// addFilter() is declared as addFilter($filter, $config), so the config object must be passed too
+$uri->addFilter(new HTMLPurifier_URIFilter_NameOfFilter(), $config);
+ +

+ If you want to be really fancy, you can define a configuration directive + for your filter and have HTML Purifier automatically manage whether or + not your filter gets loaded (this is how internal filters manage + things):

+ +
HTMLPurifier_ConfigSchema::define( // declares the %URI.NameOfFilter configuration directive
+    'URI', 'NameOfFilter', false, 'bool', // namespace and directive name; presumably default value and type follow -- confirm against ConfigSchema
+    'What your filter does.'
+);
+$uri =& $config->getDefinition('URI', true);
+$uri->registerFilter(new HTMLPurifier_URIFilter_NameOfFilter()); // a registered filter is only called when %URI.NameOfFilter is set to true
+
+ +

+ Now, your filter will only be called when %URI.NameOfFilter + is set to true. +

+ +

Examples

+ +

+ Check the + URIFilter + directory for more implementation examples, and see the + new directives proposal document for ideas on what could be implemented + as a filter. +

+ +
$Id$
+ + diff --git a/docs/index.html b/docs/index.html index 437a8bfd..8d295dda 100644 --- a/docs/index.html +++ b/docs/index.html @@ -40,6 +40,9 @@ information for casual developers using HTML Purifier.

Customize
Tutorial for customizing HTML Purifier's tag and attribute sets.
+
URI Filters
+
Tutorial for creating custom URI filters.
+

Development

diff --git a/docs/style.css b/docs/style.css index db2dd7d7..40e732c1 100644 --- a/docs/style.css +++ b/docs/style.css @@ -33,6 +33,9 @@ blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em; .table thead th:first-child {-moz-border-radius-topleft:1em;} .table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;} +/* A quick table*/ +table.quick tbody th {text-align:right; padding-right:1em;} + /* Category of the file */ #filing {font-weight:bold; font-size:smaller; } diff --git a/library/HTMLPurifier/URIDefinition.php b/library/HTMLPurifier/URIDefinition.php index 131c95de..8e421343 100644 --- a/library/HTMLPurifier/URIDefinition.php +++ b/library/HTMLPurifier/URIDefinition.php @@ -102,6 +102,11 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition $this->registeredFilters[$filter->name] = $filter; } + function addFilter($filter, $config) { + $filter->setup($config); + $this->filter[$filter->name] = $filter; + } + function doSetup($config) { $this->setupFilters($config); $this->setupMemberVariables($config);