diff --git a/Doxyfile b/Doxyfile
index 8ecf65ae..9076573d 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -4,7 +4,7 @@
 # Project related configuration options
 PROJECT_NAME           = HTML Purifier
-PROJECT_NUMBER         = 2.0.1
+PROJECT_NUMBER         = 2.1.1
 OUTPUT_DIRECTORY       = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
 OUTPUT_LANGUAGE        = English
diff --git a/NEWS b/NEWS
index 19b70259..04bfa37d 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,68 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
     . Internal change
+2.1.1, released 2007-08-04
+- Fix show-stopper bug in %URI.MakeAbsolute functionality
+- Fix PHP4 syntax error in standalone version
+. Add prefix directory to include path for standalone, this prevents
+  other installations from clobbering the standalone's URI schemes
+. Single test methods can be invoked by prefixing with __only
+2.1.0, released 2007-08-02
+# flush-htmldefinition-cache.php superseded in favor of a generic
+  flush-definition-cache.php script, you can clear a specific cache
+  by passing its name as a parameter to the script
+! Phorum mod implemented for HTML Purifier
+! With %Core.AggressivelyFixLt, <3 and similar emoticons no longer
+  trigger HTML removal in PHP5 (DOMLex). This directive is not necessary
+  for PHP4 (DirectLex).
+! Standalone file now available, which greatly reduces the amount of
+  includes (although there are still a few files that reside in the
+  standalone folder)
+! Relative URIs can now be transformed into their absolute equivalents
+  using %URI.Base and %URI.MakeAbsolute
+! Ruby implemented for XHTML 1.1
+! You can now define custom URI filtering behavior, see enduser-uri-filter.html
+  for more details
+! UTF-8 font names now supported in CSS
+- AutoFormatters emit friendly error messages if tags or attributes they
+  need are not allowed
+- ConfigForm's compactification of directive names is now configurable
+- AutoParagraph autoformatter algorithm refined after field-testing
+- XHTML 1.1 now applies XHTML 1.0 Strict cleanup routines, namely
+  blockquote wrapping
+- Contents of <style> tags removed by default when tags are removed
+. HTMLPurifier_Config->getSerial() implemented, this is extremely useful
+  for output cache invalidation
+. ConfigForm printer now can retrieve CSS and JS files as strings, in
+  case HTML Purifier's directory is not publicly accessible
+. Introduce new text/itext configuration directive values: these represent
+  longer strings that would be more appropriately edited with a textarea
+. Allow newlines to act as separators for lists, hashes, lookups and
+  %HTML.Allowed
+. ConfigForm generates textareas instead of text inputs for lists, hashes,
+  lookups, text and itext fields
+. Hidden element content removal genericized: %Core.HiddenElements can
+  be used to customize this behavior, by default <script> and <style> are
+  hidden
+. Added HTMLPURIFIER_PREFIX constant, should be used instead of dirname(__FILE__)
+. Custom ChildDef added to default include list
+. URIScheme reflection improved: will not attempt to include file if class
+  already exists. May clobber autoload, so I need to keep an eye on it
+. ConfigSchema heavily optimized, will only collect information and validate
+  definitions when HTMLPURIFIER_SCHEMA_STRICT is true.
+. AttrDef_URI unit tests and implementation refactored
+. benchmarks/ directory now protected from public view with .htaccess file;
+  run the tests via command line
+. URI scheme is munged off if there is no authority and the scheme is the
+  default one
+. All unit tests inherit from HTMLPurifier_Harness, not UnitTestCase
+. Interface for URIScheme changed
+. Generic URI object to hold components of URI added, most systems involved
+  in URI validation have been migrated to use it
+. Custom filtering for URIs factored out to URIDefinition interface for
+  maximum extensibility
 2.0.1, released 2007-06-27
 ! Tag auto-closing now based on a ChildDef heuristic rather than a
   manually set auto_close array; some behavior may change
diff --git a/TODO b/TODO
index 5bce0a60..0fa3eb08 100644
--- a/TODO
+++ b/TODO
@@ -6,14 +6,9 @@ TODO List
     ? Maybe I'll Do It
-2.1 release [Refactor, refactor!]
- # URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
- # Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
- # Ruby support
- - Configuration profiles: predefined directives set with one func call
- - Implement IDREF support (harder than it seems, since you cannot have
-   IDREFs to non-existent IDs)
- - Allow non-ASCII characters in font names
+If no interest is expressed for a feature that may require a considerable
+amount of effort to implement, it may get endlessly delayed. Do not be
+afraid to cast your vote for the next feature to be implemented!
 2.2 release [Error'ed]
  # Error logging for filtering/cleanup procedures
@@ -36,6 +31,8 @@ TODO List
 2.4 release [It's All About Trust] (floating)
  # Implement untrusted, dangerous elements/attributes
+ # Implement IDREF support (harder than it seems, since you cannot have
+   IDREFs to non-existent IDs)
 3.0 release [Beyond HTML]
  # Legit token based CSS parsing (will require revamping almost every
@@ -60,9 +57,7 @@ TODO List
  - Lots of profiling, make it faster!
  - Plugins for major CMSes (COMPLEX)
-    - WordPress (mostly written, needs beta-testing)
     - phpBB
-    - Phorum
     - eFiction
     - more! (look for ones that use WYSIWYGs)
  - Complete basic smoketests
@@ -71,13 +66,15 @@ Unknown release (on a scratch-an-itch basis)
  ? Semi-lossy dumb alternate character encoding transfor
  ? Have 'lang' attribute be checked against official lists, achieved by
    encoding all characters that have string entity equivalents
- - Explain how to use HTML Purifier in non-PHP languages / create
-   a simple command line stub
  - Abstract ChildDef_BlockQuote to work with all elements that only
    allow blocks in them, required or optional
  - Reorganize Unit Tests
-    - Refactor loop tests (esp. AttrDef_URI)
+    - Refactor loop tests: Lexer
  - Reorganize configuration directives (Create more namespaces! Get messy!)
+ - Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
+ - Implement lenient <ruby> child validation
+ - Explain how to use HTML Purifier in non-PHP languages / create
+   a simple command line stub (or complicated?)
diff --git a/VERSION b/VERSION
index 10bf840e..7c327287 100644
@@ -1 +1 @@
\ No newline at end of file
\ No newline at end of file
diff --git a/WHATSNEW b/WHATSNEW
index 2f0b2d9d..a08edbb5 100644
@@ -1,12 +1,10 @@
-The 2.0.1 release introduces a number of stability and usability fixes,
-as well as a number of (disabled by default) experimental features.  The
-security-minded should note that a reflected XSS vulnerability was patched
-in smoketests/configForm.php; if you cannot upgrade immediately, please
-delete that file (if that directory is not publically accessible, there
-is no security risk).  The maintenance changes include more helpful file
-permissions errors, internal newline normalization, reordered includes
-to prevent a missing class definition in some setups, and better cache
-revision and id handling.  The two experimental features are auto-formatting
-(auto-paragraphing and linkification) and error collection, these can
-be enabled with %AutoFormat.AutoParagraph, %AutoFormat.Linkify and
-%Core.CollectErrors respectively.
+In version 2.1, HTML Purifier's URI validation and filtering handling
+system has been revamped with a new, extensible URIFilter system. Other
+notable features include preservation of emoticons in PHP5 with
+%Core.AggressivelyFixLt, standalone and lite download versions,
+transforming relative URIs to absolute URIs, Ruby in XHTML 1.1, a Phorum
+mod, and UTF-8 font names.  Notable bug-fixes include refinement of
+the auto-paragraphing algorithm (no longer experimental), better XHTML
+1.1 support and the removal of the contents of <style> elements. Version
+2.1.1 fixes a few bugs in some of the newly introduced features, namely
+running the standalone download version in PHP4 and %URI.MakeAbsolute.
diff --git a/benchmarks/.htaccess b/benchmarks/.htaccess
new file mode 100644
index 00000000..3a428827
--- /dev/null
+++ b/benchmarks/.htaccess
@@ -0,0 +1 @@
+Deny from all
diff --git a/benchmarks/Trace.php b/benchmarks/Trace.php
new file mode 100644
index 00000000..fa98ffac
--- /dev/null
+++ b/benchmarks/Trace.php
@@ -0,0 +1,12 @@
+ini_set('xdebug.trace_format', 1);
+ini_set('xdebug.show_mem_delta', true);
+xdebug_start_trace(dirname(__FILE__) . '/Trace');
+require_once '../library/HTMLPurifier.auto.php';
+$purifier = new HTMLPurifier();
+$data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
diff --git a/configdoc/generate.php b/configdoc/generate.php
index 97e96433..9e73f4c7 100644
--- a/configdoc/generate.php
+++ b/configdoc/generate.php
@@ -18,6 +18,8 @@ TODO:
 if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
 error_reporting(E_ALL); // probably not possible to use E_STRICT
+define('HTMLPURIFIER_SCHEMA_STRICT', true); // description data needs to be collected
 // load dual-libraries
 require_once '../library/HTMLPurifier.auto.php';
 require_once 'library/ConfigDoc.auto.php';
diff --git a/docs/dev-code-quality.txt b/docs/dev-code-quality.txt
index 7c09a22c..10e21cb7 100644
--- a/docs/dev-code-quality.txt
+++ b/docs/dev-code-quality.txt
@@ -11,8 +11,7 @@ docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
 AttrDef - a lot of duplication, more generic classes need to be created;
 a lot of strtolower() calls, no legit casing
-    Class - doesn't support Unicode characters (fringe); uses regular
-        expressions
+    Class - doesn't support Unicode characters (fringe); uses regular expressions
     Lang - code duplication; premature optimization
     Length - easily mistaken for CSSLength
     URI - multiple regular expressions; missing validation for parts (?)
@@ -22,9 +21,6 @@ ConfigSchema - redefinition is a mess
     FixNesting - cannot bubble nodes out of structures, duplicated checks
         for special-case parent node
-    MakeWellFormed - insufficient automatic closing definitions (check HTML
-        spec for optional end tags, also, closing based on type (block/inline)
-        might be efficient).
     RemoveForeignElements - should be run in parallel with MakeWellFormed
 URIScheme - needs to have callable generic checks
     mailto - doesn't validate emails, doesn't validate querystring
diff --git a/docs/enduser-security.txt b/docs/enduser-security.txt
index 49aff331..dd856395 100644
--- a/docs/enduser-security.txt
+++ b/docs/enduser-security.txt
@@ -10,9 +10,7 @@ to be effective. Things to remember:
 2. IDs: see enduser-id.html for more info
-3. Links: document pending feature completion
-Rudimentary blacklisting, we should also allow only relative URIs. We
-need a doc to explain the stuff.
+3. URIs: see enduser-uri-filter.html
 4. CSS: document pending
 Explain which CSS styles we blocked and why.
diff --git a/docs/enduser-uri-filter.html b/docs/enduser-uri-filter.html
new file mode 100644
index 00000000..04a611f8
--- /dev/null
+++ b/docs/enduser-uri-filter.html
@@ -0,0 +1,201 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+<meta name="description" content="Tutorial for creating custom URI filters." />
+<link rel="stylesheet" type="text/css" href="style.css" />
+<title>URI Filters - HTML Purifier</title>
+<h1>URI Filters</h1>
+<div id="filing">Filed under End-User</div>
+<div id="index">Return to the <a href="index.html">index</a>.</div>
+<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
+  This is a quick and dirty document to get you on your way to writing
+  custom URI filters for your own URL filtering needs.  Why would you
+  want to write a URI filter?  If you need URIs your users put into
+  HTML to magically change into a different URI, this is
+  exactly what you need!
+<h2>Creating the class</h2>
+  Any URI filter you make will be a subclass of <code>HTMLPurifier_URIFilter</code>.
+  The scaffolding is thus:
+<pre>class HTMLPurifier_URIFilter_<strong>NameOfFilter</strong> extends HTMLPurifier_URIFilter
+    var $name = '<strong>NameOfFilter</strong>';
+    function prepare($config) {}
+    function filter(&$uri, $config, &$context) {}
+  Fill in the variable <code>$name</code> with the name of your filter, and
+  take a look at the two methods. <code>prepare()</code> is an initialization
+  method that is called only once, before any filtering has been done of the
+  HTML. Use it to perform any costly setup work that only needs to be done
+  once. <code>filter()</code> is the guts and innards of our filter:
+  it takes the URI and does whatever needs to be done to it.
+  If you've worked with HTML Purifier, you'll recognize the <code>$config</code>
+  and <code>$context</code> parameters.  On the other hand, <code>$uri</code>
+  is something unique to this section of the application: it's a
+  <code>HTMLPurifier_URI</code> object. The interface is thus:
+<pre>class HTMLPurifier_URI
+    var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
+    function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
+    function toString();
+    function copy();
+    function getSchemeObj($config, &$context);
+    function validate($config, &$context);
+  The first three methods are fairly self-explanatory: you have a constructor,
+  a serializer, and a cloner.  Generally, you won't be using them when
+  you are manipulating the URI objects themselves.
+  <code>getSchemeObj()</code> is a special purpose method that returns
+  a <code>HTMLPurifier_URIScheme</code> object corresponding to the specific
+  URI at hand. <code>validate()</code> performs general-purpose validation
+  on the internal components of a URI. Once again, you don't need to
+  worry about these: they've already been handled for you.
+<h2>URI format</h2>
+  As a URIFilter, we're interested in the member variables of the URI object.
+<table class="quick"><tbody>
+  <tr><th>Scheme</th>   <td>The protocol for identifying (and possibly locating) a resource (http, ftp, https)</td></tr>
+  <tr><th>Userinfo</th> <td>User information such as a username (bob)</td></tr>
+  <tr><th>Host</th>     <td>Domain name or IP address of the server (example.com, 127.0.0.1)</td></tr>
+  <tr><th>Port</th>     <td>Network port number for the server (80, 12345)</td></tr>
+  <tr><th>Path</th>     <td>Data that identifies the resource, possibly hierarchical (/path/to, ed@example.com)</td></tr>
+  <tr><th>Query</th>    <td>String of information to be interpreted by the resource (?q=search-term)</td></tr>
+  <tr><th>Fragment</th> <td>Additional information for the resource after retrieval (#bookmark)</td></tr>
+  Because the URI is presented to us in this form, and not 
+  <code>http://bob@example.com:8080/foo.php?q=string#hash</code>, it saves us
+  a lot of trouble in having to parse the URI every time we want to filter
+  it. For the record, the above URI has the following components:
+<table class="quick"><tbody>
+  <tr><th>Scheme</th>   <td>http</td></tr>
+  <tr><th>Userinfo</th> <td>bob</td></tr>
+  <tr><th>Host</th>     <td>example.com</td></tr>
+  <tr><th>Port</th>     <td>8080</td></tr>
+  <tr><th>Path</th>     <td>/foo.php</td></tr>
+  <tr><th>Query</th>    <td>q=string</td></tr>
+  <tr><th>Fragment</th> <td>hash</td></tr>
+  Note that there is no question mark or octothorpe in the query or
+  fragment: these get removed during parsing.
+  With this information, you can get straight to implementing your
+  <code>filter()</code> method. But one more thing...
+<h2>Return value: Boolean, not URI</h2>
+  You may have noticed that the URI is being passed in by reference.
+  This means that whatever changes you make to it, those changes will
+  be reflected in the URI object the caller had.  <strong>Do not
+  return the URI object: it is unnecessary and will cause bugs.</strong>
+  Instead, return a boolean value, true if the filtering was successful,
+  or false if the URI is beyond repair and needs to be axed.
+  Let's suppose I wanted to write a filter that de-internationalized domain
+  names by converting them to <a href="http://en.wikipedia.org/wiki/Punycode">Punycode</a>.
+  Assuming that <code>punycode_encode($input)</code> converts <code>$input</code> to
+  Punycode and returns <code>false</code> on failure:
+<pre>class HTMLPurifier_URIFilter_ConvertIDNToPunycode extends HTMLPurifier_URIFilter
+    var $name = 'ConvertIDNToPunycode';
+    function filter(&$uri, $config, &$context) {
+        if (is_null($uri->host)) return true;
+        if ($uri->host == utf8_decode($uri->host)) {
+            // is ASCII, abort
+            return true;
+        }
+        $host = punycode_encode($uri->host);
+        if ($host === false) return false;
+        $uri->host = $host;
+        return true;
+    }
+  Notice I did not <code>return $uri;</code>.
+<h2>Activating your filter</h2>
+  Having a filter is all well and good, but you need to tell HTML Purifier
+  to use it. Fortunately, this part's simple:
+<pre>$uri =& $config->getDefinition('URI');
+$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pre>
+  If you want to be really fancy, you can define a configuration directive
+  for your filter and have HTML Purifier automatically manage whether or
+  not your filter gets loaded (this is how internal filters manage
+  things):
+    'URI', '<strong>NameOfFilter</strong>', false, 'bool',
+    '<strong>What your filter does.</strong>'
+$uri =& $config->getDefinition('URI', true);
+$uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());
+  Now, your filter will only be called when %URI.<strong>NameOfFilter</strong>
+  is set to true.
+  Check the
+  <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/URIFilter/">URIFilter</a>
+  directory for more implementation examples, and see <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/docs/proposal-new-directives.txt">the
+  new directives proposal document</a> for ideas on what could be implemented
+  as a filter.
+<div id="version">$Id$</div>
diff --git a/docs/enduser-utf8.html b/docs/enduser-utf8.html
index ef8c136b..b8cee57d 100644
--- a/docs/enduser-utf8.html
+++ b/docs/enduser-utf8.html
@@ -231,7 +231,7 @@ of your real encoding.</p>
     why the character encoding should be explicitly stated. When the
     browser isn't told what the character encoding of a text is, it
     has to guess: and sometimes the guess is wrong. Hackers can manipulate
-    this guess in order to slip XSS pass filters and then fool the
+    this guess in order to slip XSS past filters and then fool the
     browser into executing it as active code. A great example of this
     is the <a href="http://shiflett.org/archive/177">Google UTF-7
@@ -567,10 +567,11 @@ which may be used by POST, and is required when you want to upload
 <p>The following is a summarization of notes from
-<a href="http://ppewww.physics.gla.ac.uk/~flavell/charset/form-i18n.html">
+<a href="http://web.archive.org/web/20060427015200/ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html">
 <code>FORM</code> submission and i18n</a>. That document contains lots
 of useful information, but is written in a rambly manner, so
-here I try to get right to the point.</p>
+here I try to get right to the point. (Note: the original has 
+disappeared off the web, so I am linking to the Web Archive copy.)</p>
 <h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>
diff --git a/docs/index.html b/docs/index.html
index 437a8bfd..8d295dda 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -40,6 +40,9 @@ information for casual developers using HTML Purifier.</p>
 <dt><a href="enduser-customize.html">Customize</a></dt>
 <dd>Tutorial for customizing HTML Purifier's tag and attribute sets.</dd>
+<dt><a href="enduser-uri-filter.html">URI Filters</a></dt>
+<dd>Tutorial for creating custom URI filters.</dd>
diff --git a/docs/proposal-filter-levels.txt b/docs/proposal-filter-levels.txt
index 9e9cfbb0..3118c644 100644
--- a/docs/proposal-filter-levels.txt
+++ b/docs/proposal-filter-levels.txt
@@ -32,7 +32,7 @@ Here are some fuzzy levels you could set:
 One final note: when you start axing tags that are more commonly used, you
 run the risk of accidentally destroying user data, especially if the data
-is incoming from a WYSIWYG eidtor that hasn't been synced accordingly. This may
+is incoming from a WYSIWYG editor that hasn't been synced accordingly. This may
 make forbidden element to text transformations desirable (for example, images).
diff --git a/docs/proposal-new-directives.txt b/docs/proposal-new-directives.txt
index 2c08ddbb..1ce1b93b 100644
--- a/docs/proposal-new-directives.txt
+++ b/docs/proposal-new-directives.txt
@@ -2,7 +2,8 @@
 Configuration Ideas
 Here are some theoretical configuration ideas that we could implement some
-time.  Note the naming convention: %Namespace.Directive
+time.  Note the naming convention: %Namespace.Directive. If you want one
+implemented, give us a ring, and we'll move it up the priority chain.
 %Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
     rewrite the URLs we parse too.  However, we can only do it when it's a pure
@@ -22,8 +23,6 @@ time.  Note the naming convention: %Namespace.Directive
 %URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
     spread of ill-gotten pagerank
-%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
 %URI.HostBlacklistRegex - regexes that if matching the host are disallowed
 %URI.HostWhitelist - domain names that are excluded from the host blacklist
 %URI.HostPolicy - determines whether or not its reject all and then whitelist
diff --git a/docs/ref-css-length.txt b/docs/ref-css-length.txt
new file mode 100644
index 00000000..284ec8b2
--- /dev/null
+++ b/docs/ref-css-length.txt
@@ -0,0 +1,28 @@
+CSS Length Reference
+  To bound, or not to bound, that is the question
+It's quite a reasonable request, really, and it's already been implemented
+for HTML.  That is, length bounding.  It makes little sense to let users
+define text blocks that have a font-size of 63,360 inches (that's a mile,
+by the way) or a width of forty-fold the parent container.
+But it's a little more complicated than that. There are multiple units
+one can use, and we have to do a little unit conversion to get things working.
+Here's what we have:
+    1 in = 2.54 cm
+    1 cm = 10 mm
+    1 pt = 1/72 in
+    1 pc = 12 pt
+    1 em ~= 10.0667 px
+    1 ex ~= 0.5 em, though Mozilla Firefox says 1 ex = 6px
+    1 px ~= 1 pt
+Watch out: font-sizes can also be nested to get successively larger 
+(although I do not relish having to keep track of context font-sizes,
+this may be necessary, especially for some of the more advanced features
+for preventing things like white on white).
diff --git a/docs/style.css b/docs/style.css
index db2dd7d7..40e732c1 100644
--- a/docs/style.css
+++ b/docs/style.css
@@ -33,6 +33,9 @@ blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
 .table thead th:first-child {-moz-border-radius-topleft:1em;}
 .table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
+/* A quick table*/
+table.quick tbody th {text-align:right; padding-right:1em;}
 /* Category of the file */
 #filing {font-weight:bold; font-size:smaller; }
diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php
index ebbde869..af61751b 100644
--- a/library/HTMLPurifier.php
+++ b/library/HTMLPurifier.php
@@ -22,7 +22,7 @@
-    HTML Purifier 2.0.1 - Standards Compliant HTML Filtering
+    HTML Purifier 2.1.1 - Standards Compliant HTML Filtering
     Copyright (C) 2006 Edward Z. Yang
     This library is free software; you can redistribute it and/or
@@ -40,6 +40,9 @@
     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+// constants are slow, but we'll make one exception
+define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
 // almost every class has an undocumented dependency to these, so make sure
 // they get included
 require_once 'HTMLPurifier/ConfigSchema.php'; // important
@@ -74,7 +77,7 @@ This directive has been available since 2.0.0.
 class HTMLPurifier
-    var $version = '2.0.1';
+    var $version = '2.1.1';
     var $config;
     var $filters;
@@ -196,13 +199,13 @@ class HTMLPurifier
      * Singleton for enforcing just one HTML Purifier in your system
-    function &getInstance($prototype = null) {
+    static function &getInstance($prototype = null) {
         static $htmlpurifier;
         if (!$htmlpurifier || $prototype) {
-            if (is_a($prototype, 'HTMLPurifier')) {
+            if ($prototype instanceof HTMLPurifier) {
                 $htmlpurifier = $prototype;
             } elseif ($prototype) {
-                $htmlpurifier = new HTMLPurifier(HTMLPurifier_Config::create($prototype));
+                $htmlpurifier = new HTMLPurifier($prototype);
             } else {
                 $htmlpurifier = new HTMLPurifier();
diff --git a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
index 223e7769..dfd89b95 100644
--- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -38,19 +38,24 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
                 $quote = $font[0];
                 if ($font[$length - 1] !== $quote) continue;
                 $font = substr($font, 1, $length - 2);
+                // double-backslash processing is buggy
+                $font = str_replace("\\$quote", $quote, $font); // de-escape quote
+                $font = str_replace("\\\n", "\n", $font);       // de-escape newlines
-            // process font
+            // $font is a pure representation of the font name
             if (ctype_alnum($font)) {
                 // very simple font, allow it in unharmed
                 $final .= $font . ', ';
-            $nospace = str_replace(array(' ', '.', '!'), '', $font);
-            if (ctype_alnum($nospace)) {
-                // font with spaces in it
-                $final .= "'$font', ";
-                continue;
-            }
+            // complicated font, requires quoting
+            // armor single quotes and new lines
+            $font = str_replace("'", "\\'", $font);
+            $font = str_replace("\n", "\\\n", $font);
+            $final .= "'$font', ";
         $final = rtrim($final, ', ');
         if ($final === '') return false;
diff --git a/library/HTMLPurifier/AttrDef/CSS/URI.php b/library/HTMLPurifier/AttrDef/CSS/URI.php
index 107545cc..b71a8585 100644
--- a/library/HTMLPurifier/AttrDef/CSS/URI.php
+++ b/library/HTMLPurifier/AttrDef/CSS/URI.php
@@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
     function HTMLPurifier_AttrDef_CSS_URI() {
-        $this->HTMLPurifier_AttrDef_URI(true); // always embedded
+        parent::HTMLPurifier_AttrDef_URI(true); // always embedded
     function validate($uri_string, $config, &$context) {
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php
index 6250d08e..dcf9849c 100644
--- a/library/HTMLPurifier/AttrDef/URI.php
+++ b/library/HTMLPurifier/AttrDef/URI.php
@@ -1,90 +1,65 @@
 require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/URIParser.php';
 require_once 'HTMLPurifier/URIScheme.php';
 require_once 'HTMLPurifier/URISchemeRegistry.php';
 require_once 'HTMLPurifier/AttrDef/URI/Host.php';
 require_once 'HTMLPurifier/PercentEncoder.php';
-    'URI', 'DefaultScheme', 'http', 'string',
-    'Defines through what scheme the output will be served, in order to '.
-    'select the proper object validator when no scheme information is present.'
+// special case filtering directives 
-    'URI', 'Host', null, 'string/null',
-    'Defines the domain name of the server, so we can determine whether or '.
-    'an absolute URI is from your website or not.  Not strictly necessary, '.
-    'as users should be using relative URIs to reference resources on your '.
-    'website.  It will, however, let you use absolute URIs to link to '.
-    'subdomains of the domain you post here: i.e. example.com will allow '.
-    'sub.example.com.  However, higher up domains will still be excluded: '.
-    'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
-    'This directive has been available since 1.2.0.'
+    'URI', 'Munge', null, 'string/null', '
+    Munges all browsable (usually http, https and ftp)
+    absolute URI\'s into another URI, usually a URI redirection service.
+    This directive accepts a URI, formatted with a <code>%s</code> where 
+    the url-encoded original URI should be inserted (sample: 
+    <code>http://www.google.com/url?q=%s</code>).
+    Uses for this directive:
+    <li>
+        Prevent PageRank leaks, while being fairly transparent 
+        to users (you may also want to add some client side JavaScript to 
+        override the text in the statusbar). <strong>Notice</strong>:
+        Many security experts believe that this form of protection does not deter spam-bots. 
+    </li>
+    <li>
+        Redirect users to a splash page telling them they are leaving your
+        website. While this is poor usability practice, it is often mandated
+        in corporate environments.
+    </li>
+    This directive has been available since 1.3.0.
+// disabling directives
-    'URI', 'DisableExternal', false, 'bool',
-    'Disables links to external websites.  This is a highly effective '.
-    'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
-    'links or images outside of your domain will be allowed.  Non-linkified '.
-    'URIs will still be preserved.  If you want to be able to link to '.
-    'subdomains or use absolute URIs, specify %URI.Host for your website. '.
-    'This directive has been available since 1.2.0.'
-    'URI', 'DisableExternalResources', false, 'bool',
-    'Disables the embedding of external resources, preventing users from '.
-    'embedding things like images from other hosts. This prevents '.
-    'access tracking (good for email viewers), bandwidth leeching, '.
-    'cross-site request forging, goatse.cx posting, and '.
-    'other nasties, but also results in '.
-    'a loss of end-user functionality (they can\'t directly post a pic '.
-    'they posted from Flickr anymore). Use it if you don\'t have a '.
-    'robust user-content moderation team. This directive has been '.
-    'available since 1.3.0.'
-    'URI', 'DisableResources', false, 'bool',
-    'Disables embedding resources, essentially meaning no pictures. You can '.
-    'still link to them though. See %URI.DisableExternalResources for why '.
-    'this might be a good idea. This directive has been available since 1.3.0.'
-    'URI', 'Munge', null, 'string/null',
-    'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
-    'redirection service. Pass this directive a URI, with %s inserted where '.
-    'the url-encoded original URI should be inserted (sample: '.
-    '<code>http://www.google.com/url?q=%s</code>). '.
-    'This prevents PageRank leaks, while being as transparent as possible '.
-    'to users (you may also want to add some client side JavaScript to '.
-    'override the text in the statusbar). Warning: many security experts '.
-    'believe that this form of protection does not deter spam-bots. '.
-    'You can also use this directive to redirect users to a splash page '.
-    'telling them they are leaving your website. '.
-    'This directive has been available since 1.3.0.'
-    'URI', 'HostBlacklist', array(), 'list',
-    'List of strings that are forbidden in the host of any URI. Use it to '.
-    'kill domain names of spam, etc. Note that it will catch anything in '.
-    'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
-    'This directive has been available since 1.3.0.'
-    'URI', 'Disable', false, 'bool',
-    'Disables all URIs in all forms. Not sure why you\'d want to do that '.
-    '(after all, the Internet\'s founded on the notion of a hyperlink). '.
-    'This directive has been available since 1.3.0.'
+    'URI', 'Disable', false, 'bool', '
+    Disables all URIs in all forms. Not sure why you\'d want to do that 
+    (after all, the Internet\'s founded on the notion of a hyperlink). 
+    This directive has been available since 1.3.0.
 HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
+    'URI', 'DisableResources', false, 'bool', '
+    Disables embedding resources, essentially meaning no pictures. You can 
+    still link to them though. See %URI.DisableExternalResources for why 
+    this might be a good idea. This directive has been available since 1.3.0.
  * Validates a URI as defined by RFC 3986.
  * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
@@ -92,214 +67,83 @@ HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
 class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
-    var $host;
-    var $embeds_resource;
+    var $parser, $percentEncoder;
+    var $embedsResource;
      * @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
     function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
-        $this->host = new HTMLPurifier_AttrDef_URI_Host();
-        $this->embeds_resource = (bool) $embeds_resource;
+        $this->parser = new HTMLPurifier_URIParser();
+        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
+        $this->embedsResource = (bool) $embeds_resource;
     function validate($uri, $config, &$context) {
-        static $PercentEncoder = null;
-        if ($PercentEncoder === null) $PercentEncoder = new HTMLPurifier_PercentEncoder();
-        // We'll write stack-based parsers later, for now, use regexps to
-        // get things working as fast as possible (irony)
         if ($config->get('URI', 'Disable')) return false;
-        // parse as CDATA
+        // initial operations
         $uri = $this->parseCDATA($uri);
+        $uri = $this->percentEncoder->normalize($uri);
-        // fix up percent-encoding
-        $uri = $PercentEncoder->normalize($uri);
+        // parse the URI
+        $uri = $this->parser->parse($uri);
+        if ($uri === false) return false;
-        // while it would be nice to use parse_url(), that's specifically
-        // for HTTP and thus won't work for our generic URI parsing
+        // add embedded flag to context for validators
+        $context->register('EmbeddedURI', $this->embedsResource); 
-        // according to the RFC... (but this cuts corners, i.e. non-validating)
-        $r_URI = '!'.
-            '(([^:/?#<>\'"]+):)?'. // 2. Scheme
-            '(//([^/?#<>\'"]*))?'. // 4. Authority
-            '([^?#<>\'"]*)'.       // 5. Path
-            '(\?([^#<>\'"]*))?'.   // 7. Query
-            '(#([^<>\'"]*))?'.     // 8. Fragment
-            '!';
-        $matches = array();
-        $result = preg_match($r_URI, $uri, $matches);
-        if (!$result) return false; // invalid URI
-        // seperate out parts
-        $scheme     = !empty($matches[1]) ? $matches[2] : null;
-        $authority  = !empty($matches[3]) ? $matches[4] : null;
-        $path       = $matches[5]; // always present, can be empty
-        $query      = !empty($matches[6]) ? $matches[7] : null;
-        $fragment   = !empty($matches[8]) ? $matches[9] : null;
-        $registry =& HTMLPurifier_URISchemeRegistry::instance();
-        if ($scheme !== null) {
-            // no need to validate the scheme's fmt since we do that when we
-            // retrieve the specific scheme object from the registry
-            $scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
-            $scheme_obj = $registry->getScheme($scheme, $config, $context);
-            if (!$scheme_obj) return false; // invalid scheme, clean it out
-        } else {
-            $scheme_obj = $registry->getScheme(
-                $config->get('URI', 'DefaultScheme'), $config, $context
-            );
-        }
-        // something funky weird happened in the registry, abort!
-        if (!$scheme_obj) {
-            trigger_error(
-                'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
-                E_USER_WARNING
-            );
-            return false;
-        }
-        // the URI we're processing embeds_resource a resource in the page, but the URI
-        // it references cannot be located
-        if ($this->embeds_resource && !$scheme_obj->browsable) {
-            return false;
-        }
-        if ($authority !== null) {
+        $ok = false;
+        do {
-            // remove URI if it's absolute and we disabled externals or
-            // if it's absolute and embedded and we disabled external resources
-            unset($our_host);
-            if (
-                $config->get('URI', 'DisableExternal') ||
-                (
-                    $config->get('URI', 'DisableExternalResources') &&
-                    $this->embeds_resource
-                )
-            ) {
-                $our_host = $config->get('URI', 'Host');
-                if ($our_host === null) return false;
+            // generic validation
+            $result = $uri->validate($config, $context);
+            if (!$result) break;
+            // chained validation
+            $uri_def =& $config->getDefinition('URI');
+            $result = $uri_def->filter($uri, $config, $context);
+            if (!$result) break;
+            // scheme-specific validation 
+            $scheme_obj = $uri->getSchemeObj($config, $context);
+            if (!$scheme_obj) break;
+            if ($this->embedsResource && !$scheme_obj->browsable) break;
+            $result = $scheme_obj->validate($uri, $config, $context);
+            if (!$result) break;
+            // survived gauntlet
+            $ok = true;
+        } while (false);
+        $context->destroy('EmbeddedURI');
+        if (!$ok) return false;
+        // munge scheme off if necessary (this must be last)
+        if (!is_null($uri->scheme) && is_null($uri->host)) {
+            if ($uri_def->defaultScheme == $uri->scheme) {
+                $uri->scheme = null;
-            $HEXDIG = '[A-Fa-f0-9]';
-            $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
-            $sub_delims = '!$&\'()'; // needs []
-            $pct_encoded = "%$HEXDIG$HEXDIG";
-            $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
-            $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
-            $matches = array();
-            preg_match($r_authority, $authority, $matches);
-            // overloads regexp!
-            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
-            $host       = !empty($matches[3]) ? $matches[3] : null;
-            $port       = !empty($matches[4]) ? $matches[5] : null;
-            // validate port
-            if ($port !== null) {
-                $port = (int) $port;
-                if ($port < 1 || $port > 65535) $port = null;
-            }
-            $host = $this->host->validate($host, $config, $context);
-            if ($host === false) $host = null;
-            if ($this->checkBlacklist($host, $config, $context)) return false;
-            // more lenient absolute checking
-            if (isset($our_host)) {
-                $host_parts = array_reverse(explode('.', $host));
-                // could be cached
-                $our_host_parts = array_reverse(explode('.', $our_host));
-                foreach ($our_host_parts as $i => $discard) {
-                    if (!isset($host_parts[$i])) return false;
-                    if ($host_parts[$i] != $our_host_parts[$i]) return false;
-                }
-            }
-            // userinfo and host are validated within the regexp
-        } else {
-            $port = $host = $userinfo = null;
+        // back to string
+        $result = $uri->toString();
-        // query and fragment are quite simple in terms of definition:
-        // *( pchar / "/" / "?" ), so define their validation routines
-        // when we start fixing percent encoding
-        // path gets to be validated against a hodge-podge of rules depending
-        // on the status of authority and scheme, but it's not that important,
-        // esp. since it won't be applicable to everyone
-        // okay, now we defer execution to the subobject for more processing
-        // note that $fragment is omitted
-        list($userinfo, $host, $port, $path, $query) = 
-            $scheme_obj->validateComponents(
-                $userinfo, $host, $port, $path, $query, $config, $context
-            );
-        // reconstruct authority
-        $authority = null;
-        if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
-            $authority = '';
-            if($userinfo !== null) $authority .= $userinfo . '@';
-            $authority .= $host;
-            if($port !== null) $authority .= ':' . $port;
-        }
-        // reconstruct the result
-        $result = '';
-        if ($scheme !== null) $result .= "$scheme:";
-        if ($authority !== null) $result .= "//$authority";
-        $result .= $path;
-        if ($query !== null) $result .= "?$query";
-        if ($fragment !== null) $result .= "#$fragment";
-        // munge if necessary
-        $munge = $config->get('URI', 'Munge');
-        if (!empty($scheme_obj->browsable) && $munge !== null) {
-            if ($authority !== null) {
-                $result = str_replace('%s', rawurlencode($result), $munge);
-            }
+        // munge entire URI if necessary
+        if (
+            !is_null($uri->host) && // indicator for authority
+            !empty($scheme_obj->browsable) &&
+            !is_null($munge = $config->get('URI', 'Munge'))
+        ) {
+            $result = str_replace('%s', rawurlencode($result), $munge);
         return $result;
-    /**
-     * Checks a host against an array blacklist
-     * @param $host Host to check
-     * @param $config HTMLPurifier_Config instance
-     * @param $context HTMLPurifier_Context instance
-     * @return bool Is spam?
-     */
-    function checkBlacklist($host, &$config, &$context) {
-        $blacklist = $config->get('URI', 'HostBlacklist');
-        if (!empty($blacklist)) {
-            foreach($blacklist as $blacklisted_host_fragment) {
-                if (strpos($host, $blacklisted_host_fragment) !== false) {
-                    return true;
-                }
-            }
-        }
-        return false;
-    }
diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php
index b6dff4b5..73be27b1 100644
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@@ -5,6 +5,7 @@ require_once 'HTMLPurifier/ConfigSchema.php';
 // member variables
 require_once 'HTMLPurifier/HTMLDefinition.php';
 require_once 'HTMLPurifier/CSSDefinition.php';
+require_once 'HTMLPurifier/URIDefinition.php';
 require_once 'HTMLPurifier/Doctype.php';
 require_once 'HTMLPurifier/DefinitionCacheFactory.php';
@@ -41,7 +42,7 @@ class HTMLPurifier_Config
      * HTML Purifier's version
-    var $version = '2.0.1';
+    var $version = '2.1.1';
      * Two-level associative array of configuration directives
@@ -75,6 +76,11 @@ class HTMLPurifier_Config
     var $serials = array();
+    /**
+     * Serial for entire configuration object
+     */
+    var $serial;
      * @param $definition HTMLPurifier_ConfigSchema that defines what directives
      *                    are allowed.
@@ -98,7 +104,6 @@ class HTMLPurifier_Config
         $ret = HTMLPurifier_Config::createDefault();
         if (is_string($config)) $ret->loadIni($config);
         elseif (is_array($config)) $ret->loadArray($config);
-        if (isset($revision)) $ret->revision = $revision;
         return $ret;
@@ -165,6 +170,17 @@ class HTMLPurifier_Config
         return $this->serials[$namespace];
+    /**
+     * Returns an MD5 signature for the entire configuration object
+     * that uniquely identifies that particular configuration
+     */
+    function getSerial() {
+        if (empty($this->serial)) {
+            $this->serial = md5(serialize($this->getAll()));
+        }
+        return $this->serial;
+    }
      * Retrieves all directives, organized by namespace
@@ -295,6 +311,8 @@ class HTMLPurifier_Config
             $this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
         } elseif ($type == 'CSS') {
             $this->definitions[$type] = new HTMLPurifier_CSSDefinition();
+        } elseif ($type == 'URI') {
+            $this->definitions[$type] = new HTMLPurifier_URIDefinition();
         } else {
             trigger_error("Definition of $type type not supported");
             $false = false;
@@ -393,6 +411,26 @@ class HTMLPurifier_Config
      * @static
     static function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
+        $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
+        $config = HTMLPurifier_Config::create($ret);
+        return $config;
+    }
+    /**
+     * Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
+     * @note Same parameters as loadArrayFromForm
+     */
+    function mergeArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
+         $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
+         $this->loadArray($ret);
+    }
+    /**
+     * Prepares an array from a form into something usable for the more
+     * strict parts of HTMLPurifier_Config
+     * @static
+     */
+    static function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
         $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
         $mq = get_magic_quotes_gpc() && $mq_fix;
@@ -409,9 +447,7 @@ class HTMLPurifier_Config
             $value = $mq ? stripslashes($array[$skey]) : $array[$skey];
             $ret[$ns][$directive] = $value;
-        $config = HTMLPurifier_Config::create($ret);
-        return $config;
+        return $ret;
diff --git a/library/HTMLPurifier/ConfigSchema.php b/library/HTMLPurifier/ConfigSchema.php
index 13ad6036..83e1616e 100644
--- a/library/HTMLPurifier/ConfigSchema.php
+++ b/library/HTMLPurifier/ConfigSchema.php
@@ -6,6 +6,8 @@ require_once 'HTMLPurifier/ConfigDef/Namespace.php';
 require_once 'HTMLPurifier/ConfigDef/Directive.php';
 require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
  * Configuration definition, defines directives and their defaults.
  * @note If you update this, please update Printer_ConfigForm
@@ -49,6 +51,8 @@ class HTMLPurifier_ConfigSchema {
     var $types = array(
         'string'    => 'String',
         'istring'   => 'Case-insensitive string',
+        'text'      => 'Text',
+        'itext'      => 'Case-insensitive text',
         'int'       => 'Integer',
         'float'     => 'Float',
         'bool'      => 'Boolean',
@@ -100,27 +104,30 @@ class HTMLPurifier_ConfigSchema {
      *      HTMLPurifier_DirectiveDef::$type for allowed values
      * @param $description Description of directive for documentation
-    static function define(
-        $namespace, $name, $default, $type, 
-        $description
-    ) {
+    static function define($namespace, $name, $default, $type, $description) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (!isset($def->info[$namespace])) {
-            trigger_error('Cannot define directive for undefined namespace',
-                E_USER_ERROR);
-            return;
-        }
-        if (!ctype_alnum($name)) {
-            trigger_error('Directive name must be alphanumeric',
-                E_USER_ERROR);
-            return;
-        }
-        if (empty($description)) {
-            trigger_error('Description must be non-empty',
-                E_USER_ERROR);
-            return;
+        // basic sanity checks
+            if (!isset($def->info[$namespace])) {
+                trigger_error('Cannot define directive for undefined namespace',
+                    E_USER_ERROR);
+                return;
+            }
+            if (!ctype_alnum($name)) {
+                trigger_error('Directive name must be alphanumeric',
+                    E_USER_ERROR);
+                return;
+            }
+            if (empty($description)) {
+                trigger_error('Description must be non-empty',
+                    E_USER_ERROR);
+                return;
+            }
         if (isset($def->info[$namespace][$name])) {
+            // already defined
             if (
                 $def->info[$namespace][$name]->type !== $type ||
                 $def->defaults[$namespace][$name]   !== $default
@@ -129,29 +136,35 @@ class HTMLPurifier_ConfigSchema {
         } else {
-            // process modifiers
+            // needs defining
+            // process modifiers (OPTIMIZE!)
             $type_values = explode('/', $type, 2);
             $type = $type_values[0];
             $modifier = isset($type_values[1]) ? $type_values[1] : false;
             $allow_null = ($modifier === 'null');
-            if (!isset($def->types[$type])) {
-                trigger_error('Invalid type for configuration directive',
-                    E_USER_ERROR);
-                return;
-            }
-            $default = $def->validate($default, $type, $allow_null);
-            if ($def->isError($default)) {
-                trigger_error('Default value does not match directive type',
-                    E_USER_ERROR);
-                return;
+                if (!isset($def->types[$type])) {
+                    trigger_error('Invalid type for configuration directive',
+                        E_USER_ERROR);
+                    return;
+                }
+                $default = $def->validate($default, $type, $allow_null);
+                if ($def->isError($default)) {
+                    trigger_error('Default value does not match directive type',
+                        E_USER_ERROR);
+                    return;
+                }
             $def->info[$namespace][$name] =
                 new HTMLPurifier_ConfigDef_Directive();
             $def->info[$namespace][$name]->type = $type;
             $def->info[$namespace][$name]->allow_null = $allow_null;
             $def->defaults[$namespace][$name]   = $default;
+        if (!HTMLPURIFIER_SCHEMA_STRICT) return;
         $backtrace = debug_backtrace();
         $file = $def->mungeFilename($backtrace[0]['file']);
         $line = $backtrace[0]['line'];
@@ -166,19 +179,21 @@ class HTMLPurifier_ConfigSchema {
     static function defineNamespace($namespace, $description) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (isset($def->info[$namespace])) {
-            trigger_error('Cannot redefine namespace', E_USER_ERROR);
-            return;
-        }
-        if (!ctype_alnum($namespace)) {
-            trigger_error('Namespace name must be alphanumeric',
-                E_USER_ERROR);
-            return;
-        }
-        if (empty($description)) {
-            trigger_error('Description must be non-empty',
-                E_USER_ERROR);
-            return;
+            if (isset($def->info[$namespace])) {
+                trigger_error('Cannot redefine namespace', E_USER_ERROR);
+                return;
+            }
+            if (!ctype_alnum($namespace)) {
+                trigger_error('Namespace name must be alphanumeric',
+                    E_USER_ERROR);
+                return;
+            }
+            if (empty($description)) {
+                trigger_error('Description must be non-empty',
+                    E_USER_ERROR);
+                return;
+            }
         $def->info[$namespace] = array();
         $def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
@@ -199,23 +214,25 @@ class HTMLPurifier_ConfigSchema {
     static function defineValueAliases($namespace, $name, $aliases) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (!isset($def->info[$namespace][$name])) {
+        if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
             trigger_error('Cannot set value alias for non-existant directive',
         foreach ($aliases as $alias => $real) {
-            if (!$def->info[$namespace][$name] !== true &&
-                !isset($def->info[$namespace][$name]->allowed[$real])
-            ) {
-                trigger_error('Cannot define alias to value that is not allowed',
-                    E_USER_ERROR);
-                return;
-            }
-            if (isset($def->info[$namespace][$name]->allowed[$alias])) {
-                trigger_error('Cannot define alias over allowed value',
-                    E_USER_ERROR);
-                return;
+                if (!$def->info[$namespace][$name] !== true &&
+                    !isset($def->info[$namespace][$name]->allowed[$real])
+                ) {
+                    trigger_error('Cannot define alias to value that is not allowed',
+                        E_USER_ERROR);
+                    return;
+                }
+                if (isset($def->info[$namespace][$name]->allowed[$alias])) {
+                    trigger_error('Cannot define alias over allowed value',
+                        E_USER_ERROR);
+                    return;
+                }
             $def->info[$namespace][$name]->aliases[$alias] = $real;
@@ -230,14 +247,14 @@ class HTMLPurifier_ConfigSchema {
     static function defineAllowedValues($namespace, $name, $allowed_values) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (!isset($def->info[$namespace][$name])) {
+        if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
             trigger_error('Cannot define allowed values for undefined directive',
         $directive =& $def->info[$namespace][$name];
         $type = $directive->type;
-        if ($type != 'string' && $type != 'istring') {
+        if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') {
             trigger_error('Cannot define allowed values for directive whose type is not string',
@@ -248,8 +265,11 @@ class HTMLPurifier_ConfigSchema {
         foreach ($allowed_values as $value) {
             $directive->allowed[$value] = true;
-        if ($def->defaults[$namespace][$name] !== null &&
-            !isset($directive->allowed[$def->defaults[$namespace][$name]])) {
+        if (
+            $def->defaults[$namespace][$name] !== null &&
+            !isset($directive->allowed[$def->defaults[$namespace][$name]])
+        ) {
             trigger_error('Default value must be in allowed range of variables',
             $directive->allowed = true; // undo undo!
@@ -267,30 +287,32 @@ class HTMLPurifier_ConfigSchema {
     static function defineAlias($namespace, $name, $new_namespace, $new_name) {
         $def =& HTMLPurifier_ConfigSchema::instance();
-        if (!isset($def->info[$namespace])) {
-            trigger_error('Cannot define directive alias in undefined namespace',
-                E_USER_ERROR);
-            return;
-        }
-        if (!ctype_alnum($name)) {
-            trigger_error('Directive name must be alphanumeric',
-                E_USER_ERROR);
-            return;
-        }
-        if (isset($def->info[$namespace][$name])) {
-            trigger_error('Cannot define alias over directive',
-                E_USER_ERROR);
-            return;
-        }
-        if (!isset($def->info[$new_namespace][$new_name])) {
-            trigger_error('Cannot define alias to undefined directive',
-                E_USER_ERROR);
-            return;
-        }
-        if ($def->info[$new_namespace][$new_name]->class == 'alias') {
-            trigger_error('Cannot define alias to alias',
-                E_USER_ERROR);
-            return;
+            if (!isset($def->info[$namespace])) {
+                trigger_error('Cannot define directive alias in undefined namespace',
+                    E_USER_ERROR);
+                return;
+            }
+            if (!ctype_alnum($name)) {
+                trigger_error('Directive name must be alphanumeric',
+                    E_USER_ERROR);
+                return;
+            }
+            if (isset($def->info[$namespace][$name])) {
+                trigger_error('Cannot define alias over directive',
+                    E_USER_ERROR);
+                return;
+            }
+            if (!isset($def->info[$new_namespace][$new_name])) {
+                trigger_error('Cannot define alias to undefined directive',
+                    E_USER_ERROR);
+                return;
+            }
+            if ($def->info[$new_namespace][$new_name]->class == 'alias') {
+                trigger_error('Cannot define alias to alias',
+                    E_USER_ERROR);
+                return;
+            }
         $def->info[$namespace][$name] =
             new HTMLPurifier_ConfigDef_DirectiveAlias(
@@ -313,8 +335,10 @@ class HTMLPurifier_ConfigSchema {
                 return $var;
             case 'istring':
             case 'string':
+            case 'text': // no difference, just is longer/multiple line string
+            case 'itext':
                 if (!is_string($var)) break;
-                if ($type === 'istring') $var = strtolower($var);
+                if ($type === 'istring' || $type === 'itext') $var = strtolower($var);
                 return $var;
             case 'int':
                 if (is_string($var) && ctype_digit($var)) $var = (int) $var;
@@ -345,9 +369,13 @@ class HTMLPurifier_ConfigSchema {
                     // a single empty string item, but having an empty
                     // array is more intuitive
                     if ($var == '') return array();
-                    // simplistic string to array method that only works
-                    // for simple lists of tag names or alphanumeric characters
-                    $var = explode(',',$var);
+                    if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
+                        // simplistic string to array method that only works
+                        // for simple lists of tag names or alphanumeric characters
+                        $var = explode(',',$var);
+                    } else {
+                        $var = preg_split('/(,|[\n\r]+)/', $var);
+                    }
                     // remove spaces
                     foreach ($var as $i => $j) $var[$i] = trim($j);
                     if ($type === 'hash') {
@@ -388,6 +416,7 @@ class HTMLPurifier_ConfigSchema {
      * Takes an absolute path and munges it into a more manageable relative path
     function mungeFilename($filename) {
+        if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename;
         $offset = strrpos($filename, 'HTMLPurifier');
         $filename = substr($filename, $offset);
         $filename = str_replace('\\', '/', $filename);
diff --git a/library/HTMLPurifier/ContentSets.php b/library/HTMLPurifier/ContentSets.php
index 001f4814..7baf7a31 100644
--- a/library/HTMLPurifier/ContentSets.php
+++ b/library/HTMLPurifier/ContentSets.php
@@ -5,6 +5,7 @@ require_once 'HTMLPurifier/ChildDef.php';
 require_once 'HTMLPurifier/ChildDef/Empty.php';
 require_once 'HTMLPurifier/ChildDef/Required.php';
 require_once 'HTMLPurifier/ChildDef/Optional.php';
+require_once 'HTMLPurifier/ChildDef/Custom.php';
diff --git a/library/HTMLPurifier/DefinitionCache/Serializer.php b/library/HTMLPurifier/DefinitionCache/Serializer.php
index 2b07da13..1830e37e 100644
--- a/library/HTMLPurifier/DefinitionCache/Serializer.php
+++ b/library/HTMLPurifier/DefinitionCache/Serializer.php
@@ -99,7 +99,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
     function generateBaseDirectoryPath($config) {
         $base = $config->get('Cache', 'SerializerPath');
-        $base = is_null($base) ? dirname(__FILE__) . '/Serializer' : $base;
+        $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
         return $base;
diff --git a/library/HTMLPurifier/EntityLookup.php b/library/HTMLPurifier/EntityLookup.php
index ed3ea3df..212cf780 100644
--- a/library/HTMLPurifier/EntityLookup.php
+++ b/library/HTMLPurifier/EntityLookup.php
@@ -19,7 +19,7 @@ class HTMLPurifier_EntityLookup {
     function setup($file = false) {
         if (!$file) {
-            $file = dirname(__FILE__) . '/EntityLookup/entities.ser';
+            $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
         $this->table = unserialize(file_get_contents($file));
diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index 9ed413c7..aaeb8bae 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -110,12 +110,13 @@ HTMLPurifier_ConfigSchema::define(
-    'HTML', 'Allowed', null, 'string/null', '
+    'HTML', 'Allowed', null, 'itext/null', '
     This is a convenience directive that rolls the functionality of
     %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
     Specify elements and attributes that are allowed using:
-    <code>element1[attr1|attr2],element2...</code>.
+    <code>element1[attr1|attr2],element2...</code>. You can also use
+    newlines instead of commas to separate elements.
@@ -426,8 +427,9 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
         $elements = array();
         $attributes = array();
-        $chunks = explode(',', $list);
+        $chunks = preg_split('/(,|[\n\r]+)/', $list);
         foreach ($chunks as $chunk) {
+            if (empty($chunk)) continue;
             // remove TinyMCE element control characters
             if (!strpos($chunk, '[')) {
                 $element = $chunk;
diff --git a/library/HTMLPurifier/HTMLModule/Ruby.php b/library/HTMLPurifier/HTMLModule/Ruby.php
new file mode 100644
index 00000000..f5432446
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Ruby.php
@@ -0,0 +1,28 @@
+require_once 'HTMLPurifier/HTMLModule.php';
+ * XHTML 1.1 Ruby Annotation Module, defines elements that indicate
+ * short runs of text alongside base text for annotation or pronunciation.
+ */
+class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
+    var $name = 'Ruby';
+    function HTMLPurifier_HTMLModule_Ruby() {
+        $this->addElement('ruby', true, 'Inline',
+            'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))',
+            'Common');
+        $this->addElement('rbc', true, false, 'Required: rb', 'Common');
+        $this->addElement('rtc', true, false, 'Required: rt', 'Common');
+        $rb =& $this->addElement('rb', true, false, 'Inline', 'Common');
+        $rb->excludes = array('ruby' => true);
+        $rt =& $this->addElement('rt', true, false, 'Inline', 'Common', array('rbspan' => 'Number'));
+        $rt->excludes = array('ruby' => true);
+        $this->addElement('rp', true, false, 'Optional: #PCDATA', 'Common');
+    }
diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php
index 69628dd8..d4f10d0c 100644
--- a/library/HTMLPurifier/HTMLModuleManager.php
+++ b/library/HTMLPurifier/HTMLModuleManager.php
@@ -28,6 +28,7 @@ require_once 'HTMLPurifier/HTMLModule/Target.php';
 require_once 'HTMLPurifier/HTMLModule/Scripting.php';
 require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
 require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
+require_once 'HTMLPurifier/HTMLModule/Ruby.php';
 // tidy modules
 require_once 'HTMLPurifier/HTMLModule/Tidy.php';
@@ -215,8 +216,8 @@ class HTMLPurifier_HTMLModuleManager
             'XHTML 1.1', true,
-            array_merge($common, $xml),
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary'), // Tidy_XHTML1_1
+            array_merge($common, $xml, array('Ruby')),
+            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1
             '-//W3C//DTD XHTML 1.1//EN',
diff --git a/library/HTMLPurifier/Injector.php b/library/HTMLPurifier/Injector.php
index 939f307e..59017163 100644
--- a/library/HTMLPurifier/Injector.php
+++ b/library/HTMLPurifier/Injector.php
@@ -8,6 +8,11 @@
 class HTMLPurifier_Injector
+    /**
+     * Advisory name of injector, this is for friendly error messages
+     */
+    var $name;
      * Amount of tokens the injector needs to skip + 1. Because
      * the decrement is the first thing that happens, this needs to
@@ -40,16 +45,37 @@ class HTMLPurifier_Injector
     var $inputIndex;
-     * Prepares the injector by giving it the config and context objects,
-     * so that important variables can be extracted and not passed via
-     * parameter constantly. Remember: always instantiate a new injector
-     * when handling a set of HTML.
+     * Array of elements and attributes that this injector creates and that
+     * therefore need to be allowed by the definition. Takes form of
+     * array('element' => array('attr', 'attr2'), 'element2')
+     */
+    var $needed = array();
+    /**
+     * Prepares the injector by giving it the config and context objects:
+     * this allows references to important variables to be made within
+     * the injector. This function also checks if the HTML environment
+     * will work with the Injector: if p tags are not allowed, the
+     * Auto-Paragraphing injector should not be enabled.
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Boolean false if success, string of missing needed element/attribute if failure
     function prepare($config, &$context) {
         $this->htmlDefinition = $config->getHTMLDefinition();
+        // perform $needed checks
+        foreach ($this->needed as $element => $attributes) {
+            if (is_int($element)) $element = $attributes;
+            if (!isset($this->htmlDefinition->info[$element])) return $element;
+            if (!is_array($attributes)) continue;
+            foreach ($attributes as $name) {
+                if (!isset($this->htmlDefinition->info[$element]->attr[$name])) return "$element.$name";
+            }
+        }
         $this->currentNesting =& $context->get('CurrentNesting');
         $this->inputTokens    =& $context->get('InputTokens');
         $this->inputIndex     =& $context->get('InputIndex');
+        return false;
@@ -74,12 +100,12 @@ class HTMLPurifier_Injector
      * Handler that is called when a text token is processed
-    function handleText(&$token, $config, &$context) {}
+    function handleText(&$token) {}
-     * Handler that is called when a start token is processed
+     * Handler that is called when a start or empty token is processed
-    function handleStart(&$token, $config, &$context) {}
+    function handleElement(&$token) {}
diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php
index e8e2e34f..6e0a6a3e 100644
--- a/library/HTMLPurifier/Injector/AutoParagraph.php
+++ b/library/HTMLPurifier/Injector/AutoParagraph.php
@@ -15,6 +15,11 @@ HTMLPurifier_ConfigSchema::define(
       block elements in nodes that allow paragraph tags</li>
   <li>There are double newlines in paragraph tags</li>
+  <code>p</code> tags must be allowed for this directive to take effect.
+  We do not use <code>br</code> tags for paragraphing, as that is
+  semantically incorrect.
   This directive has been available since 2.0.1.
@@ -27,13 +32,16 @@ HTMLPurifier_ConfigSchema::define(
 class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
+    var $name = 'AutoParagraph';
+    var $needed = array('p');
     function _pStart() {
         $par = new HTMLPurifier_Token_Start('p');
         $par->armor['MakeWellFormed_TagClosedError'] = true;
         return $par;
-    function handleText(&$token, $config, &$context) {
+    function handleText(&$token) {
         $text = $token->data;
         if (empty($this->currentNesting)) {
             if (!$this->allowsElement('p')) return;
@@ -79,7 +87,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
-    function handleStart(&$token, $config, &$context) {
+    function handleElement(&$token) {
         // check if we're inside a tag already
         if (!empty($this->currentNesting)) {
             if ($this->allowsElement('p')) {
@@ -88,11 +96,19 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
                 // this token is already paragraph, abort
                 if ($token->name == 'p') return;
-                // check if this token is adjacent to the parent
-                if ($this->inputTokens[$this->inputIndex - 1]->type != 'start') {
+                // this token is a block level, abort
+                if (!$this->_isInline($token)) return;
+                // check if this token is adjacent to the parent token
+                $prev = $this->inputTokens[$this->inputIndex - 1];
+                if ($prev->type != 'start') {
                     // not adjacent, we can abort early
                     // add lead paragraph tag if our token is inline
-                    if ($this->_isInline($token)) {
+                    // and the previous tag was an end paragraph
+                    if (
+                        $prev->name == 'p' && $prev->type == 'end' &&
+                        $this->_isInline($token)
+                    ) {
                         $token = array($this->_pStart(), $token);
@@ -105,8 +121,8 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
                 $ok = false;
                 // maintain a mini-nesting counter, this lets us bail out
                 // early if possible
-                $j = 2; // current nesting, is two due to parent and this start
-                for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
+                $j = 1; // current nesting, one is due to parent (we recalculate current token)
+                for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) {
                     if ($this->inputTokens[$i]->type == 'start') $j++;
                     if ($this->inputTokens[$i]->type == 'end') $j--;
                     if ($this->inputTokens[$i]->type == 'text') {
@@ -150,7 +166,14 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
         $needs_start = false;
         $needs_end   = false;
-        for ($i = 0, $c = count($raw_paragraphs); $i < $c; $i++) {
+        $c = count($raw_paragraphs);
+        if ($c == 1) {
+            // there were no double-newlines, abort quickly
+            $result[] = new HTMLPurifier_Token_Text($data);
+            return;
+        }
+        for ($i = 0; $i < $c; $i++) {
             $par = $raw_paragraphs[$i];
             if (trim($par) !== '') {
                 $paragraphs[] = $par;
diff --git a/library/HTMLPurifier/Injector/Linkify.php b/library/HTMLPurifier/Injector/Linkify.php
index 7ada1d7a..bf7abfa9 100644
--- a/library/HTMLPurifier/Injector/Linkify.php
+++ b/library/HTMLPurifier/Injector/Linkify.php
@@ -6,7 +6,8 @@ HTMLPurifier_ConfigSchema::define(
     'AutoFormat', 'Linkify', false, 'bool', '
   This directive turns on linkification, auto-linking http, ftp and
-  https URLs. This directive has been available since 2.0.1.
+  https URLs. <code>a</code> tags with the <code>href</code> attribute
+  must be allowed. This directive has been available since 2.0.1.
@@ -16,7 +17,10 @@ HTMLPurifier_ConfigSchema::define(
 class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
-    function handleText(&$token, $config, &$context) {
+    var $name = 'Linkify';
+    var $needed = array('a' => array('href'));
+    function handleText(&$token) {
         if (!$this->allowsElement('a')) return;
         if (strpos($token->data, '://') === false) {
diff --git a/library/HTMLPurifier/Injector/PurifierLinkify.php b/library/HTMLPurifier/Injector/PurifierLinkify.php
index d6f2e607..a7686297 100644
--- a/library/HTMLPurifier/Injector/PurifierLinkify.php
+++ b/library/HTMLPurifier/Injector/PurifierLinkify.php
@@ -6,8 +6,9 @@ HTMLPurifier_ConfigSchema::define(
     'AutoFormat', 'PurifierLinkify', false, 'bool', '
   Internal auto-formatter that converts configuration directives in
-  syntax <a>%Namespace.Directive</a> to links. This directive has been available
-  since 2.0.1.
+  syntax <a>%Namespace.Directive</a> to links. <code>a</code> tags
+  with the <code>href</code> attribute must be allowed.
+  This directive has been available since 2.0.1.
@@ -27,14 +28,16 @@ HTMLPurifier_ConfigSchema::define(
 class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector
+    var $name = 'PurifierLinkify';
     var $docURL;
+    var $needed = array('a' => array('href'));
     function prepare($config, &$context) {
-        parent::prepare($config, $context);
         $this->docURL = $config->get('AutoFormatParam', 'PurifierLinkifyDocURL');
+        return parent::prepare($config, $context);
-    function handleText(&$token, $config, &$context) {
+    function handleText(&$token) {
         if (!$this->allowsElement('a')) return;
         if (strpos($token->data, '%') === false) return;
diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php
index a64cf301..b16c3ff3 100644
--- a/library/HTMLPurifier/Language/messages/en.php
+++ b/library/HTMLPurifier/Language/messages/en.php
@@ -28,7 +28,7 @@ $messages = array(
 'Strategy_RemoveForeignElements: Foreign element to text'    => 'Unrecognized $CurrentToken.Serialized tag converted to text',
 'Strategy_RemoveForeignElements: Foreign element removed'    => 'Unrecognized $CurrentToken.Serialized tag removed',
 'Strategy_RemoveForeignElements: Comment removed'            => 'Comment containing "$CurrentToken.Data" removed',
-'Strategy_RemoveForeignElements: Script removed'             => 'Script removed',
+'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed',
 'Strategy_RemoveForeignElements: Token removed to end'       => 'Tags and text starting from $1 element where removed to end',
 'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed',
diff --git a/library/HTMLPurifier/LanguageFactory.php b/library/HTMLPurifier/LanguageFactory.php
index 71539ded..ac6e7dbf 100644
--- a/library/HTMLPurifier/LanguageFactory.php
+++ b/library/HTMLPurifier/LanguageFactory.php
@@ -82,7 +82,7 @@ class HTMLPurifier_LanguageFactory
     function setup() {
         $this->validator = new HTMLPurifier_AttrDef_Lang();
-        $this->dir = dirname(__FILE__);
+        $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index 839782ca..29295db7 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -66,6 +66,16 @@ HTMLPurifier_ConfigSchema::define(
+    'Core', 'AggressivelyFixLt', false, 'bool', '
+This directive enables aggressive pre-filter fixes HTML Purifier can
+perform in order to ensure that open angled-brackets do not get killed
+during parsing stage. Enabling this will result in two preg_replace_callback
+calls and one preg_replace call for every bit of HTML passed through here.
+It is not necessary and will have no effect for PHP 4.
+This directive has been available since 2.1.0.
  * Forgivingly lexes HTML (SGML-style) markup into tokens.
diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php
index 82865673..17f23e34 100644
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -42,6 +42,16 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
         $html = $this->normalize($html, $config, $context);
+        // attempt to armor stray angled brackets that cannot possibly
+        // form tags and thus are probably being used as emoticons
+        if ($config->get('Core', 'AggressivelyFixLt')) {
+            $char = '[^a-z!\/]';
+            $comment = "/<!--(.*?)(-->|\z)/is";
+            $html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackArmorCommentEntities'), $html);
+            $html = preg_replace("/<($char)/i", '&lt;\\1', $html);
+            $html = preg_replace_callback($comment, array('HTMLPurifier_Lexer_DOMLex', 'callbackUndoCommentSubst'), $html); // fix comments
+        }
         // preprocess html, essential for UTF-8
         $html =
             '<!DOCTYPE html '.
@@ -151,5 +161,21 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
     public function muteErrorHandler($errno, $errstr) {}
+    /**
+     * Callback function for undoing escaping of stray angled brackets
+     * in comments
+     * @param $matches Array of matches from preg_replace_callback: index 1
+     *        is the comment body, index 2 is the captured comment
+     *        terminator (empty if the comment ran to the end of input)
+     * @return String comment whose body has &amp; and &lt; converted back
+     *         to & and <, terminator appended unchanged
+     */
+    static public function callbackUndoCommentSubst($matches) {
+        return '<!--' . strtr($matches[1], array('&amp;'=>'&','&lt;'=>'<')) . $matches[2];
+    }
+    /**
+     * Callback function that entity-izes ampersands in comments so that
+     * callbackUndoCommentSubst doesn't clobber them
+     * @param $matches Array of matches from preg_replace_callback: index 1
+     *        is the comment body, index 2 is the captured comment
+     *        terminator (empty if the comment ran to the end of input)
+     * @return String comment whose body has every & replaced by &amp;,
+     *         terminator appended unchanged
+     */
+    static public function callbackArmorCommentEntities($matches) {
+        return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
+    }
diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php
index 883f4956..cd7cb4c1 100644
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -150,6 +150,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                 // We are in tag and it is well formed
                 // Grab the internals of the tag
                 $strlen_segment = $position_next_gt - $cursor;
+                if ($strlen_segment < 1) {
+                    // there's nothing to process!
+                    $token = new HTMLPurifier_Token_Text('<');
+                    $cursor++;
+                    continue;
+                }
                 $segment = substr($html, $cursor, $strlen_segment);
                 // Check if it's a comment
@@ -204,7 +212,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                 // Check leading character is alnum, if not, we may
                 // have accidently grabbed an emoticon. Translate into
                 // text and go our merry way
-                if (!ctype_alnum($segment[0])) {
+                if (!ctype_alpha($segment[0])) {
+                    // XML:  $segment[0] !== '_' && $segment[0] !== ':'
                     if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
                     $token = new
@@ -371,6 +380,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                     $value = $quoted_value;
+            if ($value === false) $value = '';
             return array($key => $value);
@@ -385,7 +395,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
         // infinite loop protection
         $loops = 0;
         while(true) {
             // infinite loop protection
@@ -399,7 +408,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
             $cursor += ($value = strspn($string, $this->_whitespace, $cursor));
             // grab the key
             $key_begin = $cursor; //we're currently at the start of the key
@@ -435,6 +443,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                 $cursor += strspn($string, $this->_whitespace, $cursor);
+                if ($cursor === false) {
+                    $array[$key] = '';
+                    break;
+                }
                 // we might be in front of a quote right now
                 $char = @$string[$cursor];
@@ -452,7 +465,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                     $value_end = $cursor;
+                // we reached a premature end
+                if ($cursor === false) {
+                    $cursor = $size;
+                    $value_end = $cursor;
+                }
                 $value = substr($string, $value_begin, $value_end - $value_begin);
+                if ($value === false) $value = '';
                 $array[$key] = $this->parseData($value);
diff --git a/library/HTMLPurifier/Printer/ConfigForm.css b/library/HTMLPurifier/Printer/ConfigForm.css
index 23c7f999..0653bbb0 100644
--- a/library/HTMLPurifier/Printer/ConfigForm.css
+++ b/library/HTMLPurifier/Printer/ConfigForm.css
@@ -1,7 +1,7 @@
 .hp-config {}
-.hp-config tbody th {text-align:right;}
+.hp-config tbody th {text-align:right; padding-right:0.5em;}
 .hp-config thead, .hp-config .namespace {background:#3C578C; color:#FFF;}
 .hp-config .namespace th {text-align:center;}
 .hp-config .verbose {display:none;}
diff --git a/library/HTMLPurifier/Printer/ConfigForm.php b/library/HTMLPurifier/Printer/ConfigForm.php
index fb86f5f3..31da35f8 100644
--- a/library/HTMLPurifier/Printer/ConfigForm.php
+++ b/library/HTMLPurifier/Printer/ConfigForm.php
@@ -23,18 +23,52 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
     var $name;
+    /**
+     * Whether or not to compress directive names, clipping them off
+     * after a certain amount of letters
+     */
+    var $compress = false;
      * @param $name Form element name for directives to be stuffed into
      * @param $doc_url String documentation URL, will have fragment tagged on
+     * @param $compress Integer max length before compressing a directive name, set to false to turn off
-    function HTMLPurifier_Printer_ConfigForm($name, $doc_url = null) {
+    function HTMLPurifier_Printer_ConfigForm(
+        $name, $doc_url = null, $compress = false
+    ) {
         $this->docURL = $doc_url;
         $this->name   = $name;
+        $this->compress = $compress;
         $this->fields['default']    = new HTMLPurifier_Printer_ConfigForm_default();
         $this->fields['bool']       = new HTMLPurifier_Printer_ConfigForm_bool();
+    /**
+     * Sets the cols/rows of the 'default' field printer, which uses them
+     * when it renders a textarea. An argument that is null (or otherwise
+     * false-like, such as 0) leaves the corresponding value unchanged.
+     * @param $cols Integer columns of textarea, null to use default
+     * @param $rows Integer rows of textarea, null to use default
+     */
+    function setTextareaDimensions($cols = null, $rows = null) {
+        if ($cols) $this->fields['default']->cols = $cols;
+        if ($rows) $this->fields['default']->rows = $rows;
+    }
+    /**
+     * Retrieves styling, in case the directory it's in is not publicly
+     * available
+     * @return Result of file_get_contents() on ConfigForm.css, located
+     *         relative to HTMLPURIFIER_PREFIX
+     */
+    function getCSS() {
+        return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css');
+    }
+    /**
+     * Retrieves JavaScript, in case directory is not public
+     * @return Result of file_get_contents() on ConfigForm.js, located
+     *         relative to HTMLPURIFIER_PREFIX
+     */
+    function getJavaScript() {
+        return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js');
+    }
      * Returns HTML output for a configuration form
      * @param $config Configuration object of current form state
@@ -98,11 +132,12 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
                 $ret .= $this->start('a', array('href' => $url));
                 $attr = array('for' => "{$this->name}:$ns.$directive");
                 // crop directive name if it's too long
-                if (strlen($directive) < 14) {
+                if (!$this->compress || (strlen($directive) < $this->compress)) {
                     $directive_disp = $directive;
                 } else {
-                    $directive_disp = substr($directive, 0, 12) . '...';
+                    $directive_disp = substr($directive, 0, $this->compress - 2) . '...';
                     $attr['title'] = $directive;
@@ -176,6 +211,8 @@ class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer
  * Swiss-army knife configuration form field printer
 class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
+    var $cols = 18;
+    var $rows = 5;
     function render($ns, $directive, $value, $name, $config) {
         // this should probably be split up a little
@@ -190,12 +227,12 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
                         $value[] = $val;
                 case 'list':
-                    $value = implode(',', $value);
+                    $value = implode(PHP_EOL, $value);
                 case 'hash':
                     $nvalue = '';
                     foreach ($value as $i => $v) {
-                        $nvalue .= "$i:$v,";
+                        $nvalue .= "$i:$v" . PHP_EOL;
                     $value = $nvalue;
@@ -220,6 +257,15 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
                 $ret .= $this->element('option', $val, $attr);
             $ret .= $this->end('select');
+        } elseif (
+            $def->type == 'text' || $def->type == 'itext' ||
+            $def->type == 'list' || $def->type == 'hash' || $def->type == 'lookup'
+        ) {
+            $attr['cols'] = $this->cols;
+            $attr['rows'] = $this->rows;
+            $ret .= $this->start('textarea', $attr);
+            $ret .= $this->text($value);
+            $ret .= $this->end('textarea');
         } else {
             $attr['value'] = $value;
             $attr['type'] = 'text';
diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php
index 3a8109cf..b3e8aa74 100644
--- a/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -67,7 +67,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
         unset($injectors['Custom']); // special case
         foreach ($injectors as $injector => $b) {
             $injector = "HTMLPurifier_Injector_$injector";
-            if ($b) $this->injectors[] = new $injector;
+            if (!$b) continue;
+            $this->injectors[] = new $injector;
         foreach ($custom_injectors as $injector) {
             if (is_string($injector)) {
@@ -87,7 +88,11 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
         // give the injectors references to the definition and context
         // variables for performance reasons
         foreach ($this->injectors as $i => $x) {
-            $this->injectors[$i]->prepare($config, $context);
+            $error = $this->injectors[$i]->prepare($config, $context);
+            if (!$error) continue;
+            list($injector) = array_splice($this->injectors, $i, 1);
+            $name = $injector->name;
+            trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
         // -- end INJECTOR --
@@ -109,7 +114,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                 if ($token->type === 'text') {
                      // injector handler code; duplicated for performance reasons
                      foreach ($this->injectors as $i => $x) {
-                         if (!$x->skip) $x->handleText($token, $config, $context);
+                         if (!$x->skip) $x->handleText($token);
                          if (is_array($token)) {
                              $this->currentInjector = $i;
@@ -122,26 +127,24 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
             $info = $definition->info[$token->name]->child;
-            // quick checks:
-            // test if it claims to be a start tag but is empty
+            // quick tag checks: anything that's *not* an end tag
+            $ok = false;
             if ($info->type == 'empty' && $token->type == 'start') {
-                $result[] = new HTMLPurifier_Token_Empty($token->name, $token->attr);
-                continue;
-            }
-            // test if it claims to be empty but really is a start tag
-            if ($info->type != 'empty' && $token->type == 'empty' ) {
-                $result[] = new HTMLPurifier_Token_Start($token->name, $token->attr);
-                $result[] = new HTMLPurifier_Token_End($token->name);
-                continue;
-            }
-            // automatically insert empty tags
-            if ($token->type == 'empty') {
-                $result[] = $token;
-                continue;
-            }
-            // start tags have precedence, so they get passed through...
-            if ($token->type == 'start') {
+                // test if it claims to be a start tag but is empty
+                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
+                $ok = true;
+            } elseif ($info->type != 'empty' && $token->type == 'empty' ) {
+                // claims to be empty but really is a start tag
+                $token = array(
+                    new HTMLPurifier_Token_Start($token->name, $token->attr),
+                    new HTMLPurifier_Token_End($token->name)
+                );
+                $ok = true;
+            } elseif ($token->type == 'empty') {
+                // real empty token
+                $ok = true;
+            } elseif ($token->type == 'start') {
+                // start tag
                 // ...unless they also have to close their parent
                 if (!empty($this->currentNesting)) {
@@ -163,16 +166,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                     $this->currentNesting[] = $parent; // undo the pop
-                // injector handler code; duplicated for performance reasons
+                $ok = true;
+            }
+            // injector handler code; duplicated for performance reasons
+            if ($ok) {
                 foreach ($this->injectors as $i => $x) {
-                    if (!$x->skip) $x->handleStart($token, $config, $context);
+                    if (!$x->skip) $x->handleElement($token);
                     if (is_array($token)) {
                         $this->currentInjector = $i;
                 $this->processToken($token, $config, $context);
@@ -280,9 +285,11 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
             array_splice($this->inputTokens, $this->inputIndex--, 1, $token);
             // adjust the injector skips based on the array substitution
-            $offset = count($token) + 1;
-            for ($i = 0; $i <= $this->currentInjector; $i++) {
-                $this->injectors[$i]->skip += $offset;
+            if ($this->injectors) {
+                $offset = count($token) + 1;
+                for ($i = 0; $i <= $this->currentInjector; $i++) {
+                    $this->injectors[$i]->skip += $offset;
+                }
         } elseif ($token) {
             // regular case
diff --git a/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
index c14662c3..2c280b23 100644
--- a/library/HTMLPurifier/Strategy/RemoveForeignElements.php
+++ b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -8,19 +8,38 @@ require_once 'HTMLPurifier/TagTransform.php';
 require_once 'HTMLPurifier/AttrValidator.php';
-    'Core', 'RemoveInvalidImg', true, 'bool',
-    'This directive enables pre-emptive URI checking in <code>img</code> '.
-    'tags, as the attribute validation strategy is not authorized to '.
-    'remove elements from the document.  This directive has been available '.
-    'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
+    'Core', 'RemoveInvalidImg', true, 'bool', '
+  This directive enables pre-emptive URI checking in <code>img</code> 
+  tags, as the attribute validation strategy is not authorized to 
+  remove elements from the document.  This directive has been available 
+  since 1.3.0, revert to pre-1.3.0 behavior by setting to false.
-    'Core', 'RemoveScriptContents', true, 'bool', '
+    'Core', 'RemoveScriptContents', null, 'bool/null', '
   This directive enables HTML Purifier to remove not only script tags
-  but all of their contents. This directive has been available since 2.0.0,
-  revert to pre-2.0.0 behavior by setting to false.
+  but all of their contents. This directive has been deprecated since 2.1.0,
+  and when not set the value of %Core.HiddenElements will take
+  precedence. This directive has been available since 2.0.0, and can be used to 
+  revert to pre-2.0.0 behavior by setting it to false.
+    'Core', 'HiddenElements', array('script' => true, 'style' => true), 'lookup', '
+  This directive is a lookup array of elements which should have their
+  contents removed when they are not allowed by the HTML definition.
+  For example, the contents of a <code>script</code> tag are not 
+  normally shown in a document, so if script tags are to be removed,
+  their contents should be removed too. This is opposed to a <code>b</code>
+  tag, which defines some presentational changes but does not hide its
+  contents.
@@ -43,7 +62,16 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
         $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
         $remove_invalid_img  = $config->get('Core', 'RemoveInvalidImg');
         $remove_script_contents = $config->get('Core', 'RemoveScriptContents');
+        $hidden_elements     = $config->get('Core', 'HiddenElements');
+        // remove script contents compatibility
+        if ($remove_script_contents === true) {
+            $hidden_elements['script'] = true;
+        } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
+            unset($hidden_elements['script']);
+        }
         $attr_validator = new HTMLPurifier_AttrValidator();
@@ -107,7 +135,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                     // CAN BE GENERICIZED
-                    if ($token->name == 'script' && $token->type == 'start') {
+                    if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
                         $textify_comments = $token->name;
                     } elseif ($token->name === $textify_comments && $token->type == 'end') {
                         $textify_comments = false;
@@ -122,7 +150,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                 } else {
                     // check if we need to destroy all of the tag's children
                     // CAN BE GENERICIZED
-                    if ($token->name == 'script' && $remove_script_contents) {
+                    if (isset($hidden_elements[$token->name])) {
                         if ($token->type == 'start') {
                             $remove_until = $token->name;
                         } elseif ($token->type == 'empty') {
@@ -130,7 +158,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                         } else {
                             $remove_until = false;
-                        if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Script removed');
+                        if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                     } else {
                         if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
diff --git a/library/HTMLPurifier/Strategy/ValidateAttributes.php b/library/HTMLPurifier/Strategy/ValidateAttributes.php
index 4b3d7486..869f3fab 100644
--- a/library/HTMLPurifier/Strategy/ValidateAttributes.php
+++ b/library/HTMLPurifier/Strategy/ValidateAttributes.php
@@ -46,6 +46,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
+        $context->destroy('CurrentToken');
         return $tokens;
diff --git a/library/HTMLPurifier/URI.php b/library/HTMLPurifier/URI.php
new file mode 100644
index 00000000..ed7ffdd6
--- /dev/null
+++ b/library/HTMLPurifier/URI.php
@@ -0,0 +1,119 @@
+require_once 'HTMLPurifier/URIParser.php';
+require_once 'HTMLPurifier/URIFilter.php';
+ * HTML Purifier's internal representation of a URI
+ */
+class HTMLPurifier_URI
+    var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
+    /**
+     * @note Automatically normalizes scheme and port
+     */
+    function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
+        // The scheme is stored lowercase; the conversion is skipped when the
+        // scheme is absent or already consists solely of lowercase letters.
+        if (!is_null($scheme) && !ctype_lower($scheme)) {
+            $scheme = strtolower($scheme);
+        }
+        // The port is stored as an integer; null (no port given) stays null.
+        if (!is_null($port)) {
+            $port = (int) $port;
+        }
+        $this->scheme   = $scheme;
+        $this->userinfo = $userinfo;
+        $this->host     = $host;
+        $this->port     = $port;
+        $this->path     = $path;
+        $this->query    = $query;
+        $this->fragment = $fragment;
+    }
+    /**
+     * Retrieves a scheme object corresponding to the URI's scheme/default
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Scheme object appropriate for validating this URI
+     */
+    function getSchemeObj($config, &$context) {
+        $registry =& HTMLPurifier_URISchemeRegistry::instance();
+        if ($this->scheme === null) {
+            // schemeless URI: fall back to the default scheme of the URI definition
+            $def = $config->getDefinition('URI');
+            $default_obj = $registry->getScheme($def->defaultScheme, $config, $context);
+            if ($default_obj) return $default_obj;
+            // the default scheme itself could not be loaded: warn and give up
+            trigger_error(
+                'Default scheme object "' . $def->defaultScheme . '" was not readable',
+                E_USER_WARNING
+            );
+            return false;
+        }
+        // explicit scheme: an unknown or unavailable scheme yields false
+        $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
+        if (!$scheme_obj) return false;
+        return $scheme_obj;
+    }
+    /**
+     * Generic validation method applicable for all schemes
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return True if validation/filtering succeeds, false if failure
+     */
+    function validate($config, &$context) {
+        // host: delegate to the host attribute definition; a rejected host is dropped
+        if ($this->host !== null) {
+            $host_def = new HTMLPurifier_AttrDef_URI_Host();
+            $clean_host = $host_def->validate($this->host, $config, $context);
+            $this->host = ($clean_host === false) ? null : $clean_host;
+        }
+        // port: values outside 1..65535 are dropped
+        if ($this->port !== null && ($this->port < 1 || $this->port > 65535)) {
+            $this->port = null;
+        }
+        // query and fragment share the simple definition *( pchar / "/" / "?" );
+        // their validation routines are deferred until percent-encoding
+        // fixing is implemented
+        // path validation depends on a hodge-podge of rules tied to the
+        // presence of authority and scheme; it is not critical and would
+        // not apply to everyone, so it is not performed here
+        return true;
+    }
+    /**
+     * Convert URI back to string
+     * @return String URI appropriate for output
+     */
+    function toString() {
+        $out = '';
+        if ($this->scheme !== null) $out .= $this->scheme . ':';
+        // an authority section ("//[userinfo@]host[:port]") is emitted only
+        // when a host is present; userinfo and port alone are never output
+        if ($this->host !== null) {
+            $out .= '//';
+            if ($this->userinfo !== null) $out .= $this->userinfo . '@';
+            $out .= $this->host;
+            if ($this->port !== null) $out .= ':' . $this->port;
+        }
+        // the path is always appended, even when it is the empty string
+        $out .= $this->path;
+        if ($this->query !== null)    $out .= '?' . $this->query;
+        if ($this->fragment !== null) $out .= '#' . $this->fragment;
+        return $out;
+    }
+    /**
+     * Returns a copy of the URI object
+     */
+    function copy() {
+        // round-trip through serialization to obtain an independent object
+        $frozen = serialize($this);
+        return unserialize($frozen);
+    }
diff --git a/library/HTMLPurifier/URIDefinition.php b/library/HTMLPurifier/URIDefinition.php
new file mode 100644
index 00000000..45c505ed
--- /dev/null
+++ b/library/HTMLPurifier/URIDefinition.php
@@ -0,0 +1,145 @@
+require_once 'HTMLPurifier/Definition.php';
+require_once 'HTMLPurifier/URIFilter.php';
+require_once 'HTMLPurifier/URIParser.php';
+require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
+require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
+require_once 'HTMLPurifier/URIFilter/HostBlacklist.php';
+require_once 'HTMLPurifier/URIFilter/MakeAbsolute.php';
+    'URI', 'DefinitionID', null, 'string/null', '
+    Unique identifier for a custom-built URI definition. If you  want
+    to add custom URIFilters, you must specify this value.
+    This directive has been available since 2.1.0.
+    'URI', 'DefinitionRev', 1, 'int', '
+    Revision identifier for your custom definition. See
+    %HTML.DefinitionRev for details. This directive has been available
+    since 2.1.0.
+// informative URI directives
+    'URI', 'DefaultScheme', 'http', 'string', '
+    Defines through what scheme the output will be served, in order to 
+    select the proper object validator when no scheme information is present.
+    'URI', 'Host', null, 'string/null', '
+    Defines the domain name of the server, so we can determine whether 
+    an absolute URI is from your website or not.  Not strictly necessary, 
+    as users should be using relative URIs to reference resources on your 
+    website.  It will, however, let you use absolute URIs to link to 
+    subdomains of the domain you post here: i.e. example.com will allow 
+    sub.example.com.  However, higher up domains will still be excluded: 
+    if you set %URI.Host to sub.example.com, example.com will be blocked. 
+    <strong>Note:</strong> This directive overrides %URI.Base because
+    a given page may be on a sub-domain, but you wish HTML Purifier to be
+    more relaxed and allow some of the parent domains too.
+    This directive has been available since 1.2.0.
+    'URI', 'Base', null, 'string/null', '
+    The base URI is the URI of the document this purified HTML will be
+    inserted into.  This information is important if HTML Purifier needs
+    to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute
+    is on.  You may use a non-absolute URI for this value, but behavior
+    may vary (%URI.MakeAbsolute deals nicely with both absolute and 
+    relative paths, but forwards-compatibility is not guaranteed).
+    <strong>Warning:</strong> If set, the scheme on this URI
+    overrides the one specified by %URI.DefaultScheme. This directive has
+    been available since 2.1.0.
+class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
+    var $type = 'URI';
+    var $filters = array();
+    var $registeredFilters = array();
+    /**
+     * HTMLPurifier_URI object of the base specified at %URI.Base
+     */
+    var $base;
+    /**
+     * String host to consider "home" base
+     */
+    var $host;
+    /**
+     * Name of default scheme based on %URI.DefaultScheme and %URI.Base
+     */
+    var $defaultScheme;
+    function HTMLPurifier_URIDefinition() {
+        // register the stock filters; registration alone does not activate them
+        $stock = array('DisableExternal', 'DisableExternalResources', 'HostBlacklist', 'MakeAbsolute');
+        foreach ($stock as $suffix) {
+            $class = 'HTMLPurifier_URIFilter_' . $suffix;
+            $this->registerFilter(new $class());
+        }
+    }
+    function registerFilter($filter) {
+        // registered filters are indexed by their name property
+        $name = $filter->name;
+        $this->registeredFilters[$name] = $filter;
+    }
+    function addFilter($filter, $config) {
+        // let the filter initialize itself from the configuration first,
+        // then store it among the active filters under its name
+        $filter->prepare($config);
+        $name = $filter->name;
+        $this->filters[$name] = $filter;
+    }
+    function doSetup($config) {
+        // member variables come first: filters may read the definition's
+        // base/host while they prepare themselves
+        $this->setupMemberVariables($config);
+        $this->setupFilters($config);
+    }
+    function setupFilters($config) {
+        foreach ($this->registeredFilters as $name => $filter) {
+            $setting = $config->get('URI', $name);
+            // a directive left at false or null keeps the filter inactive
+            if ($setting === false || $setting === null) continue;
+            $this->addFilter($filter, $config);
+        }
+        // the registrations are discarded once the active set has been built
+        unset($this->registeredFilters);
+    }
+    function setupMemberVariables($config) {
+        $this->host = $config->get('URI', 'Host');
+        $base_uri   = $config->get('URI', 'Base');
+        if ($base_uri !== null) {
+            // parse %URI.Base once; its scheme becomes the default scheme and
+            // its host is used only when %URI.Host was not given explicitly
+            $parser = new HTMLPurifier_URIParser();
+            $this->base = $parser->parse($base_uri);
+            $this->defaultScheme = $this->base->scheme;
+            if ($this->host === null) {
+                $this->host = $this->base->host;
+            }
+        }
+        // no base, or a base without a scheme: fall back to %URI.DefaultScheme
+        if ($this->defaultScheme === null) {
+            $this->defaultScheme = $config->get('URI', 'DefaultScheme');
+        }
+    }
+    function filter(&$uri, $config, &$context) {
+        // the active filters are addressed through their key so that the
+        // stored objects themselves are invoked; the first filter that
+        // reports failure rejects the URI
+        foreach (array_keys($this->filters) as $name) {
+            if (!$this->filters[$name]->filter($uri, $config, $context)) {
+                return false;
+            }
+        }
+        return true;
+    }
diff --git a/library/HTMLPurifier/URIFilter.php b/library/HTMLPurifier/URIFilter.php
new file mode 100644
index 00000000..e0066f3b
--- /dev/null
+++ b/library/HTMLPurifier/URIFilter.php
@@ -0,0 +1,24 @@
+ * Chainable filters for custom URI processing 
+ */
+class HTMLPurifier_URIFilter
+    var $name;
+    /**
+     * Performs initialization for the filter. The base implementation is
+     * empty; subclasses override it to read the settings they need.
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function prepare($config) {}
+    /**
+     * Filter a URI object. This base implementation only raises an
+     * E_USER_ERROR; concrete filters are expected to override it.
+     * @param &$uri Reference to URI object
+     * @param $config Instance of HTMLPurifier_Config
+     * @param &$context Instance of HTMLPurifier_Context
+     * @return Overriding filters return true to keep the URI and false to
+     *         reject it (a false result aborts the filter chain)
+     */
+    function filter(&$uri, $config, &$context) {
+        trigger_error('Cannot call abstract function', E_USER_ERROR);
+    }
diff --git a/library/HTMLPurifier/URIFilter/DisableExternal.php b/library/HTMLPurifier/URIFilter/DisableExternal.php
new file mode 100644
index 00000000..4e6dc187
--- /dev/null
+++ b/library/HTMLPurifier/URIFilter/DisableExternal.php
@@ -0,0 +1,34 @@
+require_once 'HTMLPurifier/URIFilter.php';
+    'URI', 'DisableExternal', false, 'bool',
+    'Disables links to external websites.  This is a highly effective '.
+    'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no '.
+    'links or images outside of your domain will be allowed.  Non-linkified '.
+    'URIs will still be preserved.  If you want to be able to link to '.
+    'subdomains or use absolute URIs, specify %URI.Host for your website. '.
+    'This directive has been available since 1.2.0.'
+class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
+    var $name = 'DisableExternal';
+    var $ourHostParts = false;
+    function prepare($config) {
+        $our_host = $config->get('URI', 'Host');
+        // without %URI.Host the host parts stay at their default of false
+        if ($our_host === null) return;
+        // store the labels right-to-left (TLD first) for suffix comparison
+        $labels = explode('.', $our_host);
+        $this->ourHostParts = array_reverse($labels);
+    }
+    function filter(&$uri, $config, &$context) {
+        // no host component: nothing to compare, let the URI through
+        if (is_null($uri->host)) return true;
+        // no home host configured: any URI carrying a host is rejected
+        if ($this->ourHostParts === false) return false;
+        // compare labels right-to-left; every label of our host must be
+        // matched, extra leading labels (subdomains) on the URI are allowed
+        $their_parts = array_reverse(explode('.', $uri->host));
+        foreach ($this->ourHostParts as $i => $our_part) {
+            if (!isset($their_parts[$i]) || $their_parts[$i] != $our_part) {
+                return false;
+            }
+        }
+        return true;
+    }
diff --git a/library/HTMLPurifier/URIFilter/DisableExternalResources.php b/library/HTMLPurifier/URIFilter/DisableExternalResources.php
new file mode 100644
index 00000000..dc00e741
--- /dev/null
+++ b/library/HTMLPurifier/URIFilter/DisableExternalResources.php
@@ -0,0 +1,26 @@
+require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
+    'URI', 'DisableExternalResources', false, 'bool',
+    'Disables the embedding of external resources, preventing users from '.
+    'embedding things like images from other hosts. This prevents '.
+    'access tracking (good for email viewers), bandwidth leeching, '.
+    'cross-site request forging, goatse.cx posting, and '.
+    'other nasties, but also results in '.
+    'a loss of end-user functionality (they can\'t directly post a pic '.
+    'they posted from Flickr anymore). Use it if you don\'t have a '.
+    'robust user-content moderation team. This directive has been '.
+    'available since 1.3.0.'
+class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
+    var $name = 'DisableExternalResources';
+    function filter(&$uri, $config, &$context) {
+        // only URIs flagged as embedded in the context are subjected to the
+        // external-host check of the parent filter; all others pass
+        if ($context->get('EmbeddedURI', true)) {
+            return parent::filter($uri, $config, $context);
+        }
+        return true;
+    }
diff --git a/library/HTMLPurifier/URIFilter/HostBlacklist.php b/library/HTMLPurifier/URIFilter/HostBlacklist.php
new file mode 100644
index 00000000..d3429d5c
--- /dev/null
+++ b/library/HTMLPurifier/URIFilter/HostBlacklist.php
@@ -0,0 +1,28 @@
+require_once 'HTMLPurifier/URIFilter.php';
+    'URI', 'HostBlacklist', array(), 'list',
+    'List of strings that are forbidden in the host of any URI. Use it to '.
+    'kill domain names of spam, etc. Note that it will catch anything in '.
+    'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
+    'This directive has been available since 1.3.0.'
+class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
+    var $name = 'HostBlacklist';
+    var $blacklist = array();
+    function prepare($config) {
+        // cache the configured list of forbidden host fragments
+        $fragments = $config->get('URI', 'HostBlacklist');
+        $this->blacklist = $fragments;
+    }
+    function filter(&$uri, $config, &$context) {
+        // reject the URI as soon as any blacklisted fragment occurs
+        // anywhere inside its host
+        foreach ($this->blacklist as $fragment) {
+            $found_at = strpos($uri->host, $fragment);
+            if ($found_at !== false) return false;
+        }
+        return true;
+    }
diff --git a/library/HTMLPurifier/URIFilter/MakeAbsolute.php b/library/HTMLPurifier/URIFilter/MakeAbsolute.php
new file mode 100644
index 00000000..9935dc6e
--- /dev/null
+++ b/library/HTMLPurifier/URIFilter/MakeAbsolute.php
@@ -0,0 +1,115 @@
+// does not support network paths
+require_once 'HTMLPurifier/URIFilter.php';
+    'URI', 'MakeAbsolute', false, 'bool', '
+    Converts all URIs into absolute forms. This is useful when the HTML
+    being filtered assumes a specific base path, but will actually be
+    viewed in a different context (and setting an alternate base URI is
+    not possible). %URI.Base must be set for this directive to work.
+    This directive has been available since 2.1.0.
+class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
+    var $name = 'MakeAbsolute';
+    var $base;
+    var $basePathStack = array();
+    function prepare($config) {
+        $def = $config->getDefinition('URI');
+        $this->base = $def->base;
+        if (is_null($this->base)) {
+            // without a base there is nothing to resolve against
+            trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_ERROR);
+            return;
+        }
+        // a fragment is invalid for a base URI
+        $this->base->fragment = null;
+        // pre-compute the base directory: split the path, drop the last
+        // segment and resolve any dot segments once up front
+        $segments = explode('/', $this->base->path);
+        array_pop($segments);
+        $this->basePathStack = $this->_collapseStack($segments);
+    }
+    /**
+     * Rewrites a relative URI into an absolute one using the prepared base.
+     * @param &$uri Reference to URI object, modified (or replaced) in place
+     * @param $config Instance of HTMLPurifier_Config
+     * @param &$context Instance of HTMLPurifier_Context
+     * @return True if the URI may be kept, false if it carries a scheme
+     *         for which no scheme object could be obtained
+     */
+    function filter(&$uri, $config, &$context) {
+        if (is_null($this->base)) return true; // abort early
+        if (
+            $uri->path === '' && is_null($uri->scheme) &&
+            is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)
+        ) {
+            // reference to current document
+            $uri = $this->base->copy();
+            return true;
+        }
+        if (!is_null($uri->scheme)) {
+            // absolute URI already: don't change
+            if (!is_null($uri->host)) return true;
+            $scheme_obj = $uri->getSchemeObj($config, $context);
+            if (!$scheme_obj) {
+                // getSchemeObj() returns false for unknown/unavailable
+                // schemes; reject instead of dereferencing a non-object
+                return false;
+            }
+            if (!$scheme_obj->hierarchical) {
+                // non-hierarchical URI with explicit scheme, don't change
+                return true;
+            }
+            // special case: the URI has a scheme that is always hierarchical
+            // but no authority, so it is resolved against the base below
+        }
+        if (!is_null($uri->host)) {
+            // network path, don't bother
+            return true;
+        }
+        if ($uri->path === '') {
+            $uri->path = $this->base->path;
+        } elseif ($uri->path[0] !== '/') {
+            // relative path: append to the base directory and resolve
+            // dot segments
+            $stack = explode('/', $uri->path);
+            $new_stack = array_merge($this->basePathStack, $stack);
+            $new_stack = $this->_collapseStack($new_stack);
+            $uri->path = implode('/', $new_stack);
+        }
+        // re-combine: the scheme always comes from the base, the authority
+        // parts only where the URI did not supply its own
+        $uri->scheme = $this->base->scheme;
+        if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo;
+        if (is_null($uri->host))     $uri->host     = $this->base->host;
+        if (is_null($uri->port))     $uri->port     = $this->base->port;
+        return true;
+    }
+    /**
+     * Resolve dots and double-dots in a path stack
+     * @private
+     */
+    function _collapseStack($stack) {
+        $result = array();
+        // must be defined even when the loop body never runs (empty stack,
+        // e.g. a base URI without any path); otherwise the check after the
+        // loop reads an undefined variable
+        $is_folder = false;
+        for ($i = 0; isset($stack[$i]); $i++) {
+            // tracks whether the *last* processed segment was "." or "..",
+            // in which case the resulting path must end in a slash
+            $is_folder = false;
+            // absorb an internally duplicated slash
+            if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue;
+            if ($stack[$i] == '..') {
+                if (!empty($result)) {
+                    $segment = array_pop($result);
+                    if ($segment === '' && empty($result)) {
+                        // error case: attempted to back out too far:
+                        // restore the leading slash
+                        $result[] = '';
+                    } elseif ($segment === '..') {
+                        $result[] = '..'; // cannot remove .. with ..
+                    }
+                } else {
+                    // relative path, preserve the double-dots
+                    $result[] = '..';
+                }
+                $is_folder = true;
+                continue;
+            }
+            if ($stack[$i] == '.') {
+                // silently absorb
+                $is_folder = true;
+                continue;
+            }
+            $result[] = $stack[$i];
+        }
+        // a trailing empty segment becomes the trailing slash on implode
+        if ($is_folder) $result[] = '';
+        return $result;
+    }
diff --git a/library/HTMLPurifier/URIParser.php b/library/HTMLPurifier/URIParser.php
new file mode 100644
index 00000000..dff7e28e
--- /dev/null
+++ b/library/HTMLPurifier/URIParser.php
@@ -0,0 +1,62 @@
+require_once 'HTMLPurifier/URI.php';
+ * Parses a URI into the components and fragment identifier as specified
+ * by RFC 2396.
+ * @todo Replace regexps with a native PHP parser
+ */
+class HTMLPurifier_URIParser
+    /**
+     * Parses a URI
+     * @param $uri string URI to parse
+     * @return HTMLPurifier_URI representation of URI
+     */
+    function parse($uri) {
+        // Generic URI splitter: every component is optional, and the
+        // characters < > ' " are excluded from all of them. The numbers in
+        // the comments are the capture groups holding the component text;
+        // the odd-numbered wrappers (1, 3, 6, 8) include the delimiter.
+        $r_URI = '!'.
+            '(([^:/?#<>\'"]+):)?'. // 2. Scheme
+            '(//([^/?#<>\'"]*))?'. // 4. Authority
+            '([^?#<>\'"]*)'.       // 5. Path
+            '(\?([^#<>\'"]*))?'.   // 7. Query
+            '(#([^<>\'"]*))?'.     // 9. Fragment
+            '!';
+        $matches = array();
+        $result = preg_match($r_URI, $uri, $matches);
+        if (!$result) return false; // *really* invalid URI
+        // separate out parts; the wrapper group is tested so that a
+        // component that is present but empty is distinguished from an
+        // absent one (null)
+        $scheme     = !empty($matches[1]) ? $matches[2] : null;
+        $authority  = !empty($matches[3]) ? $matches[4] : null;
+        $path       = $matches[5]; // always present, can be empty
+        $query      = !empty($matches[6]) ? $matches[7] : null;
+        $fragment   = !empty($matches[8]) ? $matches[9] : null;
+        // further parse authority
+        if ($authority !== null) {
+            // ridiculously inefficient: it's a stacked regex!
+            $HEXDIG = '[A-Fa-f0-9]';
+            $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
+            $sub_delims = '!$&\'()'; // needs []
+            $pct_encoded = "%$HEXDIG$HEXDIG";
+            $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
+            // groups: 1 = "userinfo@", 2 = userinfo, 3 = host (either a
+            // bracketed literal or everything up to a colon), 4 = ":port",
+            // 5 = port digits
+            $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
+            $matches = array();
+            preg_match($r_authority, $authority, $matches);
+            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
+            // an authority without a usable host yields the empty string, not null
+            $host       = !empty($matches[3]) ? $matches[3] : '';
+            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
+        } else {
+            // no authority at all: none of its parts exist
+            $port = $host = $userinfo = null;
+        }
+        return new HTMLPurifier_URI(
+            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
+    }
diff --git a/library/HTMLPurifier/URIScheme.php b/library/HTMLPurifier/URIScheme.php
index 9be99752..41c02f70 100644
--- a/library/HTMLPurifier/URIScheme.php
+++ b/library/HTMLPurifier/URIScheme.php
@@ -19,24 +19,24 @@ class HTMLPurifier_URIScheme
     var $browsable = false;
+    /**
+     * Whether or not the URI always uses <hier_part>, resolves edge cases
+     * with making relative URIs absolute
+     */
+    var $hierarchical = false;
      * Validates the components of a URI
      * @note This implementation should be called by children if they define
      *       a default port, as it does port processing.
-     * @note Fragment is omitted as that is scheme independent
-     * @param $userinfo User info found before at sign in authority
-     * @param $host Hostname in authority
-     * @param $port Port found after colon in authority
-     * @param $path Path of URI
-     * @param $query Query of URI, found after question mark
+     * @param $uri Instance of HTMLPurifier_URI
      * @param $config HTMLPurifier_Config object
      * @param $context HTMLPurifier_Context object
+     * @return Bool success or failure
-    function validateComponents(
-        $userinfo, $host, $port, $path, $query, $config, &$context
-    ) {
-        if ($this->default_port == $port) $port = null;
-        return array($userinfo, $host, $port, $path, $query);
+    function validate(&$uri, $config, &$context) {
+        if ($this->default_port == $uri->port) $uri->port = null;
+        return true;
diff --git a/library/HTMLPurifier/URIScheme/ftp.php b/library/HTMLPurifier/URIScheme/ftp.php
index 3dbb1446..5555ef33 100644
--- a/library/HTMLPurifier/URIScheme/ftp.php
+++ b/library/HTMLPurifier/URIScheme/ftp.php
@@ -9,35 +9,35 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
     var $default_port = 21;
     var $browsable = true; // usually
+    var $hierarchical = true;
-    function validateComponents(
-        $userinfo, $host, $port, $path, $query, $config, &$context
-    ) {
-        list($userinfo, $host, $port, $path, $query) = 
-            parent::validateComponents(
-                $userinfo, $host, $port, $path, $query, $config, $context );
-        $semicolon_pos = strrpos($path, ';'); // reverse
+    function validate(&$uri, $config, &$context) {
+        parent::validate($uri, $config, $context);
+        $uri->query    = null;
+        // typecode check
+        $semicolon_pos = strrpos($uri->path, ';'); // reverse
         if ($semicolon_pos !== false) {
-            // typecode check
-            $type = substr($path, $semicolon_pos + 1); // no semicolon
-            $path = substr($path, 0, $semicolon_pos);
+            $type = substr($uri->path, $semicolon_pos + 1); // no semicolon
+            $uri->path = substr($uri->path, 0, $semicolon_pos);
             $type_ret = '';
             if (strpos($type, '=') !== false) {
                 // figure out whether or not the declaration is correct
                 list($key, $typecode) = explode('=', $type, 2);
                 if ($key !== 'type') {
                     // invalid key, tack it back on encoded
-                    $path .= '%3B' . $type;
+                    $uri->path .= '%3B' . $type;
                 } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') {
                     $type_ret = ";type=$typecode";
             } else {
-                $path .= '%3B' . $type;
+                $uri->path .= '%3B' . $type;
-            $path = str_replace(';', '%3B', $path);
-            $path .= $type_ret;
+            $uri->path = str_replace(';', '%3B', $uri->path);
+            $uri->path .= $type_ret;
-        return array($userinfo, $host, $port, $path, null);
+        return true;
diff --git a/library/HTMLPurifier/URIScheme/http.php b/library/HTMLPurifier/URIScheme/http.php
index 18a1cf87..7abc6680 100644
--- a/library/HTMLPurifier/URIScheme/http.php
+++ b/library/HTMLPurifier/URIScheme/http.php
@@ -9,14 +9,12 @@ class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
     var $default_port = 80;
     var $browsable = true;
+    var $hierarchical = true;
-    function validateComponents(
-        $userinfo, $host, $port, $path, $query, $config, &$context
-    ) {
-        list($userinfo, $host, $port, $path, $query) = 
-            parent::validateComponents(
-                $userinfo, $host, $port, $path, $query, $config, $context );
-        return array(null, $host, $port, $path, $query);
+    function validate(&$uri, $config, &$context) {
+        parent::validate($uri, $config, $context);
+        $uri->userinfo = null;
+        return true;
diff --git a/library/HTMLPurifier/URIScheme/mailto.php b/library/HTMLPurifier/URIScheme/mailto.php
index 8e552f5c..f6acc6af 100644
--- a/library/HTMLPurifier/URIScheme/mailto.php
+++ b/library/HTMLPurifier/URIScheme/mailto.php
@@ -15,14 +15,13 @@ class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
     var $browsable = false;
-    function validateComponents(
-        $userinfo, $host, $port, $path, $query, $config, &$context
-    ) {
-        list($userinfo, $host, $port, $path, $query) = 
-            parent::validateComponents(
-                $userinfo, $host, $port, $path, $query, $config, $context );
+    function validate(&$uri, $config, &$context) {
+        parent::validate($uri, $config, $context);
+        $uri->userinfo = null;
+        $uri->host     = null;
+        $uri->port     = null;
         // we need to validate path against RFC 2368's addr-spec
-        return array(null, null, null, $path, $query);
+        return true;
diff --git a/library/HTMLPurifier/URIScheme/news.php b/library/HTMLPurifier/URIScheme/news.php
index 7b81834f..87bda63c 100644
--- a/library/HTMLPurifier/URIScheme/news.php
+++ b/library/HTMLPurifier/URIScheme/news.php
@@ -9,14 +9,14 @@ class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
     var $browsable = false;
-    function validateComponents(
-        $userinfo, $host, $port, $path, $query, $config, &$context
-    ) {
-        list($userinfo, $host, $port, $path, $query) = 
-            parent::validateComponents(
-                $userinfo, $host, $port, $path, $query, $config, $context );
+    function validate(&$uri, $config, &$context) {
+        parent::validate($uri, $config, $context);
+        $uri->userinfo = null;
+        $uri->host     = null;
+        $uri->port     = null;
+        $uri->query    = null;
         // typecode check needed on path
-        return array(null, null, null, $path, null);
+        return true;
diff --git a/library/HTMLPurifier/URIScheme/nntp.php b/library/HTMLPurifier/URIScheme/nntp.php
index 8f513419..caa85b26 100644
--- a/library/HTMLPurifier/URIScheme/nntp.php
+++ b/library/HTMLPurifier/URIScheme/nntp.php
@@ -10,13 +10,11 @@ class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
     var $default_port = 119;
     var $browsable = false;
-    function validateComponents(
-        $userinfo, $host, $port, $path, $query, $config, &$context
-    ) {
-        list($userinfo, $host, $port, $path, $query) = 
-            parent::validateComponents(
-                $userinfo, $host, $port, $path, $query, $config, $context );
-        return array(null, $host, $port, $path, null);
+    function validate(&$uri, $config, &$context) {
+        parent::validate($uri, $config, $context);
+        $uri->userinfo = null;
+        $uri->query    = null;
+        return true;
diff --git a/library/HTMLPurifier/URISchemeRegistry.php b/library/HTMLPurifier/URISchemeRegistry.php
index 5d8c462c..7716042d 100644
--- a/library/HTMLPurifier/URISchemeRegistry.php
+++ b/library/HTMLPurifier/URISchemeRegistry.php
@@ -79,12 +79,14 @@ class HTMLPurifier_URISchemeRegistry
         if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
-        if (empty($this->_dir)) $this->_dir = dirname(__FILE__) . '/URIScheme/';
+        if (empty($this->_dir)) $this->_dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/URIScheme/';
         if (!isset($allowed_schemes[$scheme])) return $null;
-        @include_once $this->_dir . $scheme . '.php';
+        // this bit of reflection is not very efficient, and a bit
+        // hacky too
         $class = 'HTMLPurifier_URIScheme_' . $scheme;
+        if (!class_exists($class)) include_once $this->_dir . $scheme . '.php';
         if (!class_exists($class)) return $null;
         $this->schemes[$scheme] = new $class();
         return $this->schemes[$scheme];
diff --git a/maintenance/common.php b/maintenance/common.php
new file mode 100644
index 00000000..d5437b77
--- /dev/null
+++ b/maintenance/common.php
@@ -0,0 +1,9 @@
+function assertCli() {
+    if (php_sapi_name() != 'cli' && !getenv('PHP_IS_CLI')) {
+        echo 'Script cannot be called from web-browser (if you are calling via cli,
+set environment variable PHP_IS_CLI to work around this).';
+        exit;
+    }
diff --git a/maintenance/flush-definition-cache.php b/maintenance/flush-definition-cache.php
new file mode 100755
index 00000000..6d51ab06
--- /dev/null
+++ b/maintenance/flush-definition-cache.php
@@ -0,0 +1,36 @@
+require_once 'common.php';
+ * Flushes the serialized definition caches (HTML, CSS, URI and Test)
+ * @param Accepts one argument, cache type to flush; otherwise flushes all
+ *      the caches.
+ */
+echo "Flushing cache... \n";
+require_once(dirname(__FILE__) . '/../library/HTMLPurifier.auto.php');
+$config = HTMLPurifier_Config::createDefault();
+$names = array('HTML', 'CSS', 'URI', 'Test');
+if (isset($argv[1])) {
+    if (in_array($argv[1], $names)) {
+        $names = array($argv[1]);
+    } else {
+        echo "Did not recognized cache parameter {$argv[1]} as valid cache, aborting.\n";
+        exit;
+    }
+foreach ($names as $name) {
+    echo " - Flushing $name\n";
+    $cache = new HTMLPurifier_DefinitionCache_Serializer($name);
+    $cache->flush($config);
+echo 'Cache flushed successfully.';
diff --git a/maintenance/flush-htmldefinition-cache.php b/maintenance/flush-htmldefinition-cache.php
deleted file mode 100644
index c6d31bfb..00000000
--- a/maintenance/flush-htmldefinition-cache.php
+++ /dev/null
@@ -1,23 +0,0 @@
- * Flushes the default HTMLDefinition serial cache
- */
-if (php_sapi_name() != 'cli') {
-    echo 'Script cannot be called from web-browser.';
-    exit;
-echo 'Flushing cache... ';
-require_once(dirname(__FILE__) . '/../library/HTMLPurifier.auto.php');
-$config = HTMLPurifier_Config::createDefault();
-$cache = new HTMLPurifier_DefinitionCache_Serializer('HTML');
-echo 'Cache flushed successfully.';
diff --git a/maintenance/generate-entity-file.php b/maintenance/generate-entity-file.php
old mode 100644
new mode 100755
index 062fed1c..01aca19a
--- a/maintenance/generate-entity-file.php
+++ b/maintenance/generate-entity-file.php
@@ -1,16 +1,14 @@
+require_once 'common.php';
  * Parses *.ent files into an entity lookup table, and then serializes and
  * writes the whole kaboodle to a file. The resulting file should be versioned.
-if (php_sapi_name() != 'cli') {
-    echo 'Script cannot be called from web-browser.';
-    exit;
 chdir( dirname(__FILE__) );
 // here's where the entity files are located, assuming working directory
diff --git a/maintenance/merge-library.php b/maintenance/merge-library.php
new file mode 100755
index 00000000..46c3c891
--- /dev/null
+++ b/maintenance/merge-library.php
@@ -0,0 +1,207 @@
+require_once 'common.php';
+ * Compiles all of HTML Purifier's library files into one big file
+ * named HTMLPurifier.standalone.php. Operates recursively, and will
+ * barf if there are conditional includes.
+ * 
+ * Details: also creates blank "include" files in the tests/blanks directory
+ * in order to simulate require_once's inside the test files.
+ */
+ * Global array that tracks already loaded includes
+ */
+$GLOBALS['loaded'] = array('HTMLPurifier.php' => true);
+ * @param $text Text to replace includes from
+ */
+function replace_includes($text) { // returns $text with every matching require_once statement replaced by the callback's output
+    return preg_replace_callback(
+        "/require_once ['\"]([^'\"]+)['\"];/", // only the unparenthesised, quoted form is matched; group 1 is the path
+        'replace_includes_callback', // called once per match with the matches array
+        $text
+    );
+/**
+ * Removes the leading PHP open tag from the contents of an included file.
+ * Assumes that there is no trailing close tag.
+ *
+ * @param $text Contents of a PHP file
+ * @return The text after the leading "<?php"; text that does not start
+ *         with that tag is returned unchanged
+ */
+function remove_php_tags($text) {
+    // only strip when the tag is really there: a blind substr() would
+    // silently eat the first five characters of any other input
+    if (strncmp($text, '<?php', 5) !== 0) return $text;
+    return substr($text, 5);
+}
+ * Creates an appropriate blank file, recursively generating directories
+ * if necessary
+ */
+function create_blank($file) { // writes an empty file named $file below ../tests/blanks/
+    $dir = dirname($file);
+    $base = realpath('../tests/blanks/') . DIRECTORY_SEPARATOR ; // relative path, so it depends on the current working directory
+    if ($dir != '.') mkdir_deep($base . $dir); // dirname() gives '.' when $file has no directory part
+    file_put_contents($base . $file, ''); // empty contents: creates the file or truncates an existing one
+ * Recursively creates a directory
+ * @note Adapted from the PHP manual comment 76612
+ */
+function mkdir_deep($folder) { // creates $folder and any missing parent directories, one path segment at a time
+    $folders = preg_split("#[\\\\/]#", $folder); // split on either a backslash or a forward slash
+    $base = ''; // path accumulated so far
+    for($i = 0, $c = count($folders); $i < $c; $i++) {
+        if(empty($folders[$i])) { // empty segment (note that empty() is also true for a segment named "0")
+            if (!$i) {
+                // special case for root level
+                $base .= DIRECTORY_SEPARATOR;
+            }
+            continue;
+        }
+        $base .= $folders[$i];
+        if(!is_dir($base)){
+            mkdir($base); // return value is not checked
+        }
+        $base .= DIRECTORY_SEPARATOR;
+    }
+ * Copy a file, or recursively copy a folder and its contents
+ *
+ * @author      Aidan Lister <aidan@php.net>
+ * @version     1.0.1
+ * @link        http://aidanlister.com/repos/v/function.copyr.php
+ * @param       string   $source    Source path
+ * @param       string   $dest      Destination path
+ * @return      bool     Returns TRUE on success, FALSE on failure
+ */
+function copyr($source, $dest) {
+    // Simple copy for a file
+    if (is_file($source)) {
+        return copy($source, $dest);
+    }
+    // Make destination directory
+    if (!is_dir($dest)) {
+        mkdir($dest); // single level only; the parent of $dest must already exist
+    }
+    // Loop through the folder
+    $dir = dir($source);
+    while (false !== $entry = $dir->read()) { // read() returns false once the entries are exhausted
+        // Skip pointers
+        if ($entry == '.' || $entry == '..') {
+            continue;
+        }
+        // Skip hidden files
+        if ($entry[0] == '.') { // any name starting with a dot, directories included
+            continue;
+        }
+        // Deep copy directories
+        if ($dest !== "$source/$entry") { // do not recurse into the destination itself; files are copied by the recursive call too
+            copyr("$source/$entry", "$dest/$entry"); // result of the nested copy is ignored
+        }
+    }
+    // Clean up
+    $dir->close();
+    return true; // the directory branch always reports success
+ * Delete a file, or a folder and its contents
+ *
+ * @author      Aidan Lister <aidan@php.net>
+ * @version     1.0.3
+ * @link        http://aidanlister.com/repos/v/function.rmdirr.php
+ * @param       string   $dirname    Directory to delete
+ * @return      bool     Returns TRUE on success, FALSE on failure
+ */
+function rmdirr($dirname)
+    // Sanity check
+    if (!file_exists($dirname)) {
+        return false; // nothing to delete
+    }
+    // Simple delete for a file
+    if (is_file($dirname) || is_link($dirname)) { // links are unlinked rather than followed
+        return unlink($dirname);
+    }
+    // Loop through the folder
+    $dir = dir($dirname);
+    while (false !== $entry = $dir->read()) { // read() returns false once the entries are exhausted
+        // Skip pointers
+        if ($entry == '.' || $entry == '..') {
+            continue;
+        }
+        // Recurse
+        rmdirr($dirname . DIRECTORY_SEPARATOR . $entry); // result ignored; a failure surfaces through rmdir() below
+    }
+    // Clean up
+    $dir->close();
+    return rmdir($dirname); // remove the directory itself once its entries have been processed
+ * Copies the contents of a directory to the standalone directory
+ */
+function make_dir_standalone($dir) { // $dir is a path relative to the current working directory
+    return copyr($dir, 'standalone/' . $dir); // same relative path, below standalone/
+function make_file_standalone($file) { // copies a single file to the same relative path below standalone/
+    mkdir_deep('standalone/' . dirname($file)); // make sure the target directory exists first
+    return copy($file, 'standalone/' . $file); // result of copy() is returned to the caller
+ * @param $matches preg_replace_callback matches array, where index 1
+ *        is the filename to include
+ */
+function replace_includes_callback($matches) { // returns the text that replaces one matched require_once statement
+    $file = $matches[1];
+    // PHP 5 only file
+    if ($file == 'HTMLPurifier/Lexer/DOMLex.php') {
+        return $matches[0]; // keep the original require_once statement untouched
+    }
+    if (isset($GLOBALS['loaded'][$file])) return ''; // already inlined once, drop the duplicate include
+    $GLOBALS['loaded'][$file] = true; // mark before recursing into the file
+    create_blank($file); // empty stand-in file under tests/blanks
+    return replace_includes(remove_php_tags(file_get_contents($file))); // inline the file and, recursively, its own includes
+chdir(dirname(__FILE__) . '/../library/'); // every relative path below resolves against the library directory
+echo 'Creating full file...';
+$contents = replace_includes(file_get_contents('HTMLPurifier.php')); // start from the main file and inline its includes
+$contents = str_replace( // re-point HTMLPURIFIER_PREFIX at the standalone directory and prepend it to the include path
+    "define('HTMLPURIFIER_PREFIX', dirname(__FILE__));",
+    "define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
+set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());",
+    $contents
+file_put_contents('HTMLPurifier.standalone.php', $contents); // written into the library directory
+echo ' done!' . PHP_EOL;
+echo 'Creating standalone directory...';
+rmdirr('standalone'); // ensure a clean copy
+// PHP 5 only file
+echo ' done!' . PHP_EOL;
diff --git a/plugins/phorum/config.default.php b/plugins/phorum/config.default.php
new file mode 100644
index 00000000..2f9031cc
--- /dev/null
+++ b/plugins/phorum/config.default.php
@@ -0,0 +1,56 @@
+if(!defined("PHORUM")) exit; // stop immediately unless the PHORUM constant is defined
+// default HTML Purifier configuration settings
+$config->set('HTML', 'Allowed',
+  // alphabetically sorted
+$config->set('AutoFormat', 'AutoParagraph', true);
+$config->set('AutoFormat', 'Linkify', true);
+$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
+$config->set('Core', 'AggressivelyFixLt', true);
+$config->set('Core', 'Encoding', $GLOBALS['PHORUM']['DATA']['CHARSET']); // we'll change this eventually
+if (strtolower($GLOBALS['PHORUM']['DATA']['CHARSET']) !== 'utf-8') { // case-insensitive comparison of the configured charset
+  $config->set('Core', 'EscapeNonASCIICharacters', true); // only switched on for non-UTF-8 charsets
diff --git a/plugins/phorum/htmlpurifier.php b/plugins/phorum/htmlpurifier.php
new file mode 100644
index 00000000..4654c65d
--- /dev/null
+++ b/plugins/phorum/htmlpurifier.php
@@ -0,0 +1,272 @@
+ * HTML Purifier Phorum Mod. Filter your HTML the Standards-Compliant Way!
+ * 
+ * This Phorum mod enables users to post raw HTML into Phorum.  But never
+ * fear: with the help of HTML Purifier, this HTML will be beat into
+ * de-XSSed and standards-compliant form, safe for general consumption.
+ * It is not recommended, but possible to run this mod in parallel
+ * with other formatters (in short, please DISABLE the BBcode mod).
+ * 
+ * For help migrating from your previous markup language to pure HTML
+ * please check the migrate.bbcode.php file.
+ * 
+ * If you'd like to use this with a WYSIWYG editor, make sure that
+ * editor sets $PHORUM['mod_htmlpurifier']['wysiwyg'] to true. Otherwise,
+ * administrators who need to edit other people's comments may be at
+ * risk for some nasty attacks.
+ * 
+ * Tested with Phorum 5.1.22. This module will almost definitely need
+ * to be upgraded when Phorum 6 rolls around.
+ */
+// Note: Cache data is base64 encoded because Phorum insists on flinging
+// it to the user and expecting it to come back unharmed, newlines and
+// all, which ain't happening. It's slower, it takes up more space, but
+// at least it won't get mutilated.
+/**
+ * Purifies a data array
+ *
+ * Messages with a truthy id are treated as real: their purified body is
+ * cached base64-encoded in the message meta together with the current
+ * cache serial, and written back with phorum_db_update_message().
+ * Messages with a falsy id are purified but never cached.
+ *
+ * @param $data Array of message arrays, keyed by message id
+ * @return The same array, with each 'body' replaced by purified HTML
+ */
+function phorum_htmlpurifier_format($data)
+{
+    // the cache serial and the purge argument live in Phorum's global
+    // state; without this declaration they would always read as empty
+    global $PHORUM;
+    $purifier =& HTMLPurifier::getInstance();
+    $cache_serial = $PHORUM['mod_htmlpurifier']['body_cache_serial'];
+    foreach($data as $message_id => $message){
+        if(isset($message['body'])) {
+            if ($message_id) {
+                // we're dealing with a real message, not a fake, so
+                // there a number of shortcuts that can be taken
+                if (isset($message['meta']['htmlpurifier_light'])) {
+                    // format hook was called outside of Phorum's normal
+                    // functions, do the abridged purification
+                    $data[$message_id]['body'] = $purifier->purify($message['body']);
+                    continue;
+                }
+                if (!empty($PHORUM['args']['purge'])) {
+                    // purge the cache, must be below the following if
+                    unset($message['meta']['body_cache']);
+                }
+                if (
+                    isset($message['meta']['body_cache']) &&
+                    isset($message['meta']['body_cache_serial']) &&
+                    $message['meta']['body_cache_serial'] == $cache_serial
+                ) {
+                    // cached version is present, bail out early
+                    $data[$message_id]['body'] = base64_decode($message['meta']['body_cache']);
+                    continue;
+                }
+            }
+            // migration might edit this array, that's why it's defined
+            // so early
+            $updated_message = array();
+            // create the $body variable
+            if (
+                $message_id && // message must be real to migrate
+                !isset($message['meta']['body_cache_serial'])
+            ) {
+                // perform migration
+                $fake_data = array();
+                list($signature, $edit_message) = phorum_htmlpurifier_remove_sig_and_editmessage($message);
+                $fake_data[$message_id] = $message;
+                $fake_data = phorum_htmlpurifier_migrate($fake_data);
+                $body = $fake_data[$message_id]['body'];
+                $body = str_replace("<phorum break>", '', $body);
+                $updated_message['body'] = $body; // save it in
+                $body .= $signature . $edit_message; // add it back in
+            } else {
+                // reverse Phorum's pre-processing
+                $body = $message['body'];
+                // order is important
+                $body = str_replace("<phorum break>\n", "\n", $body);
+                $body = str_replace(array('&lt;','&gt;','&amp;'), array('<','>','&'), $body);
+                if (!$message_id && defined('PHORUM_CONTROL_CENTER')) {
+                    // we're in control.php, so it was double-escaped
+                    $body = str_replace(array('&lt;','&gt;','&amp;', '&quot;'), array('<','>','&','"'), $body);
+                }
+            }
+            $body = $purifier->purify($body);
+            // dynamically update the cache (MUST BE DONE HERE!)
+            // this is inefficient because it's one db call per
+            // cache miss, but once the cache is in place things are
+            // a lot zippier.
+            if ($message_id) { // make sure it's not a fake id
+                $updated_message['meta'] = $message['meta'];
+                $updated_message['meta']['body_cache'] = base64_encode($body);
+                $updated_message['meta']['body_cache_serial'] = $cache_serial;
+                phorum_db_update_message($message_id, $updated_message);
+            }
+            // must not get overloaded until after we cache it, otherwise
+            // we'll inadvertently change the original text
+            $data[$message_id]['body'] = $body;
+        }
+    }
+    return $data;
+}
+// -----------------------------------------------------------------------
+// This is fragile code, copied from read.php:359. It will break if
+// that is changed
+ * Generates a signature based on a message array
+ */
+function phorum_htmlpurifier_generate_sig($row) { // returns '' or the trimmed signature prefixed with two newlines
+    $phorum_sig = '';
+    if(isset($row["user"]["signature"])
+       && isset($row['meta']['show_signature']) && $row['meta']['show_signature']==1){ // loose comparison: "1" and true match as well
+           $phorum_sig=trim($row["user"]["signature"]);
+           if(!empty($phorum_sig)){ // a whitespace-only signature (or the string "0") yields ''
+               $phorum_sig="\n\n$phorum_sig";
+           }
+    }
+    return $phorum_sig;
+/**
+ * Generates an edit message based on a message array
+ *
+ * @param $row Message array; the edit_count, edit_date and edit_username
+ *        entries of its 'meta' element are read
+ * @return '' when the message was never edited, otherwise the language
+ *         template "EditedMessage" with its placeholders filled in,
+ *         prefixed with four newlines
+ */
+function phorum_htmlpurifier_generate_editmessage($row) {
+    // the template and the date format come from Phorum's global state;
+    // without this declaration $PHORUM is an undefined local variable
+    global $PHORUM;
+    $editmessage = '';
+    if(isset($row['meta']['edit_count']) && $row['meta']['edit_count'] > 0) {
+        $editmessage = str_replace ("%count%", $row['meta']['edit_count'], $PHORUM["DATA"]["LANG"]["EditedMessage"]);
+        $editmessage = str_replace ("%lastedit%", phorum_date($PHORUM["short_date"],$row['meta']['edit_date']),  $editmessage);
+        $editmessage = str_replace ("%lastuser%", $row['meta']['edit_username'],  $editmessage);
+        $editmessage="\n\n\n\n$editmessage";
+    }
+    return $editmessage;
+}
+// End fragile code
+// -----------------------------------------------------------------------
+/**
+ * Removes the signature and edit message from a message
+ * @param $row Message passed by reference; its 'body' is modified in place
+ * @return array(signature, edit message) exactly as they were generated,
+ *         so the caller can append them again later
+ */
+function phorum_htmlpurifier_remove_sig_and_editmessage(&$row) {
+    // attempt to remove the Phorum's pre-processing:
+    // we must not process the signature or editmessage
+    $signature = phorum_htmlpurifier_generate_sig($row);
+    $editmessage = phorum_htmlpurifier_generate_editmessage($row);
+    // both generators return '' when there is nothing to show; an
+    // empty-string key must never reach strtr(), older PHP versions
+    // answer it with false and the body would be lost
+    $replacements = array();
+    if ($signature !== '') $replacements[$signature] = '';
+    if ($editmessage !== '') $replacements[$editmessage] = '';
+    if ($replacements) {
+        $row['body'] = strtr($row['body'], $replacements);
+    }
+    return array($signature, $editmessage);
+}
+/**
+ * Indicate that data is fully HTML and not from migration, invalidate
+ * previous caches
+ * @note This function used to generate the actual cache entries, but
+ * since there's data missing that must be deferred to the first read
+ * @param $message Message array about to be stored
+ * @return Message array without a cached body and stamped with the
+ *         current cache serial
+ */
+function phorum_htmlpurifier_posting($message) {
+    // the serial lives in Phorum's global state; without this
+    // declaration a null serial would be stored
+    global $PHORUM;
+    unset($message['meta']['body_cache']); // invalidate the cache
+    $message['meta']['body_cache_serial'] = $PHORUM['mod_htmlpurifier']['body_cache_serial'];
+    return $message;
+}
+ * Overload quoting mechanism to prevent default, mail-style quote from happening
+ */
+function phorum_htmlpurifier_quote($array) { // $array[0] ends up in the cite attribute, $array[1] is the text being quoted
+    $purifier =& HTMLPurifier::getInstance();
+    $text = $purifier->purify($array[1]); // only the quoted text is purified
+    return "<blockquote cite=\"$array[0]\">\n$text\n</blockquote>"; // NOTE(review): $array[0] is interpolated here without escaping - confirm it is safe upstream
+ * Ensure that our format hook is processed last. Also, loads the library.
+ * @credits <http://secretsauce.phorum.org/snippets/make_bbcode_last_formatter.php.txt>
+ */
+function phorum_htmlpurifier_common() { // loads the library and migration file, stores the cache serial, reorders the format hook
+    require_once(dirname(__FILE__).'/htmlpurifier/HTMLPurifier.auto.php');
+    require(dirname(__FILE__).'/init-config.php'); // NOTE(review): presumably defines phorum_htmlpurifier_get_config() - confirm
+    $config = phorum_htmlpurifier_get_config();
+    HTMLPurifier::getInstance($config); // return value unused; NOTE(review): presumably primes the shared instance with this config - confirm
+    // increment revision.txt if you want to invalidate the cache
+    $GLOBALS['PHORUM']['mod_htmlpurifier']['body_cache_serial'] = $config->getSerial();
+    // load migration
+    if (file_exists(dirname(__FILE__) . '/migrate.php')) {
+        include(dirname(__FILE__) . '/migrate.php');
+    } else {
+        echo '<strong>Error:</strong> No migration path specified for HTML Purifier, please check
+        <tt>modes/htmlpurifier/migrate.bbcode.php</tt> for instructions on
+        how to migrate from your previous markup language.';
+        exit; // the request is aborted when no migrate.php exists
+    }
+    // see if our hooks need to be bubbled to the end
+    phorum_htmlpurifier_bubble_hook('format');
+/**
+ * Moves the htmlpurifier entry to the end of a hook's 'mods' list, and
+ * the entry at the same position to the end of its 'funcs' list.
+ *
+ * @param $hook Name of the hook to reorder in $PHORUM['hooks']
+ */
+function phorum_htmlpurifier_bubble_hook($hook) {
+    global $PHORUM;
+    $our_idx = null;
+    $last_idx = null;
+    if (!isset($PHORUM['hooks'][$hook]['mods'])) return;
+    foreach ($PHORUM['hooks'][$hook]['mods'] as $idx => $mod) {
+        if ($mod == 'htmlpurifier') $our_idx = $idx;
+        $last_idx = $idx;
+    }
+    // not registered for this hook: array_splice() would treat the null
+    // offset as 0 and move an unrelated module to the end instead
+    if ($our_idx === null) return;
+    // already last, nothing to move
+    if ($our_idx === $last_idx) return;
+    list($mod) = array_splice($PHORUM['hooks'][$hook]['mods'], $our_idx, 1);
+    $PHORUM['hooks'][$hook]['mods'][] = $mod;
+    list($func) = array_splice($PHORUM['hooks'][$hook]['funcs'], $our_idx, 1);
+    $PHORUM['hooks'][$hook]['funcs'][] = $func;
+}
+/**
+ * Pre-emptively performs purification if it looks like a WYSIWYG editor
+ * is being used
+ *
+ * @param $message Message array about to be shown in the editor
+ * @return Message array; when the wysiwyg flag is set and the body is
+ *         not empty, 'body' holds the purified, re-escaped text
+ */
+function phorum_htmlpurifier_before_editor($message) {
+    if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) {
+        if (!empty($message['body'])) {
+            $body = $message['body'];
+            // de-entity-ize contents
+            $body = str_replace(array('&lt;','&gt;','&amp;'), array('<','>','&'), $body);
+            $purifier =& HTMLPurifier::getInstance();
+            // purify the decoded text, not the still-escaped original
+            $body = $purifier->purify($body);
+            // re-entity-ize contents
+            $body = htmlspecialchars($body, ENT_QUOTES, $GLOBALS['PHORUM']['DATA']['CHARSET']);
+            // hand the result back, otherwise all of the above is discarded
+            $message['body'] = $body;
+        }
+    }
+    return $message;
+}
+function phorum_htmlpurifier_editor_after_subject() { // prints a table row with HTML-input instructions; returns nothing
+    // don't show this message if it's a WYSIWYG editor, since it will
+    // then be handled automatically
+    if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) return; // same flag that before_editor checks
+    ?><tr><td colspan="2" style="padding:1em 0.3em;">
+  HTML input is <strong>on</strong>. Make sure you escape all HTML and
+  angled-brackets with &amp;lt; and &amp;gt; (you can also use CDATA
+  tags, simply wrap the suspect text with
+&lt;![CDATA[<em>text</em>]]&gt;. Paragraphs will only be applied to 
+double-spaces; single-spaces will not generate <tt>&lt;br&gt;</tt> tags.
+    </td></tr><?php
diff --git a/plugins/phorum/htmlpurifier/LICENSE b/plugins/phorum/htmlpurifier/LICENSE
new file mode 100644
index 00000000..5ab7695a
--- /dev/null
+++ b/plugins/phorum/htmlpurifier/LICENSE
@@ -0,0 +1,504 @@
+		       Version 2.1, February 1999
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+			    Preamble
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+    a) The modified work must itself be a software library.
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+           How to Apply These Terms to Your New Libraries
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+Also add information on how to contact you by electronic and paper mail.
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+That's all there is to it!
diff --git a/plugins/phorum/htmlpurifier/README b/plugins/phorum/htmlpurifier/README
new file mode 100644
index 00000000..65334fb3
--- /dev/null
+++ b/plugins/phorum/htmlpurifier/README
@@ -0,0 +1 @@
+The contents of the library/ folder should be here.
diff --git a/plugins/phorum/info.txt b/plugins/phorum/info.txt
new file mode 100644
index 00000000..ed2f4ae5
--- /dev/null
+++ b/plugins/phorum/info.txt
@@ -0,0 +1,8 @@
+hook:  format|phorum_htmlpurifier_format
+hook:  quote|phorum_htmlpurifier_quote
+hook:  posting_custom_action|phorum_htmlpurifier_posting
+hook:  common|phorum_htmlpurifier_common
+hook:  before_editor|phorum_htmlpurifier_before_editor
+hook:  tpl_editor_after_subject|phorum_htmlpurifier_editor_after_subject
+title: HTML Purifier Phorum Mod
+desc:  This module enables standards-compliant HTML filtering on Phorum. Please check migrate.bbcode.php before enabling this mod.
\ No newline at end of file
diff --git a/plugins/phorum/init-config.php b/plugins/phorum/init-config.php
new file mode 100644
index 00000000..c279f67d
--- /dev/null
+++ b/plugins/phorum/init-config.php
@@ -0,0 +1,27 @@
+ * Initializes the appropriate configuration from either a PHP file
+ * or a module configuration value
+ * @return Instance of HTMLPurifier_Config
+ */
+function phorum_htmlpurifier_get_config() {
+    global $PHORUM;
+    $config_exists = phorum_htmlpurifier_config_file_exists();
+    if ($config_exists || !isset($PHORUM['mod_htmlpurifier']['config'])) {
+        $config = HTMLPurifier_Config::createDefault();
+        include(dirname(__FILE__) . '/config.default.php');
+        if ($config_exists) {
+            include(dirname(__FILE__) . '/config.php');
+        }
+        unset($PHORUM['mod_htmlpurifier']['config']); // unnecessary
+    } else {
+        $config = HTMLPurifier_Config::create($PHORUM['mod_htmlpurifier']['config']);
+    }
+    return $config;
+function phorum_htmlpurifier_config_file_exists() {
+    return file_exists(dirname(__FILE__) . '/config.php');
diff --git a/plugins/phorum/install.txt b/plugins/phorum/install.txt
new file mode 100644
index 00000000..d1848938
--- /dev/null
+++ b/plugins/phorum/install.txt
@@ -0,0 +1,33 @@
+HTML Purifier Phorum Mod - Filter your HTML the Standards-Compliant Way!
+This Phorum mod enables HTML posting on Phorum.  Under normal circumstances,
+this would cause a huge security risk, but because we are running
+HTML through HTML Purifier, output is guaranteed to be XSS free and
+standards-compliant.
+This mod requires HTML input, and previous markup languages need to be
+converted accordingly.  Thus, it is vital that you create a 'migrate.php'
+file that works with your installation. If you're using the built-in
+BBCode formatting, simply move migrate.bbcode.php to that place; for
+other markup languages, consult said file for instructions on how
+to adapt it to your needs.
+This module will not work if 'migrate.php' is not created, and an improperly
+made migration file may *CORRUPT* Phorum, so please take your time to
+do this correctly. It should go without saying to *BACKUP YOUR DATABASE*
+before attempting anything here.
+This module will not automatically migrate user signatures, because this
+process may take a long time. After installing the HTML Purifier module and
+then configuring 'migrate.php', navigate to Settings and click 'Migrate
+Signatures' to migrate all user signatures.
+The version of HTML Purifier bundled with this mod is a custom modified 2.0.1.
+Do not attempt to replace it with a version equal to or less than 2.0.1
+downloaded from the HTML Purifier website: the module will combust
+spectacularly. (Greater versions, however, are okay, because the changes
+made to accommodate this module have been committed to the trunk).
+Visit HTML Purifier at <http://htmlpurifier.org/>. May the force
+be with you.
diff --git a/plugins/phorum/migrate.bbcode.php b/plugins/phorum/migrate.bbcode.php
new file mode 100644
index 00000000..58316b07
--- /dev/null
+++ b/plugins/phorum/migrate.bbcode.php
@@ -0,0 +1,28 @@
+ * This file is responsible for migrating from a specific markup language
+ * to HTML.
+ * 
+ * Copy this file to 'migrate.php' and it will automatically work for
+ * BBCode; you may need to tweak this a little to get it to work for other
+ * languages (usually, just replace the include name and the function name).
+ * 
+ * If you do NOT want to have any migration performed (for instance, you
+ * are installing the module on a new forum with no posts), simply remove
+ * the phorum_htmlpurifier_migrate() function. You still need migrate.php
+ * present, otherwise the module won't work.
+ */
+if(!defined("PHORUM")) exit;
+require_once(dirname(__FILE__) . "/../bbcode/bbcode.php");
+ * 'format' hook style function that will be called to convert
+ * legacy markup into HTML.
+ */
+function phorum_htmlpurifier_migrate($data) {
+    return phorum_bb_code($data); // bbcode's 'format' hook
diff --git a/plugins/phorum/settings.php b/plugins/phorum/settings.php
new file mode 100644
index 00000000..4754d8b0
--- /dev/null
+++ b/plugins/phorum/settings.php
@@ -0,0 +1,63 @@
+// based off of BBCode's settings file
+ * HTML Purifier Phorum mod settings configuration. This provides
+ * a convenient web-interface for editing the most common HTML Purifier
+ * configuration directives. You can also specify custom configuration
+ * by creating a 'config.php' file.
+ */
+if(!defined("PHORUM_ADMIN")) exit;
+// error reporting is good!
+error_reporting(E_ALL ^ E_NOTICE);
+// load library and other paraphernalia
+require_once './include/admin/PhorumInputForm.php';
+require_once (dirname(__FILE__) . '/htmlpurifier/HTMLPurifier.auto.php');
+require_once (dirname(__FILE__) . '/init-config.php');
+require_once (dirname(__FILE__) . '/settings/migrate-sigs-form.php');
+require_once (dirname(__FILE__) . '/settings/migrate-sigs.php');
+require_once (dirname(__FILE__) . '/settings/form.php');
+require_once (dirname(__FILE__) . '/settings/save.php');
+// define friendly configuration directives. you can expand this array
+// to get more web-definable directives
+$PHORUM['mod_htmlpurifier']['directives'] = array(
+    'URI.Host', // auto-detectable
+    'URI.DisableExternal',
+    'URI.DisableExternalResources',
+    'URI.DisableResources',
+    'URI.Munge',
+    'URI.HostBlacklist',
+    'URI.Disable',
+    'HTML.TidyLevel',
+    'HTML.Doctype', // auto-detectable
+    'HTML.Allowed',
+    'AutoFormat',
+    '-AutoFormat.Custom',
+    '-AutoFormat.PurifierLinkify',
+    'Output.TidyFormat',
+// lower this setting if you're getting time outs/out of memory
+$PHORUM['mod_htmlpurifier']['migrate-sigs-increment'] = 100;
+if (isset($_POST['reset'])) {
+    unset($PHORUM['mod_htmlpurifier']['config']);
+if ($offset = phorum_htmlpurifier_migrate_sigs_check()) {
+    // migrate signatures
+    phorum_htmlpurifier_migrate_sigs($offset);
+} elseif(!empty($_POST)){
+    // save settings
+    phorum_htmlpurifier_save_settings();
+echo '<br />';
diff --git a/plugins/phorum/settings/form.php b/plugins/phorum/settings/form.php
new file mode 100644
index 00000000..b957b8d2
--- /dev/null
+++ b/plugins/phorum/settings/form.php
@@ -0,0 +1,79 @@
+function phorum_htmlpurifier_show_form() {
+    if (phorum_htmlpurifier_config_file_exists()) {
+        phorum_htmlpurifier_show_config_info();
+        return;
+    }
+    global $PHORUM;
+    $config = phorum_htmlpurifier_get_config();
+    $frm = new PhorumInputForm ("", "post", "Save");
+    $frm->hidden("module", "modsettings");
+    $frm->hidden("mod", "htmlpurifier"); // this is the directory name that the Settings file lives in
+    if (!empty($error)){
+        echo "$error<br />";
+    }
+    $frm->addbreak("Edit settings for the HTML Purifier module");
+    $frm->addMessage('<p>Click on directive links to read what each option does
+    (links do not open in new windows).</p>
+    <p>For more flexibility (for instance, you want to edit the full
+    range of configuration directives), you can create a <tt>config.php</tt>
+    file in your <tt>mods/htmlpurifier/</tt> directory. Doing so will,
+    however, make the web configuration interface unavailable.</p>');
+    require_once 'HTMLPurifier/Printer/ConfigForm.php';
+    $htmlpurifier_form = new HTMLPurifier_Printer_ConfigForm('config', 'http://htmlpurifier.org/live/configdoc/plain.html#%s');
+    $htmlpurifier_form->setTextareaDimensions(23, 7); // widen a little, since we have space
+    $frm->addMessage($htmlpurifier_form->render(
+        $config, $PHORUM['mod_htmlpurifier']['directives'], false));
+    $frm->addMessage("<strong>Warning: Changing HTML Purifier's configuration will invalidate
+      the cache. Expect to see a flurry of database activity after you change
+      any of these settings.</strong>");
+    $frm->addrow('Reset to defaults:', $frm->checkbox("reset", "1", "", false));
+    // hack to include extra styling
+    echo '<style type="text/css">' . $htmlpurifier_form->getCSS() . '
+    .hp-config {margin-left:auto;margin-right:auto;}
+    </style>';
+    $js = $htmlpurifier_form->getJavaScript();
+    echo '<script type="text/javascript">'."<!--\n$js\n//-->".'</script>';
+    $frm->show();
+function phorum_htmlpurifier_show_config_info() {
+    global $PHORUM;
+    // update mod_htmlpurifier for housekeeping
+    phorum_htmlpurifier_commit_settings();
+    // politely tell user how to edit settings manually
+        <div class="input-form-td-break">How to edit settings for HTML Purifier module</div>
+        <p>
+          A <tt>config.php</tt> file exists in your <tt>mods/htmlpurifier/</tt>
+          directory. This file contains your custom configuration: in order to
+          change it, please navigate to that file and edit it accordingly.
+        </p>
+        <p>
+          To use the web interface, delete <tt>config.php</tt> (or rename it to
+          <tt>config.php.bak</tt>).
+        </p>
+        <p>
+          <strong>Warning: Changing HTML Purifier's configuration will invalidate
+          the cache. Expect to see a flurry of database activity after you change
+          any of these settings.</strong>
+        </p>
diff --git a/plugins/phorum/settings/migrate-sigs-form.php b/plugins/phorum/settings/migrate-sigs-form.php
new file mode 100644
index 00000000..ad4877b5
--- /dev/null
+++ b/plugins/phorum/settings/migrate-sigs-form.php
@@ -0,0 +1,21 @@
+function phorum_htmlpurifier_show_migrate_sigs_form() {
+    $frm = new PhorumInputForm ('', "post", "Migrate");
+    $frm->hidden("module", "modsettings");
+    $frm->hidden("mod", "htmlpurifier");
+    $frm->hidden("migrate-sigs", "1");
+    $frm->addbreak("Migrate user signatures to HTML");
+    $frm->addMessage('This operation will migrate your users signatures
+        to HTML. <strong>This process is irreversible and must only be performed once.</strong>
+        Type in yes in the confirmation field to migrate.');
+    if (!file_exists(dirname(__FILE__) . '/../migrate.php')) {
+        $frm->addMessage('Migration file does not exist, cannot migrate signatures.
+            Please check <tt>migrate.bbcode.php</tt> on how to create an appropriate file.');
+    } else {
+        $frm->addrow('Confirm:', $frm->text_box("confirmation", ""));
+    }
+    $frm->show();
diff --git a/plugins/phorum/settings/migrate-sigs.php b/plugins/phorum/settings/migrate-sigs.php
new file mode 100644
index 00000000..7896be36
--- /dev/null
+++ b/plugins/phorum/settings/migrate-sigs.php
@@ -0,0 +1,85 @@
+function phorum_htmlpurifier_migrate_sigs_check() {
+    global $PHORUM;
+    $offset = 0;
+    if (!empty($_POST['migrate-sigs'])) {
+        if (!isset($_POST['confirmation']) || strtolower($_POST['confirmation']) !== 'yes') {
+            echo 'Invalid confirmation code.';
+            exit;
+        }
+        $PHORUM['mod_htmlpurifier']['migrate-sigs'] = true;
+        phorum_db_update_settings(array("mod_htmlpurifier"=>$PHORUM["mod_htmlpurifier"]));
+        $offset = 1;
+    } elseif (!empty($_GET['migrate-sigs']) && $PHORUM['mod_htmlpurifier']['migrate-sigs']) {
+        $offset = (int) $_GET['migrate-sigs'];
+    }
+    return $offset;
+function phorum_htmlpurifier_migrate_sigs($offset) {
+    global $PHORUM;
+    if(!$offset) return; // bail out quick of $offset == 0
+    @set_time_limit(0); // attempt to let this run
+    $increment = $PHORUM['mod_htmlpurifier']['migrate-sigs-increment'];
+    require_once(dirname(__FILE__) . '/../migrate.php');
+    // migrate signatures
+    // do this in batches so we don't run out of time/space
+    $end = $offset + $increment;
+    $user_ids = array();
+    for ($i = $offset; $i < $end; $i++) {
+        $user_ids[] = $i;
+    }
+    $userinfos = phorum_db_user_get_fields($user_ids, 'signature');
+    foreach ($userinfos as $i => $user) {
+        if (empty($user['signature'])) continue;
+        $sig = $user['signature'];
+        // perform standard Phorum processing on the sig
+        $sig = str_replace(array("&","<",">"), array("&amp;","&lt;","&gt;"), $sig);
+        $sig = preg_replace("/<((http|https|ftp):\/\/[a-z0-9;\/\?:@=\&\$\-_\.\+!*'\(\),~%]+?)>/i", "$1", $sig);
+        // prepare fake data to pass to migration function
+        $fake_data = array(array("author"=>"", "email"=>"", "subject"=>"", 'body' => $sig));
+        list($fake_message) = phorum_htmlpurifier_migrate($fake_data);
+        $user['signature'] = $fake_message['body'];
+        if (!phorum_user_save($user)) {
+            exit('Error while saving user data');
+        }
+    }
+    unset($userinfos); // free up memory
+    // query for highest ID in database
+    $type = $PHORUM['DBCONFIG']['type'];
+    if ($type == 'mysql') {
+        $conn = phorum_db_mysql_connect();
+        $sql = "select MAX(user_id) from {$PHORUM['user_table']}";
+        $res = mysql_query($sql, $conn);
+        $row = mysql_fetch_row($res);
+        $top_id = (int) $row[0];
+    } elseif ($type == 'mysqli') {
+        $conn = phorum_db_mysqli_connect();
+        $sql = "select MAX(user_id) from {$PHORUM['user_table']}";
+        $res = mysqli_query($conn, $sql);
+        $row = mysqli_fetch_row($res);
+        $top_id = (int) $row[0];
+    } else {
+        exit('Unrecognized database!');
+    }
+    $offset += $increment;
+    if ($offset > $top_id) { // test for end condition
+        echo 'Migration finished';
+        $PHORUM['mod_htmlpurifier']['migrate-sigs'] = false;
+        phorum_htmlpurifier_commit_settings();
+        return true;
+    }
+    $host  = $_SERVER['HTTP_HOST'];
+    $uri   = rtrim(dirname($_SERVER['PHP_SELF']), '/\\');
+    $extra = 'admin.php?module=modsettings&mod=htmlpurifier&migrate-sigs=' . $offset;
+    // relies on output buffering to work
+    header("Location: http://$host$uri/$extra");
+    exit;
diff --git a/plugins/phorum/settings/save.php b/plugins/phorum/settings/save.php
new file mode 100644
index 00000000..a08b8314
--- /dev/null
+++ b/plugins/phorum/settings/save.php
@@ -0,0 +1,23 @@
+function phorum_htmlpurifier_save_settings() {
+    global $PHORUM;
+    if (phorum_htmlpurifier_config_file_exists()) {
+        echo "Cannot update settings, <code>mods/htmlpurifier/config.php</code> already exists. To change
+        settings, edit that file. To use the web form, delete that file.<br />";
+    } else {
+        $config = phorum_htmlpurifier_get_config();
+        if (!isset($_POST['reset'])) $config->mergeArrayFromForm($_POST, 'config', $PHORUM['mod_htmlpurifier']['directives']);
+        $PHORUM['mod_htmlpurifier']['config'] = $config->getAll();
+        if(!phorum_htmlpurifier_commit_settings()){
+            $error="Database error while updating settings.";
+        } else {
+            echo "Settings Updated<br />";
+        }
+    }
+function phorum_htmlpurifier_commit_settings() {
+    global $PHORUM;
+    return phorum_db_update_settings(array("mod_htmlpurifier"=>$PHORUM["mod_htmlpurifier"]));
diff --git a/smoketests/testSchema.php b/smoketests/testSchema.php
index 6e8bc74b..e6b721dc 100644
--- a/smoketests/testSchema.php
+++ b/smoketests/testSchema.php
@@ -37,3 +37,7 @@ HTMLPurifier_ConfigSchema::defineNamespace('ReportCard', 'It is for grades.');
 HTMLPurifier_ConfigSchema::define('ReportCard', 'English', null, 'string/null', 'Grade from English class.');
 HTMLPurifier_ConfigSchema::define('ReportCard', 'Absences', 0, 'int', 'How many times missing from school?');
+HTMLPurifier_ConfigSchema::defineNamespace('Text', 'This stuff is long, boring, and English.');
+HTMLPurifier_ConfigSchema::define('Text', 'AboutUs', 'Nothing much, but this should be decently long so that a textarea would be better', 'text', 'Who are we? What are we up to?');
+HTMLPurifier_ConfigSchema::define('Text', 'Hash', "not-case-sensitive\nstill-not-case-sensitive\nsuper-not-case-sensitive", 'itext', 'This is of limited utility, but of course it ends up being used.');
diff --git a/test-settings.sample.php b/test-settings.sample.php
index bd1f622b..74e2de54 100644
--- a/test-settings.sample.php
+++ b/test-settings.sample.php
@@ -1,16 +1,20 @@
-// This file is necessary to run the unit tests and profiling
-// scripts.
+// This file is necessary to run the unit tests and profiling scripts.
+// Please copy it to 'test-settings.php' and make the necessary edits.
-// Is PEAR available on your system? If it isn't, set to false. If PEAR
-// is not part of the default include_path, add it.
-$GLOBALS['HTMLPurifierTest']['PEAR'] = true;
+// Some of these scripts run a long time, so it is recommended that you
+// turn off the time limit
+// Turning off output buffering will prevent mysterious errors from core dumps
+// Where is SimpleTest located?
+$simpletest_location = '/path/to/simpletest/';
 // How many times should profiling scripts iterate over the function? More runs 
 // means more accurate results, but they'll take longer to perform.
 $GLOBALS['HTMLPurifierTest']['Runs'] = 2;
-// Where is SimpleTest located?
-$simpletest_location = '/path/to/simpletest/';
diff --git a/tests/HTMLPurifier/AttrCollectionsTest.php b/tests/HTMLPurifier/AttrCollectionsTest.php
index 6420a6ac..52fc88f1 100644
--- a/tests/HTMLPurifier/AttrCollectionsTest.php
+++ b/tests/HTMLPurifier/AttrCollectionsTest.php
@@ -9,7 +9,7 @@ class HTMLPurifier_AttrCollectionsTest_NoConstructor extends HTMLPurifier_AttrCo
     function performInclusions(&$a) {}
-class HTMLPurifier_AttrCollectionsTest extends UnitTestCase
+class HTMLPurifier_AttrCollectionsTest extends HTMLPurifier_Harness
     function testConstruction() {
diff --git a/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
index 861cbb32..25571128 100644
--- a/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
@@ -16,6 +16,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes
         $this->assertDef(',', false);
         $this->assertDef('Times New Roman, serif', '\'Times New Roman\', serif');
+        $this->assertDef($d = "'John\\'s Font'");
+        $this->assertDef("John's Font", $d);
+        $this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'");
+        $this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d);
diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php
index a4d2521e..58b77248 100644
--- a/tests/HTMLPurifier/AttrDef/URITest.php
+++ b/tests/HTMLPurifier/AttrDef/URITest.php
@@ -2,317 +2,86 @@
 require_once 'HTMLPurifier/AttrDefHarness.php';
 require_once 'HTMLPurifier/AttrDef/URI.php';
+require_once 'HTMLPurifier/URIParser.php';
-// we also need to test all the configuration directives defined by this class
-// http: is returned quite often when a URL is invalid. We have to change
-// this behavior to just a plain old "FALSE"!
+ * @todo Aim for complete code coverage with mocks
+ */
 class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
-    var $scheme, $components, $return_components;
-    function testGenericURI() {
-        generate_mock_once('HTMLPurifier_URIScheme');
-        generate_mock_once('HTMLPurifier_URISchemeRegistry');
-        $old_registry = HTMLPurifier_URISchemeRegistry::instance();
-        // finally, lets get a copy of the actual class
+    function setUp() {
         $this->def = new HTMLPurifier_AttrDef_URI();
-        // initialize test inputs
-        $uri = // input URI
-        $components = // what components the URI should be parsed to
-        $return_components = // return components
-        $expect_uri = array(); // what reassembled URI to expect
-        //////////////////////////////////////////////////////////////////////
-        // test a regular instance, return identical URI
-        $uri[0] = 'http://www.example.com/webhp?q=foo#result2';
-        $components[0] = array(
-            null,               // userinfo
-            'www.example.com',  // host
-            null,               // port
-            '/webhp',           // path
-            'q=foo'             // query
-        );
-        // test an amended URI (the actual logic is irrelevant)
-        // test that user and port get parsed correctly (3.2.1 and 3.2.3)
-        $uri[1] = 'http://user@authority.part:80/now/the/path?query#fragment';
-        $components[1] = array(
-            'user', 'authority.part', 80,
-            '/now/the/path', 'query'
-        );
-        $return_components[1] = array( // removed port (it's standard)
-            'user', 'authority.part', null, '/now/the/path', 'query'
-        );
-        $expect_uri[1] = 'http://user@authority.part/now/the/path?query#fragment';
-        // percent encoded characters are not resolved during generic URI
-        // parsing even though RFC 3986 defines this notation
-        // also test what happens when query/fragment are missing
-        $uri[2] = 'http://en.wikipedia.org/wiki/Clich%C3%A9';
-        $components[2] = array(
-            null, 'en.wikipedia.org', null, '/wiki/Clich%C3%A9', null
-        );
-        // test distinction between empty query and undefined query (above)
-        $uri[3] = 'http://www.example.com/?#';
-        $components[3] = array(null, 'www.example.com', null, '/', '');
-        // path is always defined, even if empty
-        $uri[4] = 'http://www.example.com';
-        $components[4] = array(null, 'www.example.com', null, '', null);
-        // test parsing of an opaque URI
-        $uri[5] = 'mailto:bob@example.com';
-        $components[5] = array(null, null, null, 'bob@example.com', null);
-        // even though we don't resolve percent entities, we have to fix
-        // improper percent-encodes. Taken one at a time:
-        // %56 - V, which is an unreserved character
-        // %fc - u with an umlaut, normalize to uppercase
-        // %GJ - invalid characters in entity, encode %
-        // %5 - prematurely terminated, encode %
-        // %FC - u with umlaut, correct
-        // note that Apache doesn't do such fixing, rather, it just claims
-        // that the browser sent a "Bad Request".  See PercentEncoder.php
-        // for more details
-        $uri[6] = 'http://www.example.com/%56%fc%GJ%5%FC';
-        $components[6] = array(null, 'www.example.com', null, '/V%FC%25GJ%255%FC', null);
-        $expect_uri[6] = 'http://www.example.com/V%FC%25GJ%255%FC';
-        // test IPv4 address (behavior may vary with configuration)
-        $uri[7] = '';
-        $components[7] = array(null, '', null, '/', null);
-        // while it may look like an IPv4 address, it's really a reg-name.
-        // don't destroy it
-        $uri[8] = 'http://333.123.32.123/';
-        $components[8] = array(null, '333.123.32.123', null, '/', null);
-        // test IPv6 address, using amended form of RFC's example
-        $uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one';
-        $components[9] = array(null, '[2001:db8::7]', null, '/c=GB',
-            'objectClass?one');
-        // We will not implement punycode encoding, that's up to the browsers
-        // We also will not implement percent to IDNA encoding transformations:
-        // if you need to use an international domain in a link, make sure that
-        // you've got it in UTF-8 and send it in raw (no encoding).
-        // break the RFC a little and allow international characters
-        // WARNING: UTF-8 encoded!
-        $uri[10] = 'http://tūdaliņ.lv';
-        $components[10] = array(null, 'tūdaliņ.lv', null, '', null);
-        // test invalid IPv6 address and invalid reg-name
-        $uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]';
-        $components[11] = array(null, null, null, '', null);
-        $expect_uri[11] = 'http:';
-        // test invalid port
-        $uri[12] = 'http://example.com:foobar';
-        $components[12] = array(null, 'example.com', null, '', null);
-        $expect_uri[12] = 'http://example.com';
-        // test overlarge port (max is 65535, although this isn't official)
-        $uri[13] = 'http://example.com:65536';
-        $components[13] = array(null, 'example.com', null, '', null);
-        $expect_uri[13] = 'http://example.com';
-        // some spec abnf tests
-        // "authority . path-abempty" omitted, it is a trivial case
-        // "path-absolute", note this is different from path-rootless
-        $uri[14] = 'http:/this/is/path';
-        $components[14] = array(null, null, null, '/this/is/path', null);
-        $expect_uri[14] = 'http:/this/is/path'; // do not munge scheme off
-        // scheme munging is not being tested yet, it's an extra feature
-        // "path-rootless" - this should not be used but is allowed
-        $uri[15] = 'http:this/is/path';
-        $components[15] = array(null, null, null, 'this/is/path', null);
-        //$expect_uri[15] = 'this/is/path'; // munge scheme off
-        // "path-empty" - a rather interesting case, remove the scheme
-        $uri[16] = 'http:';
-        $components[16] = array(null, null, null, '', null);
-        //$expect_uri[16] = ''; // munge scheme off
-        // test invalid scheme, components shouldn't be passed
-        $uri[17] = 'javascript:alert("moo");';
-        $expect_uri[17] = false;
-        // relative URIs - basic case
-        $uri[18] = '/a/b';
-        $components[18] = array(null, null, null, '/a/b', null);
-        // result of malformed tag, gracefully handle error
-        $uri[19] = 'http://www.google.com/\'>"';
-        $components[19] = array(null, 'www.google.com', null, '/', null);
-        $expect_uri[19] = 'http://www.google.com/';
-        // test empty
-        $uri[20] = '';
-        $components[20] = array(null, null, null, '', null);
-        $expect_uri[20] = '';
-        foreach ($uri as $i => $value) {
-            // the read in values
-            $this->config  = isset($config[$i])  ? $config[$i]  : HTMLPurifier_Config::createDefault();
-            $this->context = isset($context[$i]) ? $context[$i] : new HTMLPurifier_Context();
-            // setUpAssertDef
-            if ( isset($components[$i]) ) {
-                $this->components = $components[$i];
-            } else {
-                $this->components = false;
-            }
-            if ( isset($return_components[$i]) ) {
-                $this->return_components = $return_components[$i];
-            } else {
-                $this->return_components = $this->components;
-            }
-            // parameters
-            if (!isset($expect_uri[$i])) {
-                $expect_uri[$i] = $value; // untouched
-            }
-            $this->assertDef($value, $expect_uri[$i], true, "Test $i: %s");
-        }
-        // reset to regular implementation
-        HTMLPurifier_URISchemeRegistry::instance($old_registry);
-    }
-    function setUpAssertDef() {
-        // $fake_registry isn't the real mock, because due to PHP 4 weirdness
-        // I cannot set a default value to function parameters that are passed
-        // by reference. So we use the value instance() returns.
-        $fake_registry = new HTMLPurifier_URISchemeRegistryMock();
-        $registry =& HTMLPurifier_URISchemeRegistry::instance($fake_registry);
-        // now, let's add a pseudo-scheme to the registry
-        $this->scheme = new HTMLPurifier_URISchemeMock();
-        // here are the schemes we will support with overloaded mocks
-        $registry->setReturnReference('getScheme', $this->scheme, array('http', '*', '*'));
-        $registry->setReturnReference('getScheme', $this->scheme, array('mailto', '*', '*'));
-        // default return value is false (meaning no scheme defined: reject)
-        $registry->setReturnValue('getScheme', false, array('*', '*', '*'));
-        if ($this->components === false) {
-            $this->scheme->expectNever('validateComponents');
-        } else {
-            $this->components[] = '*'; // append the configuration
-            $this->components[] = '*'; // append context
-            $this->scheme->setReturnValue(
-                'validateComponents', $this->return_components, $this->components);
-            $this->scheme->expectOnce('validateComponents', $this->components);
-        }
-    }
-    function tearDownAssertDef() {
-        $this->scheme->tally();
+        parent::setUp();
     function testIntegration() {
-        $this->def = new HTMLPurifier_AttrDef_URI();
+        $this->assertDef('http:', '');
+        $this->assertDef('http:/foo', '/foo');
         $this->assertDef('javascript:bad_stuff();', false);
-    function testDisableExternal() {
-        $this->def = new HTMLPurifier_AttrDef_URI();
-        $this->config->set('URI', 'DisableExternal', true);
-        $this->config->set('URI', 'Host', 'sub.example.com');
-        $this->assertDef('/foobar.txt');
-        $this->assertDef('http://google.com/', false);
-        $this->assertDef('http://sub.example.com/alas?foo=asd');
-        $this->assertDef('http://example.com/teehee', false);
-        $this->assertDef('http://www.example.com/#man', false);
-        $this->assertDef('http://go.sub.example.com/perhaps?p=foo');
+    function testIntegrationWithPercentEncoder() {
+        $this->assertDef(
+            'http://www.example.com/%56%fc%GJ%5%FC',
+            'http://www.example.com/V%FC%25GJ%255%FC'
+        );
     function testEmbeds() {
-        // embedded URI
         $this->def = new HTMLPurifier_AttrDef_URI(true);
         $this->assertDef('mailto:foo@example.com', false);
-    function testDisableExternalResources() {
-        $this->config->set('URI', 'DisableExternalResources', true);
-        $this->def = new HTMLPurifier_AttrDef_URI();
-        $this->assertDef('http://sub.example.com/alas?foo=asd');
-        $this->assertDef('/img.png');
-        $this->def = new HTMLPurifier_AttrDef_URI(true);
-        $this->assertDef('http://sub.example.com/alas?foo=asd', false);
-        $this->assertDef('/img.png');
-    }
-    function testMunge() {
+    function testConfigMunge() {
         $this->config->set('URI', 'Munge', 'http://www.google.com/url?q=%s');
-        $this->def = new HTMLPurifier_AttrDef_URI();
         $this->assertDef('javascript:foobar();', false);
-    function testBlacklist() {
-        $this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
-        $this->assertDef('foo.txt');
-        $this->assertDef('http://www.google.com/example.com/moo');
-        $this->assertDef('http://example.com/#23', false);
-        $this->assertDef('https://sub.domain.example.com/foobar', false);
-        $this->assertDef('http://example.com.example.net/?whoo=foo', false);
-        $this->assertDef('ftp://moo-moo.net/foo/foo/', false);
+    function testDefaultSchemeRemovedInBlank() {
+        $this->assertDef('http:', '');
-    function testWhitelist() {
-        /*
+    function testDefaultSchemeRemovedInRelativeURI() {
+        $this->assertDef('http:/foo/bar', '/foo/bar');
+    }
+    function testDefaultSchemeNotRemovedInAbsoluteURI() {
+        $this->assertDef('http://example.com/foo/bar');
+    }
+    function testAltSchemeNotRemoved() {
+        $this->assertDef('mailto:this-looks-like-a-path@example.com');
+    }
+    function testURIDefinitionValidation() {
+        $parser = new HTMLPurifier_URIParser();
+        $uri = $parser->parse('http://example.com');
+        $this->config->set('URI', 'DefinitionID', 'HTMLPurifier_AttrDef_URITest->testURIDefinitionValidation');
+        $uri_def =& $this->config->getDefinition('URI');
+        // overload with mock
+        generate_mock_once('HTMLPurifier_URIDefinition');
+        $uri_def = new HTMLPurifier_URIDefinitionMock();
+        $uri_def->expectOnce('filter', array($uri, '*', '*'));
+        $uri_def->setReturnValue('filter', true, array($uri, '*', '*'));
+        $uri_def->setup = true;
+        $this->assertDef('http://example.com');
+    }
+    /*
+    function test_validate_configWhitelist() {
         $this->config->set('URI', 'HostPolicy', 'DenyAll');
         $this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
@@ -320,8 +89,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
         $this->assertDef('http://google.com.tricky.spamsite.net', false);
-        */
+    */
diff --git a/tests/HTMLPurifier/AttrDefHarness.php b/tests/HTMLPurifier/AttrDefHarness.php
index 84d8cc9e..9d811484 100644
--- a/tests/HTMLPurifier/AttrDefHarness.php
+++ b/tests/HTMLPurifier/AttrDefHarness.php
@@ -1,11 +1,10 @@
-class HTMLPurifier_AttrDefHarness extends UnitTestCase
+class HTMLPurifier_AttrDefHarness extends HTMLPurifier_Harness
     var $def;
-    var $context;
-    var $config;
+    var $context, $config;
     function setUp() {
         $this->config = HTMLPurifier_Config::createDefault();
@@ -13,20 +12,15 @@ class HTMLPurifier_AttrDefHarness extends UnitTestCase
     // cannot be used for accumulator
-    function assertDef($string, $expect = true, $ini = false, $message = '%s') {
+    function assertDef($string, $expect = true) {
         // $expect can be a string or bool
-        if ($ini) $this->setUpAssertDef();
         $result = $this->def->validate($string, $this->config, $this->context);
         if ($expect === true) {
-            $this->assertIdentical($string, $result, $message);
+            $this->assertIdentical($string, $result);
         } else {
-            $this->assertIdentical($expect, $result, $message);
+            $this->assertIdentical($expect, $result);
-        if ($ini) $this->tearDownAssertDef();
-    function setUpAssertDef() {}
-    function tearDownAssertDef() {}
diff --git a/tests/HTMLPurifier/AttrDefTest.php b/tests/HTMLPurifier/AttrDefTest.php
index 0cd11310..84889bb3 100644
--- a/tests/HTMLPurifier/AttrDefTest.php
+++ b/tests/HTMLPurifier/AttrDefTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/AttrDef.php';
-class HTMLPurifier_AttrDefTest extends UnitTestCase
+class HTMLPurifier_AttrDefTest extends HTMLPurifier_Harness
     function test_parseCDATA() {
diff --git a/tests/HTMLPurifier/AttrTransformHarness.php b/tests/HTMLPurifier/AttrTransformHarness.php
index 1f7839d0..e6ae1a93 100644
--- a/tests/HTMLPurifier/AttrTransformHarness.php
+++ b/tests/HTMLPurifier/AttrTransformHarness.php
@@ -1,8 +1,8 @@
-require_once 'HTMLPurifier/Harness.php';
+require_once 'HTMLPurifier/ComplexHarness.php';
-class HTMLPurifier_AttrTransformHarness extends HTMLPurifier_Harness
+class HTMLPurifier_AttrTransformHarness extends HTMLPurifier_ComplexHarness
     function setUp() {
diff --git a/tests/HTMLPurifier/AttrTransformTest.php b/tests/HTMLPurifier/AttrTransformTest.php
index e75352fb..5694fdd4 100644
--- a/tests/HTMLPurifier/AttrTransformTest.php
+++ b/tests/HTMLPurifier/AttrTransformTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/AttrTransform.php';
-class HTMLPurifier_AttrTransformTest extends UnitTestCase
+class HTMLPurifier_AttrTransformTest extends HTMLPurifier_Harness
     function test_prependCSS() {
diff --git a/tests/HTMLPurifier/AttrTypesTest.php b/tests/HTMLPurifier/AttrTypesTest.php
index 3f09dd1f..c207c320 100644
--- a/tests/HTMLPurifier/AttrTypesTest.php
+++ b/tests/HTMLPurifier/AttrTypesTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/AttrTypes.php';
-class HTMLPurifier_AttrTypesTest extends UnitTestCase
+class HTMLPurifier_AttrTypesTest extends HTMLPurifier_Harness
     function test_get() {
diff --git a/tests/HTMLPurifier/ChildDefHarness.php b/tests/HTMLPurifier/ChildDefHarness.php
index 1ea04089..b0acb0bf 100644
--- a/tests/HTMLPurifier/ChildDefHarness.php
+++ b/tests/HTMLPurifier/ChildDefHarness.php
@@ -1,9 +1,9 @@
-require_once 'HTMLPurifier/Harness.php';
+require_once 'HTMLPurifier/ComplexHarness.php';
 require_once 'HTMLPurifier/ChildDef.php';
-class HTMLPurifier_ChildDefHarness extends HTMLPurifier_Harness
+class HTMLPurifier_ChildDefHarness extends HTMLPurifier_ComplexHarness
     function setUp() {
diff --git a/tests/HTMLPurifier/ComplexHarness.php b/tests/HTMLPurifier/ComplexHarness.php
new file mode 100644
index 00000000..8ea7378d
--- /dev/null
+++ b/tests/HTMLPurifier/ComplexHarness.php
@@ -0,0 +1,129 @@
+require_once 'HTMLPurifier/Lexer/DirectLex.php';
+ * General-purpose test-harness that makes testing functions that require
+ * configuration and context objects easier when those two parameters are
+ * meaningless.  See HTMLPurifier_ChildDefTest for a good example of usage.
+ */
+class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
+    /**
+     * Instance of the object that will execute the method
+     */
+    var $obj;
+    /**
+     * Name of the function to be executed
+     */
+    var $func;
+    /**
+     * Whether or not the method deals in tokens. If set to true, assertResult()
+     * will transparently convert HTML to and back from tokens.
+     */
+    var $to_tokens = false;
+    /**
+     * Whether or not to convert tokens back into HTML before performing
+     * equality check, has no effect on bools.
+     */
+    var $to_html = false;
+    /**
+     * Instance of an HTMLPurifier_Lexer implementation.
+     */
+    var $lexer;
+    /**
+     * Instance of HTMLPurifier_Generator
+     */
+    var $generator;
+    /**
+     * Default config to fall back on if no config is available
+     */
+    var $config;
+    /**
+     * Default context to fall back on if no context is available
+     */
+    var $context;
+    function HTMLPurifier_ComplexHarness() {
+        $this->lexer     = new HTMLPurifier_Lexer_DirectLex();
+        $this->generator = new HTMLPurifier_Generator();
+        parent::HTMLPurifier_Harness();
+    }
+    /**
+     * Asserts a specific result from a one parameter + config/context function
+     * @param $input Input parameter
+     * @param $expect Expectation; if bool while the result is not, $input is expected
+     * @param $config_array Configuration array in form of Ns.Directive => Value.
+     *                Loaded on top of $this->config when that is set.
+     * @param $context_array Context array in form of Key => Value or an actual
+     *                       context object. Ignored if $this->context is non-empty.
+     */
+    function assertResult($input, $expect = true,
+        $config_array = array(), $context_array = array()
+    ) {
+        // setup config: $config_array is layered over $this->config if present
+        if ($this->config) {
+            $config = HTMLPurifier_Config::create($this->config);
+            $config->autoFinalize = false;
+            $config->loadArray($config_array);
+        } else {
+            $config = HTMLPurifier_Config::create($config_array);
+        }
+        // setup context object. With no $this->context, a fresh local context
+        // is used, so tests cannot inspect it afterwards; extend the harness
+        // to allow post-tests on the context object when necessary
+        if (empty($this->context)) {
+            $context = new HTMLPurifier_Context();
+            $context->loadArray($context_array);
+        } else {
+            $context =& $this->context;
+        }
+        if ($this->to_tokens && is_string($input)) {
+            // $func may cause $input to change, so "clone" another copy
+            // to sacrifice
+            $input   = $this->lexer->tokenizeHTML($s = $input, $config, $context);
+            $input_c = $this->lexer->tokenizeHTML($s, $config, $context);
+        } else {
+            $input_c = $input;
+        }
+        // call the function
+        $func = $this->func;
+        $result = $this->obj->$func($input_c, $config, $context);
+        // test a bool result
+        if (is_bool($result)) {
+            $this->assertIdentical($expect, $result);
+            return;
+        } elseif (is_bool($expect)) {
+            $expect = $input;
+        }
+        if ($this->to_html) {
+            $result = $this->generator->
+              generateFromTokens($result, $config, $context);
+            if (is_array($expect)) {
+                $expect = $this->generator->
+                  generateFromTokens($expect, $config, $context);
+            }
+        }
+        $this->assertIdentical($expect, $result);
+    }
diff --git a/tests/HTMLPurifier/ConfigSchemaTest.php b/tests/HTMLPurifier/ConfigSchemaTest.php
index 8dfb8f4c..8ad8f03c 100644
--- a/tests/HTMLPurifier/ConfigSchemaTest.php
+++ b/tests/HTMLPurifier/ConfigSchemaTest.php
@@ -6,7 +6,7 @@ if (!class_exists('CS')) {
     class CS extends HTMLPurifier_ConfigSchema {}
-class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
+class HTMLPurifier_ConfigSchemaTest extends HTMLPurifier_Harness
@@ -260,7 +260,9 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
     function testValidate() {
         $this->assertValid('foobar', 'string');
+        $this->assertValid('foobar', 'text'); // aliases, text = long string
         $this->assertValid('FOOBAR', 'istring', 'foobar');
+        $this->assertValid('FOOBAR', 'itext', 'foobar');
         $this->assertValid(34, 'int');
@@ -278,10 +280,14 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
         $this->assertValid(array('1', '2', '3'), 'list');
         $this->assertValid('foo,bar, cow', 'list', array('foo', 'bar', 'cow'));
         $this->assertValid('', 'list', array());
+        $this->assertValid("foo\nbar", 'list', array('foo', 'bar'));
+        $this->assertValid("foo\nbar,baz", 'list', array('foo', 'bar', 'baz'));
         $this->assertValid(array('1' => true, '2' => true), 'lookup');
         $this->assertValid(array('1', '2'), 'lookup', array('1' => true, '2' => true));
         $this->assertValid('foo,bar', 'lookup', array('foo' => true, 'bar' => true));
+        $this->assertValid("foo\nbar", 'lookup', array('foo' => true, 'bar' => true));
+        $this->assertValid("foo\nbar,baz", 'lookup', array('foo' => true, 'bar' => true, 'baz' => true));
         $this->assertValid('', 'lookup', array());
         $this->assertValid(array('foo' => 'bar'), 'hash');
@@ -289,6 +295,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
         $this->assertInvalid(array(0 => 'moo'), 'hash');
         $this->assertValid('', 'hash', array());
         $this->assertValid('foo:bar,too:two', 'hash', array('foo' => 'bar', 'too' => 'two'));
+        $this->assertValid("foo:bar\ntoo:two,three:free", 'hash', array('foo' => 'bar', 'too' => 'two', 'three' => 'free'));
         $this->assertValid('foo:bar,too', 'hash', array('foo' => 'bar'));
         $this->assertValid('foo:bar,', 'hash', array('foo' => 'bar'));
         $this->assertValid('foo:bar:baz', 'hash', array('foo' => 'bar:baz'));
diff --git a/tests/HTMLPurifier/ConfigTest.php b/tests/HTMLPurifier/ConfigTest.php
index a2dff2d2..941dc3da 100644
--- a/tests/HTMLPurifier/ConfigTest.php
+++ b/tests/HTMLPurifier/ConfigTest.php
@@ -6,7 +6,7 @@ if (!class_exists('CS')) {
     class CS extends HTMLPurifier_ConfigSchema {}
-class HTMLPurifier_ConfigTest extends UnitTestCase
+class HTMLPurifier_ConfigTest extends HTMLPurifier_Harness
     var $our_copy, $old_copy;
diff --git a/tests/HTMLPurifier/ContextTest.php b/tests/HTMLPurifier/ContextTest.php
index 8e038159..b072542f 100644
--- a/tests/HTMLPurifier/ContextTest.php
+++ b/tests/HTMLPurifier/ContextTest.php
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/Context.php';
 // mocks
 require_once 'HTMLPurifier/IDAccumulator.php';
-class HTMLPurifier_ContextTest extends UnitTestCase
+class HTMLPurifier_ContextTest extends HTMLPurifier_Harness
     var $context;
diff --git a/tests/HTMLPurifier/DefinitionCache/SerializerTest.php b/tests/HTMLPurifier/DefinitionCache/SerializerTest.php
index 4925ff8e..cf4249af 100644
--- a/tests/HTMLPurifier/DefinitionCache/SerializerTest.php
+++ b/tests/HTMLPurifier/DefinitionCache/SerializerTest.php
@@ -17,8 +17,7 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
         $config_md5   = '1.0.0-serial-2';
         $file = realpath(
-            $rel_file = dirname(__FILE__) .
-            '/../../../library/HTMLPurifier/DefinitionCache/Serializer/Test/' .
+            $rel_file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer/Test/' .
             $config_md5 . '.ser'
         if($file && file_exists($file)) unlink($file); // prevent previous failures from causing problems
diff --git a/tests/HTMLPurifier/DefinitionCacheFactoryTest.php b/tests/HTMLPurifier/DefinitionCacheFactoryTest.php
index beabc33c..a2768d7b 100644
--- a/tests/HTMLPurifier/DefinitionCacheFactoryTest.php
+++ b/tests/HTMLPurifier/DefinitionCacheFactoryTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/DefinitionCacheFactory.php';
-class HTMLPurifier_DefinitionCacheFactoryTest extends UnitTestCase
+class HTMLPurifier_DefinitionCacheFactoryTest extends HTMLPurifier_Harness
     var $newFactory;
diff --git a/tests/HTMLPurifier/DefinitionCacheHarness.php b/tests/HTMLPurifier/DefinitionCacheHarness.php
index 7304ecdd..e6bd839f 100644
--- a/tests/HTMLPurifier/DefinitionCacheHarness.php
+++ b/tests/HTMLPurifier/DefinitionCacheHarness.php
@@ -1,6 +1,6 @@
-class HTMLPurifier_DefinitionCacheHarness extends UnitTestCase
+class HTMLPurifier_DefinitionCacheHarness extends HTMLPurifier_Harness
diff --git a/tests/HTMLPurifier/DefinitionCacheTest.php b/tests/HTMLPurifier/DefinitionCacheTest.php
index eef49f14..70fb1760 100644
--- a/tests/HTMLPurifier/DefinitionCacheTest.php
+++ b/tests/HTMLPurifier/DefinitionCacheTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/DefinitionCache.php';
-class HTMLPurifier_DefinitionCacheTest extends UnitTestCase
+class HTMLPurifier_DefinitionCacheTest extends HTMLPurifier_Harness
     function test_isOld() {
diff --git a/tests/HTMLPurifier/DefinitionTest.php b/tests/HTMLPurifier/DefinitionTest.php
index 250e0845..e48817ea 100644
--- a/tests/HTMLPurifier/DefinitionTest.php
+++ b/tests/HTMLPurifier/DefinitionTest.php
@@ -7,7 +7,7 @@ Mock::generatePartial(
-class HTMLPurifier_DefinitionTest extends UnitTestCase
+class HTMLPurifier_DefinitionTest extends HTMLPurifier_Harness
     function test_setup() {
         $def = new HTMLPurifier_Definition_Testable();
diff --git a/tests/HTMLPurifier/DoctypeRegistryTest.php b/tests/HTMLPurifier/DoctypeRegistryTest.php
index 23375411..cec9dfcd 100644
--- a/tests/HTMLPurifier/DoctypeRegistryTest.php
+++ b/tests/HTMLPurifier/DoctypeRegistryTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/DoctypeRegistry.php';
-class HTMLPurifier_DoctypeRegistryTest extends UnitTestCase
+class HTMLPurifier_DoctypeRegistryTest extends HTMLPurifier_Harness
     function test_register() {
diff --git a/tests/HTMLPurifier/ElementDefTest.php b/tests/HTMLPurifier/ElementDefTest.php
index 04b4b0af..a947b4c4 100644
--- a/tests/HTMLPurifier/ElementDefTest.php
+++ b/tests/HTMLPurifier/ElementDefTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/ElementDef.php';
-class HTMLPurifier_ElementDefTest extends UnitTestCase
+class HTMLPurifier_ElementDefTest extends HTMLPurifier_Harness
     function test_mergeIn() {
diff --git a/tests/HTMLPurifier/EncoderTest.php b/tests/HTMLPurifier/EncoderTest.php
index 5cf6a240..6007bf6a 100644
--- a/tests/HTMLPurifier/EncoderTest.php
+++ b/tests/HTMLPurifier/EncoderTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/Encoder.php';
-class HTMLPurifier_EncoderTest extends UnitTestCase
+class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
     var $_entity_lookup;
diff --git a/tests/HTMLPurifier/EntityLookupTest.php b/tests/HTMLPurifier/EntityLookupTest.php
index 706b7c18..f50ee611 100644
--- a/tests/HTMLPurifier/EntityLookupTest.php
+++ b/tests/HTMLPurifier/EntityLookupTest.php
@@ -4,7 +4,7 @@
 require_once 'HTMLPurifier/EntityLookup.php';
-class HTMLPurifier_EntityLookupTest extends UnitTestCase
+class HTMLPurifier_EntityLookupTest extends HTMLPurifier_Harness
     function test() {
diff --git a/tests/HTMLPurifier/EntityParserTest.php b/tests/HTMLPurifier/EntityParserTest.php
index 2d3a4d29..c3b605c1 100644
--- a/tests/HTMLPurifier/EntityParserTest.php
+++ b/tests/HTMLPurifier/EntityParserTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/EntityParser.php';
-class HTMLPurifier_EntityParserTest extends UnitTestCase
+class HTMLPurifier_EntityParserTest extends HTMLPurifier_Harness
     var $EntityParser;
diff --git a/tests/HTMLPurifier/ErrorCollectorTest.php b/tests/HTMLPurifier/ErrorCollectorTest.php
index 508efcab..0c8db720 100644
--- a/tests/HTMLPurifier/ErrorCollectorTest.php
+++ b/tests/HTMLPurifier/ErrorCollectorTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/ErrorCollector.php';
-class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
+class HTMLPurifier_ErrorCollectorTest extends HTMLPurifier_Harness
     function setup() {
diff --git a/tests/HTMLPurifier/ErrorsHarness.php b/tests/HTMLPurifier/ErrorsHarness.php
index 359492a3..67f7c6b3 100644
--- a/tests/HTMLPurifier/ErrorsHarness.php
+++ b/tests/HTMLPurifier/ErrorsHarness.php
@@ -3,7 +3,7 @@
 require_once 'HTMLPurifier/ErrorCollectorEMock.php';
 require_once 'HTMLPurifier/Lexer/DirectLex.php';
-class HTMLPurifier_ErrorsHarness extends UnitTestCase
+class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness
     var $config, $context;
diff --git a/tests/HTMLPurifier/GeneratorTest.php b/tests/HTMLPurifier/GeneratorTest.php
index 9039d1fb..b18d9ad1 100644
--- a/tests/HTMLPurifier/GeneratorTest.php
+++ b/tests/HTMLPurifier/GeneratorTest.php
@@ -3,16 +3,16 @@
 require_once 'HTMLPurifier/Generator.php';
 require_once 'HTMLPurifier/EntityLookup.php';
-require_once 'HTMLPurifier/Harness.php';
+require_once 'HTMLPurifier/ComplexHarness.php';
-class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
+class HTMLPurifier_GeneratorTest extends HTMLPurifier_ComplexHarness
     var $gen;
     var $_entity_lookup;
     function HTMLPurifier_GeneratorTest() {
-        $this->UnitTestCase();
+        $this->HTMLPurifier_ComplexHarness(); // chain to the direct parent so its lexer/generator are initialized
         $this->gen = new HTMLPurifier_Generator();
         $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
diff --git a/tests/HTMLPurifier/HTMLDefinitionTest.php b/tests/HTMLPurifier/HTMLDefinitionTest.php
index 3581f8cf..28fb28cc 100644
--- a/tests/HTMLPurifier/HTMLDefinitionTest.php
+++ b/tests/HTMLPurifier/HTMLDefinitionTest.php
@@ -2,13 +2,22 @@
 require_once 'HTMLPurifier/HTMLDefinition.php';
-class HTMLPurifier_HTMLDefinitionTest extends UnitTestCase
+class HTMLPurifier_HTMLDefinitionTest extends HTMLPurifier_Harness
     function test_parseTinyMCEAllowedList() {
         $def = new HTMLPurifier_HTMLDefinition();
+        // note: this is case-sensitive, but its config schema 
+        // counterpart is not. This is generally a good thing for users,
+        // but it's a slight internal inconsistency
+        $this->assertEqual(
+            $def->parseTinyMCEAllowedList(''),
+            array(array(), array())
+        );
             array(array('a' => true, 'b' => true, 'c' => true), array())
@@ -35,6 +44,17 @@ class HTMLPurifier_HTMLDefinitionTest extends UnitTestCase
             array('span.style' => true, 'a.href' => true, 'a.title' => true))
+        $this->assertEqual(
+            // alternate form:
+            $def->parseTinyMCEAllowedList(
+            array(array('span' => true, 'strong' => true, 'a' => true),
+            array('span.style' => true, 'a.href' => true, 'a.title' => true))
+        );
     function test_Allowed() {
diff --git a/tests/HTMLPurifier/HTMLModule/RubyTest.php b/tests/HTMLPurifier/HTMLModule/RubyTest.php
new file mode 100644
index 00000000..15abbcb7
--- /dev/null
+++ b/tests/HTMLPurifier/HTMLModule/RubyTest.php
@@ -0,0 +1,56 @@
+require_once 'HTMLPurifier/HTMLModuleHarness.php';
+class HTMLPurifier_HTMLModule_RubyTest extends HTMLPurifier_HTMLModuleHarness
+    function setUp() {
+        parent::setUp();
+        $this->config->set('HTML', 'Doctype', 'XHTML 1.1');
+    }
+    function testBasicUse() {
+        $this->assertResult(
+            '<ruby><rb>WWW</rb><rt>World Wide Web</rt></ruby>'
+        );
+    }
+    function testRPUse() {
+        $this->assertResult(
+            '<ruby><rb>WWW</rb><rp>(</rp><rt>World Wide Web</rt><rp>)</rp></ruby>'
+        );
+    }
+    function testComplexUse() {
+        $this->assertResult(
+  <rbc>
+    <rb>10</rb>
+    <rb>31</rb>
+    <rb>2002</rb>
+  </rbc>
+  <rtc>
+    <rt>Month</rt>
+    <rt>Day</rt>
+    <rt>Year</rt>
+  </rtc>
+  <rtc>
+    <rt rbspan="3">Expiration Date</rt>
+  </rtc>
+        );
+        /* not implemented
+        function testBackwardsCompat() {
+            $this->assertResult(
+                '<ruby>A<rp>(</rp><rt>aaa</rt><rp>)</rp></ruby>',
+                '<ruby><rb>A</rb><rp>(</rp><rt>aaa</rt><rp>)</rp></ruby>'
+            );
+        }
+        */
+    }
diff --git a/tests/HTMLPurifier/HTMLModule/TidyTest.php b/tests/HTMLPurifier/HTMLModule/TidyTest.php
index ff8d844d..f2522d9d 100644
--- a/tests/HTMLPurifier/HTMLModule/TidyTest.php
+++ b/tests/HTMLPurifier/HTMLModule/TidyTest.php
@@ -8,7 +8,7 @@ Mock::generatePartial(
     array('makeFixes', 'makeFixesForLevel', 'populate')
-class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase
+class HTMLPurifier_HTMLModule_TidyTest extends HTMLPurifier_Harness
     function test_getFixesForLevel() {
diff --git a/tests/HTMLPurifier/HTMLModuleManagerTest.php b/tests/HTMLPurifier/HTMLModuleManagerTest.php
index d5219ca1..50dcb154 100644
--- a/tests/HTMLPurifier/HTMLModuleManagerTest.php
+++ b/tests/HTMLPurifier/HTMLModuleManagerTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/HTMLModuleManager.php';
-class HTMLPurifier_HTMLModuleManagerTest extends UnitTestCase
+class HTMLPurifier_HTMLModuleManagerTest extends HTMLPurifier_Harness
     function test_addModule() {
diff --git a/tests/HTMLPurifier/HTMLModuleTest.php b/tests/HTMLPurifier/HTMLModuleTest.php
index 238ca42a..ace5805f 100644
--- a/tests/HTMLPurifier/HTMLModuleTest.php
+++ b/tests/HTMLPurifier/HTMLModuleTest.php
@@ -3,7 +3,7 @@
 require_once 'HTMLPurifier/HTMLModule.php';
 require_once 'HTMLPurifier/AttrDef.php';
-class HTMLPurifier_HTMLModuleTest extends UnitTestCase
+class HTMLPurifier_HTMLModuleTest extends HTMLPurifier_Harness
     function test_addElementToContentSet() {
diff --git a/tests/HTMLPurifier/Harness.php b/tests/HTMLPurifier/Harness.php
index 84cea5eb..4af4384b 100644
--- a/tests/HTMLPurifier/Harness.php
+++ b/tests/HTMLPurifier/Harness.php
@@ -1,128 +1,69 @@
-require_once 'HTMLPurifier/Lexer/DirectLex.php';
+require_once 'HTMLPurifier/URIParser.php';
- * General-purpose test-harness that makes testing functions that require
- * configuration and context objects easier when those two parameters are
- * meaningless.  See HTMLPurifier_ChildDefTest for a good example of usage.
+ * All-purpose harness; extend this rather than SimpleTest's UnitTestCase
 class HTMLPurifier_Harness extends UnitTestCase
-    /**
-     * Instance of the object that will execute the method
-     */
-    var $obj;
-    /**
-     * Name of the function to be executed
-     */
-    var $func;
-    /**
-     * Whether or not the method deals in tokens. If set to true, assertResult()
-     * will transparently convert HTML to and back from tokens.
-     */
-    var $to_tokens = false;
-    /**
-     * Whether or not to convert tokens back into HTML before performing
-     * equality check, has no effect on bools.
-     */
-    var $to_html = false;
-    /**
-     * Instance of an HTMLPurifier_Lexer implementation.
-     */
-    var $lexer;
-    /**
-     * Instance of HTMLPurifier_Generator
-     */
-    var $generator;
-    /**
-     * Default config to fall back on if no config is available
-     */
-    var $config;
-    /**
-     * Default context to fall back on if no context is available
-     */
-    var $context;
     function HTMLPurifier_Harness() {
-        $this->lexer     = new HTMLPurifier_Lexer_DirectLex();
-        $this->generator = new HTMLPurifier_Generator();
+    var $config, $context;
-     * Asserts a specific result from a one parameter + config/context function
-     * @param $input Input parameter
-     * @param $expect Expectation
-     * @param $config Configuration array in form of Ns.Directive => Value.
-     *                Has no effect if $this->config is set.
-     * @param $context_array Context array in form of Key => Value or an actual
-     *                       context object.
+     * Generates easily accessible default config/context
-    function assertResult($input, $expect = true,
-        $config_array = array(), $context_array = array()
-    ) {
-        // setup config 
-        if ($this->config) {
-            $config = HTMLPurifier_Config::create($this->config);
-            $config->loadArray($config_array);
+    function setUp() {
+        list($this->config, $this->context) = $this->createCommon();
+    }
+    /**
+     * Accepts config and context and prepares them into a valid state
+     * @param &$config Reference to config variable
+     * @param &$context Reference to context variable
+     */
+    function prepareCommon(&$config, &$context) {
+        $config = HTMLPurifier_Config::create($config);
+        if (!$context) $context = new HTMLPurifier_Context();
+    }
+    /**
+     * Generates default configuration and context objects
+     * @return Defaults in form of array($config, $context)
+     */
+    function createCommon() {
+        return array(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context);
+    }
+    /**
+     * If $expect is false, ignore $result and check if status failed.
+     * Otherwise, check if $status is true and $result === $expect.
+     * @param $status Boolean status
+     * @param $result Mixed result from processing
+     * @param $expect Mixed expectation for result
+     */
+    function assertEitherFailOrIdentical($status, $result, $expect) {
+        if ($expect === false) {
+            $this->assertFalse($status, 'Expected false result, got true');
         } else {
-            $config = HTMLPurifier_Config::create($config_array);
+            $this->assertTrue($status, 'Expected true result, got false');
+            $this->assertIdentical($result, $expect);
-        // setup context object. Note that we are operating on a copy of it!
-        // When necessary, extend the test harness to allow post-tests
-        // on the context object
-        if (empty($this->context)) {
-            $context = new HTMLPurifier_Context();
-            $context->loadArray($context_array);
-        } else {
-            $context =& $this->context;
-        }
-        if ($this->to_tokens && is_string($input)) {
-            // $func may cause $input to change, so "clone" another copy
-            // to sacrifice
-            $input   = $this->lexer->tokenizeHTML($s = $input, $config, $context);
-            $input_c = $this->lexer->tokenizeHTML($s, $config, $context);
-        } else {
-            $input_c = $input;
-        }
-        // call the function
-        $func = $this->func;
-        $result = $this->obj->$func($input_c, $config, $context);
-        // test a bool result
-        if (is_bool($result)) {
-            $this->assertIdentical($expect, $result);
-            return;
-        } elseif (is_bool($expect)) {
-            $expect = $input;
-        }
-        if ($this->to_html) {
-            $result = $this->generator->
-              generateFromTokens($result, $config, $context);
-            if (is_array($expect)) {
-                $expect = $this->generator->
-                  generateFromTokens($expect, $config, $context);
+    }
+    function getTests() {
+        // a method whose name starts with __onlytest becomes the only test run
+        foreach (get_class_methods(get_class($this)) as $method) {
+            if (strtolower(substr($method, 0, 10)) == '__onlytest') {
+                return array($method);
-        $this->assertIdentical($expect, $result);
+        return parent::getTests();
diff --git a/tests/HTMLPurifier/IDAccumulatorTest.php b/tests/HTMLPurifier/IDAccumulatorTest.php
index 05db0b2a..006d689c 100644
--- a/tests/HTMLPurifier/IDAccumulatorTest.php
+++ b/tests/HTMLPurifier/IDAccumulatorTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/IDAccumulator.php';
-class HTMLPurifier_IDAccumulatorTest extends UnitTestCase
+class HTMLPurifier_IDAccumulatorTest extends HTMLPurifier_Harness
     function test() {
diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
index 8611fd9b..0f0b5e5e 100644
--- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php
+++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
@@ -237,6 +237,35 @@ Par1
+        $this->assertResult(
+'<img /> Foo',
+'<p><img /> Foo</p>'
+        );
+        $this->assertResult(
+'<li>Foo <a>bar</a></li>'
+        );
+        $this->assertResult(
+        );
+        $this->assertResult(
+        );
+        $this->assertResult(
+        );
+        $this->assertResult(
+'<b>One</b> <i>Two</i>',
+'<p><b>One</b> <i>Two</i></p>'
+        );
     function testInlineRootNode() {
@@ -249,5 +278,10 @@ Par2',
+    function testNeeded() {
+        $this->expectError('Cannot enable AutoParagraph injector because p is not allowed');
+        $this->assertResult('<b>foobar</b>', true, array('AutoFormat.AutoParagraph' => true, 'HTML.Allowed' => 'b'));
+    }
diff --git a/tests/HTMLPurifier/Injector/LinkifyTest.php b/tests/HTMLPurifier/Injector/LinkifyTest.php
index b91908ee..66a06956 100644
--- a/tests/HTMLPurifier/Injector/LinkifyTest.php
+++ b/tests/HTMLPurifier/Injector/LinkifyTest.php
@@ -34,5 +34,10 @@ class HTMLPurifier_Injector_LinkifyTest extends HTMLPurifier_InjectorHarness
+    function testNeeded() {
+        $this->expectError('Cannot enable Linkify injector because a is not allowed');
+        $this->assertResult('http://example.com/', true, array('AutoFormat.Linkify' => true, 'HTML.Allowed' => 'b'));
+    }
diff --git a/tests/HTMLPurifier/Injector/PurifierLinkifyTest.php b/tests/HTMLPurifier/Injector/PurifierLinkifyTest.php
index d538c489..e820d677 100644
--- a/tests/HTMLPurifier/Injector/PurifierLinkifyTest.php
+++ b/tests/HTMLPurifier/Injector/PurifierLinkifyTest.php
@@ -38,5 +38,10 @@ class HTMLPurifier_Injector_PurifierLinkifyTest extends HTMLPurifier_InjectorHar
+    function testNeeded() {
+        $this->expectError('Cannot enable PurifierLinkify injector because a is not allowed');
+        $this->assertResult('%Namespace.Directive', true, array('AutoFormat.PurifierLinkify' => true, 'HTML.Allowed' => 'b'));
+    }
diff --git a/tests/HTMLPurifier/LanguageFactoryTest.php b/tests/HTMLPurifier/LanguageFactoryTest.php
index eb0f4556..2cadb1c1 100644
--- a/tests/HTMLPurifier/LanguageFactoryTest.php
+++ b/tests/HTMLPurifier/LanguageFactoryTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/LanguageFactory.php';
-class HTMLPurifier_LanguageFactoryTest extends UnitTestCase
+class HTMLPurifier_LanguageFactoryTest extends HTMLPurifier_Harness
     function test() {
diff --git a/tests/HTMLPurifier/LanguageTest.php b/tests/HTMLPurifier/LanguageTest.php
index f846c619..ec4244a8 100644
--- a/tests/HTMLPurifier/LanguageTest.php
+++ b/tests/HTMLPurifier/LanguageTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/Language.php';
-class HTMLPurifier_LanguageTest extends UnitTestCase
+class HTMLPurifier_LanguageTest extends HTMLPurifier_Harness
     var $lang;
diff --git a/tests/HTMLPurifier/Lexer/DirectLexTest.php b/tests/HTMLPurifier/Lexer/DirectLexTest.php
index ba7d0fe7..37835790 100644
--- a/tests/HTMLPurifier/Lexer/DirectLexTest.php
+++ b/tests/HTMLPurifier/Lexer/DirectLexTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/Lexer/DirectLex.php';
-class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
+class HTMLPurifier_Lexer_DirectLexTest extends HTMLPurifier_Harness
     var $DirectLex;
@@ -59,6 +59,12 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
         $input[12] = '="" =""';
         $expect[12] = array('"' => ''); // tough to say, just don't throw a loop
+        $input[13] = 'href="';
+        $expect[13] = array('href' => '');
+        $input[14] = 'href=" <';
+        $expect[14] = array('href' => ' <');
         $config = HTMLPurifier_Config::createDefault();
         $context = new HTMLPurifier_Context();
         $size = count($input);
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index c16b0611..75c05b78 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/Lexer/DirectLex.php';
-class HTMLPurifier_LexerTest extends UnitTestCase
+class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
     var $Lexer;
@@ -287,16 +287,21 @@ class HTMLPurifier_LexerTest extends UnitTestCase
         $expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
         // test emoticon protection
-        $input[19] = '<b>Whoa! >.< That\'s not good >.></b>';
+        $input[19] = '<b>Whoa! <3 That\'s not good >.></b>';
         $expect[19] = array(
             new HTMLPurifier_Token_Start('b'),
-            new HTMLPurifier_Token_Text('Whoa! >.'),
-            new HTMLPurifier_Token_Text('< That\'s not good >'),
+            new HTMLPurifier_Token_Text('Whoa! '),
+            new HTMLPurifier_Token_Text('<3 That\'s not good >'),
             new HTMLPurifier_Token_Text('.>'),
             new HTMLPurifier_Token_End('b'),
+        $dom_expect[19] = array(
+            new HTMLPurifier_Token_Start('b'),
+            new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
+            new HTMLPurifier_Token_End('b'),
+        );
         $sax_expect[19] = false; // SAX drops the < character
-        $dom_expect[19] = false; // DOM drops the entire pseudo-tag
+        $config[19] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
         // test comment parsing with funky characters inside
         $input[20] = '<!-- This >< comment --><br />';
@@ -305,6 +310,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
             new HTMLPurifier_Token_Empty('br')
         $sax_expect[20] = false;
+        $config[20] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
         // test comment parsing of missing end
         $input[21] = '<!-- This >< comment';
@@ -313,6 +319,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
         $sax_expect[21] = false;
         $dom_expect[21] = false;
+        $config[21] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
         // test CDATA tags
         $input[22] = '<script>alert("<foo>");</script>';
@@ -323,7 +330,25 @@ class HTMLPurifier_LexerTest extends UnitTestCase
         $config[22] = HTMLPurifier_Config::create(array('HTML.Trusted' => true));
         $sax_expect[22] = false;
-        //$dom_expect[22] = false;
+        // test escaping
+        $input[23] = '<!-- This comment < &lt; & -->';
+        $expect[23] = array(
+            new HTMLPurifier_Token_Comment(' This comment < &lt; & ') );
+        $sax_expect[23] = false; $config[23] =
+        HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' =>
+        true));
+        // more DirectLex edge-cases 
+        $input[24] = '<a href="><>">';
+        $expect[24] = array(
+            new HTMLPurifier_Token_Start('a', array('href' => '')),
+            new HTMLPurifier_Token_Text('<">')
+        );
+        $sax_expect[24] = false;
+        $dom_expect[24] = array(
+            new HTMLPurifier_Token_Empty('a', array('href' => '><>'))
+        );
         $default_config = HTMLPurifier_Config::createDefault();
         $default_context = new HTMLPurifier_Context();
diff --git a/tests/HTMLPurifier/PercentEncoderTest.php b/tests/HTMLPurifier/PercentEncoderTest.php
index ea52021d..4b01ac3a 100644
--- a/tests/HTMLPurifier/PercentEncoderTest.php
+++ b/tests/HTMLPurifier/PercentEncoderTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/PercentEncoder.php';
-class HTMLPurifier_PercentEncoderTest extends UnitTestCase
+class HTMLPurifier_PercentEncoderTest extends HTMLPurifier_Harness
     var $PercentEncoder;
diff --git a/tests/HTMLPurifier/Strategy/CompositeTest.php b/tests/HTMLPurifier/Strategy/CompositeTest.php
index 606d786b..db4ab040 100644
--- a/tests/HTMLPurifier/Strategy/CompositeTest.php
+++ b/tests/HTMLPurifier/Strategy/CompositeTest.php
@@ -15,7 +15,7 @@ class HTMLPurifier_Strategy_Composite_Test
 // doesn't use Strategy harness
-class HTMLPurifier_Strategy_CompositeTest extends UnitTestCase
+class HTMLPurifier_Strategy_CompositeTest extends HTMLPurifier_Harness
     function test() {
diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php
index 2a323c57..ac651684 100644
--- a/tests/HTMLPurifier/Strategy/FixNestingTest.php
+++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php
@@ -63,12 +63,6 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
           '<span><ins>Not allowed!</ins></span>'
-        $this->assertResult( // alt config
-          '<span><ins><div>Not allowed!</div></ins></span>',
-          '<span><ins>&lt;div&gt;Not allowed!&lt;/div&gt;</ins></span>',
-          array('Core.EscapeInvalidChildren' => true)
-        );
         // test block element that has inline content
           '<h1><ins><div>Not allowed!</div></ins></h1>',
@@ -84,6 +78,12 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
+        $this->assertResult( // alt config
+          '<span><ins><div>Not allowed!</div></ins></span>',
+          '<span><ins>&lt;div&gt;Not allowed!&lt;/div&gt;</ins></span>',
+          array('Core.EscapeInvalidChildren' => true)
+        );
     function testExclusionsIntegration() {
diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
index c4289d98..87a4b38c 100644
--- a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
+++ b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
@@ -30,12 +30,23 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
+        $this->assertResult(
+            '<style>.foo {blink;}</style>',
+            ''
+        );
             array('Core.RemoveScriptContents' => false)
+        $this->assertResult(
+            '<script>alert();</script>',
+            'alert();',
+            array('Core.HiddenElements' => array())
+        );
             '<menu><li>Item 1</li></menu>',
             '<ul><li>Item 1</li></ul>'
diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php
index 6b7ac74f..5843b722 100644
--- a/tests/HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php
+++ b/tests/HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php
@@ -48,8 +48,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements_ErrorsTest extends HTMLPurifie
         $this->invoke('<!-- test -->');
-    function testScriptRemoved() {
-        $this->collector->expectAt(0, 'send', array(E_ERROR, 'Strategy_RemoveForeignElements: Script removed'));
+    function testForeignMetaElementRemoved() {
+        $this->collector->expectAt(0, 'send', array(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed'));
         $this->collector->expectContextAt(0, 'CurrentToken', new HTMLPurifier_Token_Start('script', array(), 1));
         $this->collector->expectAt(1, 'send', array(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', 'script'));
diff --git a/tests/HTMLPurifier/StrategyHarness.php b/tests/HTMLPurifier/StrategyHarness.php
index d8b2d5fd..fe20b646 100644
--- a/tests/HTMLPurifier/StrategyHarness.php
+++ b/tests/HTMLPurifier/StrategyHarness.php
@@ -1,11 +1,12 @@
-require_once 'HTMLPurifier/Harness.php';
+require_once 'HTMLPurifier/ComplexHarness.php';
-class HTMLPurifier_StrategyHarness extends HTMLPurifier_Harness
+class HTMLPurifier_StrategyHarness extends HTMLPurifier_ComplexHarness
     function setUp() {
+        parent::setUp();
         $this->func      = 'execute';
         $this->to_tokens = true;
         $this->to_html   = true;
diff --git a/tests/HTMLPurifier/TagTransformTest.php b/tests/HTMLPurifier/TagTransformTest.php
index b3d6f461..92b28fb0 100644
--- a/tests/HTMLPurifier/TagTransformTest.php
+++ b/tests/HTMLPurifier/TagTransformTest.php
@@ -6,7 +6,7 @@ require_once 'HTMLPurifier/TagTransform.php';
 require_once 'HTMLPurifier/TagTransform/Font.php';
 require_once 'HTMLPurifier/TagTransform/Simple.php';
-class HTMLPurifier_TagTransformTest extends UnitTestCase
+class HTMLPurifier_TagTransformTest extends HTMLPurifier_Harness
diff --git a/tests/HTMLPurifier/TokenFactoryTest.php b/tests/HTMLPurifier/TokenFactoryTest.php
index 8a35cbbc..54be5305 100644
--- a/tests/HTMLPurifier/TokenFactoryTest.php
+++ b/tests/HTMLPurifier/TokenFactoryTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/TokenFactory.php';
-class HTMLPurifier_TokenFactoryTest extends UnitTestCase
+class HTMLPurifier_TokenFactoryTest extends HTMLPurifier_Harness
     public function test() {
diff --git a/tests/HTMLPurifier/TokenTest.php b/tests/HTMLPurifier/TokenTest.php
index 22926fb5..2f440ff9 100644
--- a/tests/HTMLPurifier/TokenTest.php
+++ b/tests/HTMLPurifier/TokenTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/Token.php';
-class HTMLPurifier_TokenTest extends UnitTestCase
+class HTMLPurifier_TokenTest extends HTMLPurifier_Harness
     function assertTokenConstruction($name, $attr,
diff --git a/tests/HTMLPurifier/URIDefinitionTest.php b/tests/HTMLPurifier/URIDefinitionTest.php
new file mode 100644
index 00000000..149f89b2
--- /dev/null
+++ b/tests/HTMLPurifier/URIDefinitionTest.php
@@ -0,0 +1,59 @@
+require_once 'HTMLPurifier/URIHarness.php';
+require_once 'HTMLPurifier/URIDefinition.php';
+class HTMLPurifier_URIDefinitionTest extends HTMLPurifier_URIHarness
+    function createFilterMock($expect = true, $result = true) {
+        generate_mock_once('HTMLPurifier_URIFilter');
+        $mock = new HTMLPurifier_URIFilterMock();
+        if ($expect) $mock->expectOnce('filter');
+        else $mock->expectNever('filter');
+        $mock->setReturnValue('filter', $result);
+        return $mock;
+    }
+    function test_filter() {
+        $def = new HTMLPurifier_URIDefinition();
+        $def->filters[] = $this->createFilterMock();
+        $def->filters[] = $this->createFilterMock();
+        $uri = $this->createURI('test');
+        $this->assertTrue($def->filter($uri, $this->config, $this->context));
+    }
+    function test_filter_earlyAbortIfFail() {
+        $def = new HTMLPurifier_URIDefinition();
+        $def->filters[] = $this->createFilterMock(true, false);
+        $def->filters[] = $this->createFilterMock(false); // never called
+        $uri = $this->createURI('test');
+        $this->assertFalse($def->filter($uri, $this->config, $this->context));
+    }
+    function test_setupMemberVariables_collisionPrecedenceIsHostBaseScheme() {
+        $this->config->set('URI', 'Host', $host = 'example.com');
+        $this->config->set('URI', 'Base', $base = 'http://sub.example.com/foo/bar.html');
+        $this->config->set('URI', 'DefaultScheme', 'ftp');
+        $def = new HTMLPurifier_URIDefinition();
+        $def->setupMemberVariables($this->config);
+        $this->assertIdentical($def->host, $host);
+        $this->assertIdentical($def->base, $this->createURI($base));
+        $this->assertIdentical($def->defaultScheme, 'http'); // not ftp!
+    }
+    function test_setupMemberVariables_onlyScheme() {
+        $this->config->set('URI', 'DefaultScheme', 'ftp');
+        $def = new HTMLPurifier_URIDefinition();
+        $def->setupMemberVariables($this->config);
+        $this->assertIdentical($def->defaultScheme, 'ftp');
+    }
+    function test_setupMemberVariables_onlyBase() {
+        $this->config->set('URI', 'Base', 'http://sub.example.com/foo/bar.html');
+        $def = new HTMLPurifier_URIDefinition();
+        $def->setupMemberVariables($this->config);
+        $this->assertIdentical($def->host, 'sub.example.com');
+    }
diff --git a/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php b/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php
new file mode 100644
index 00000000..545e421b
--- /dev/null
+++ b/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php
@@ -0,0 +1,24 @@
+require_once 'HTMLPurifier/URIFilter/DisableExternalTest.php';
+require_once 'HTMLPurifier/URIFilter/DisableExternalResources.php';
+class HTMLPurifier_URIFilter_DisableExternalResourcesTest extends
+      HTMLPurifier_URIFilter_DisableExternalTest
+    function setUp() {
+        parent::setUp();
+        $this->filter = new HTMLPurifier_URIFilter_DisableExternalResources();
+        $var = true;
+        $this->context->register('EmbeddedURI', $var);
+    }
+    function testPreserveWhenNotEmbedded() {
+        $this->context->destroy('EmbeddedURI'); // undo setUp
+        $this->assertFiltering(
+            'http://example.com'
+        );
+    }
diff --git a/tests/HTMLPurifier/URIFilter/DisableExternalTest.php b/tests/HTMLPurifier/URIFilter/DisableExternalTest.php
new file mode 100644
index 00000000..e4a0e89f
--- /dev/null
+++ b/tests/HTMLPurifier/URIFilter/DisableExternalTest.php
@@ -0,0 +1,47 @@
+require_once 'HTMLPurifier/URIFilter/DisableExternal.php';
+require_once 'HTMLPurifier/URIFilterHarness.php';
+class HTMLPurifier_URIFilter_DisableExternalTest extends HTMLPurifier_URIFilterHarness
+    function setUp() {
+        parent::setUp();
+        $this->filter = new HTMLPurifier_URIFilter_DisableExternal();
+    }
+    function testRemoveExternal() {
+        $this->assertFiltering(
+            'http://example.com', false
+        );
+    }
+    function testPreserveInternal() {
+        $this->assertFiltering(
+            '/foo/bar'
+        );
+    }
+    function testPreserveOurHost() {
+        $this->config->set('URI', 'Host', 'example.com');
+        $this->assertFiltering(
+            'http://example.com'
+        );
+    }
+    function testPreserveOurSubdomain() {
+        $this->config->set('URI', 'Host', 'example.com');
+        $this->assertFiltering(
+            'http://www.example.com'
+        );
+    }
+    function testRemoveSuperdomain() {
+        $this->config->set('URI', 'Host', 'www.example.com');
+        $this->assertFiltering(
+            'http://example.com', false
+        );
+    }
diff --git a/tests/HTMLPurifier/URIFilter/HostBlacklistTest.php b/tests/HTMLPurifier/URIFilter/HostBlacklistTest.php
new file mode 100644
index 00000000..d9a3fdd2
--- /dev/null
+++ b/tests/HTMLPurifier/URIFilter/HostBlacklistTest.php
@@ -0,0 +1,30 @@
+require_once 'HTMLPurifier/URIFilter/HostBlacklist.php';
+require_once 'HTMLPurifier/URIFilterHarness.php';
+class HTMLPurifier_URIFilter_HostBlacklistTest extends HTMLPurifier_URIFilterHarness
+    function setUp() {
+        parent::setUp();
+        $this->filter = new HTMLPurifier_URIFilter_HostBlacklist();
+    }
+    function testRejectBlacklistedHost() {
+        $this->config->set('URI', 'HostBlacklist', 'example.com');
+        $this->assertFiltering('http://example.com', false);
+    }
+    function testRejectBlacklistedHostThoughNotTrue() {
+        // maybe this behavior should change
+        $this->config->set('URI', 'HostBlacklist', 'example.com');
+        $this->assertFiltering('http://example.comcast.com', false);
+    }
+    function testPreserveNonBlacklistedHost() {
+        $this->config->set('URI', 'HostBlacklist', 'example.com');
+        $this->assertFiltering('http://google.com');
+    }
diff --git a/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
new file mode 100644
index 00000000..d509a6a1
--- /dev/null
+++ b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
@@ -0,0 +1,122 @@
+require_once 'HTMLPurifier/URIFilter/MakeAbsolute.php';
+require_once 'HTMLPurifier/URIFilterHarness.php';
+class HTMLPurifier_URIFilter_MakeAbsoluteTest extends HTMLPurifier_URIFilterHarness
+    function setUp() {
+        parent::setUp();
+        $this->filter = new HTMLPurifier_URIFilter_MakeAbsolute();
+        $this->setBase();
+    }
+    function setBase($base = 'http://example.com/foo/bar.html?q=s#frag') {
+        $this->config->set('URI', 'Base', $base);
+    }
+    // corresponding to RFC 2396
+    function testPreserveAbsolute() {
+        $this->assertFiltering('http://example.com/foo.html');
+    }
+    function testFilterBlank() {
+        $this->assertFiltering('', 'http://example.com/foo/bar.html?q=s');
+    }
+    function testFilterEmptyPath() {
+        $this->assertFiltering('?q=s#frag', 'http://example.com/foo/bar.html?q=s#frag');
+    }
+    function testPreserveAltScheme() {
+        $this->assertFiltering('mailto:bob@example.com');
+    }
+    function testFilterIgnoreHTTPSpecialCase() {
+        $this->assertFiltering('http:/', 'http://example.com/');
+    }
+    function testFilterAbsolutePath() {
+        $this->assertFiltering('/foo.txt', 'http://example.com/foo.txt');
+    }
+    function testFilterRelativePath() {
+        $this->assertFiltering('baz.txt', 'http://example.com/foo/baz.txt');
+    }
+    function testFilterRelativePathWithInternalDot() {
+        $this->assertFiltering('./baz.txt', 'http://example.com/foo/baz.txt');
+    }
+    function testFilterRelativePathWithEndingDot() {
+        $this->assertFiltering('baz/.', 'http://example.com/foo/baz/');
+    }
+    function testFilterRelativePathDot() {
+        $this->assertFiltering('.', 'http://example.com/foo/');
+    }
+    function testFilterRelativePathWithInternalDotDot() {
+        $this->assertFiltering('../baz.txt', 'http://example.com/baz.txt');
+    }
+    function testFilterRelativePathWithEndingDotDot() {
+        $this->assertFiltering('..', 'http://example.com/');
+    }
+    function testFilterRelativePathTooManyDotDots() {
+        $this->assertFiltering('../../', 'http://example.com/');
+    }
+    function testFilterAppendingQueryAndFragment() {
+        $this->assertFiltering('/foo.php?q=s#frag', 'http://example.com/foo.php?q=s#frag');
+    }
+    // edge cases below
+    function testFilterAbsolutePathBase() {
+        $this->setBase('/foo/baz.txt');
+        $this->assertFiltering('test.php', '/foo/test.php');
+    }
+    function testFilterAbsolutePathBaseDirectory() {
+        $this->setBase('/foo/');
+        $this->assertFiltering('test.php', '/foo/test.php');
+    }
+    function testFilterAbsolutePathBaseBelow() {
+        $this->setBase('/foo/baz.txt');
+        $this->assertFiltering('../../test.php', '/test.php');
+    }
+    function testFilterRelativePathBase() {
+        $this->setBase('foo/baz.html');
+        $this->assertFiltering('foo.php', 'foo/foo.php');
+    }
+    function testFilterRelativePathBaseBelow() {
+        $this->setBase('../baz.html');
+        $this->assertFiltering('test/strike.html', '../test/strike.html');
+    }
+    function testFilterRelativePathBaseWithAbsoluteURI() {
+        $this->setBase('../baz.html');
+        $this->assertFiltering('/test/strike.html');
+    }
+    function testFilterRelativePathBaseWithDot() {
+        $this->setBase('../baz.html');
+        $this->assertFiltering('.', '../');
+    }
+    // error case
+    function testErrorNoBase() {
+        $this->setBase(null);
+        $this->expectError('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration');
+        $this->assertFiltering('foo/bar.txt');
+    }
diff --git a/tests/HTMLPurifier/URIFilterHarness.php b/tests/HTMLPurifier/URIFilterHarness.php
new file mode 100644
index 00000000..04e101f2
--- /dev/null
+++ b/tests/HTMLPurifier/URIFilterHarness.php
@@ -0,0 +1,15 @@
+require_once 'HTMLPurifier/URIHarness.php';
+class HTMLPurifier_URIFilterHarness extends HTMLPurifier_URIHarness
+    /**
+     * Runs the filter under test against a URI and checks what comes back.
+     * @param $uri String URI handed to the filter
+     * @param $expect_uri String URI expected afterwards; true (the default)
+     *        means "same as $uri", false is forwarded untouched to the
+     *        final assertion (presumably meaning the filter should reject
+     *        the URI -- confirm against assertEitherFailOrIdentical)
+     */
+    function assertFiltering($uri, $expect_uri = true) {
+        // prepareURI() rewrites both arguments in place into parsed form
+        $this->prepareURI($uri, $expect_uri);
+        $this->filter->prepare($this->config, $this->context);
+        $outcome = $this->filter->filter($uri, $this->config, $this->context);
+        $this->assertEitherFailOrIdentical($outcome, $uri, $expect_uri);
+    }
diff --git a/tests/HTMLPurifier/URIHarness.php b/tests/HTMLPurifier/URIHarness.php
new file mode 100644
index 00000000..63e6d7d6
--- /dev/null
+++ b/tests/HTMLPurifier/URIHarness.php
@@ -0,0 +1,31 @@
+require_once 'HTMLPurifier/URIParser.php';
+class HTMLPurifier_URIHarness extends HTMLPurifier_Harness
+    /**
+     * Converts an input URI and its expectation from strings to parsed form,
+     * modifying both arguments in place
+     * @param &$uri Reference to string input URI
+     * @param &$expect_uri Reference to string expectation URI; true is
+     *        shorthand for "identical to the input"
+     * @note A false $expect_uri is left as false and never parsed
+     */
+    function prepareURI(&$uri, &$expect_uri) {
+        $uri_parser = new HTMLPurifier_URIParser();
+        // resolve the shorthand while $uri is still the raw string
+        if ($expect_uri === true) {
+            $expect_uri = $uri;
+        }
+        $uri = $uri_parser->parse($uri);
+        if ($expect_uri === false) {
+            return;
+        }
+        $expect_uri = $uri_parser->parse($expect_uri);
+    }
+    /**
+     * Parses a URI string with a fresh HTMLPurifier_URIParser
+     * @param $uri String URI to parse
+     * @return Result of HTMLPurifier_URIParser::parse() for that string
+     */
+    function createURI($uri) {
+        $uri_parser = new HTMLPurifier_URIParser();
+        $parsed = $uri_parser->parse($uri);
+        return $parsed;
+    }
diff --git a/tests/HTMLPurifier/URIParserTest.php b/tests/HTMLPurifier/URIParserTest.php
new file mode 100644
index 00000000..370e90ca
--- /dev/null
+++ b/tests/HTMLPurifier/URIParserTest.php
@@ -0,0 +1,140 @@
+require_once 'HTMLPurifier/URIParser.php';
+require_once 'HTMLPurifier/URI.php';
+class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
+    /**
+     * Parses $uri and asserts that the result equals a HTMLPurifier_URI
+     * built from the seven given components.
+     * @param $uri String URI to parse
+     * @param $config Optional configuration, run through prepareCommon()
+     * @param $context Optional context, run through prepareCommon()
+     */
+    function assertParsing(
+        $uri, $scheme, $userinfo, $host, $port, $path, $query, $fragment, $config = null, $context = null
+    ) {
+        $this->prepareCommon($config, $context);
+        $uri_parser = new HTMLPurifier_URIParser();
+        $actual = $uri_parser->parse($uri, $config, $context);
+        $expected = new HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
+        $this->assertEqual($actual, $expected);
+    }
+    function testRegular() {
+        $this->assertParsing(
+            'http://www.example.com/webhp?q=foo#result2',
+            'http', null, 'www.example.com', null, '/webhp', 'q=foo', 'result2'
+        );
+    }
+    function testPortAndUsername() {
+        $this->assertParsing(
+            'http://user@authority.part:80/now/the/path?query#fragment',
+            'http', 'user', 'authority.part', 80, '/now/the/path', 'query', 'fragment'
+        );
+    }
+    function testPercentEncoding() {
+        $this->assertParsing(
+            'http://en.wikipedia.org/wiki/Clich%C3%A9',
+            'http', null, 'en.wikipedia.org', null, '/wiki/Clich%C3%A9', null, null
+        );
+    }
+    function testEmptyQuery() {
+        $this->assertParsing(
+            'http://www.example.com/?#',
+            'http', null, 'www.example.com', null, '/', '', null
+        );
+    }
+    function testEmptyPath() {
+        $this->assertParsing(
+            'http://www.example.com',
+            'http', null, 'www.example.com', null, '', null, null
+        );
+    }
+    function testOpaqueURI() {
+        $this->assertParsing(
+            'mailto:bob@example.com',
+            'mailto', null, null, null, 'bob@example.com', null, null
+        );
+    }
+    function testIPv4Address() {
+        // The URI and expected host were empty strings, which contradicts
+        // testEmpty() (an empty URI has no scheme and an empty path).
+        // 192.0.2.1 comes from the block reserved for documentation
+        // (RFC 5737); a dotted-quad host is expected back unchanged.
+        $this->assertParsing(
+            'http://192.0.2.1/',
+            'http', null, '192.0.2.1', null, '/', null, null
+        );
+    }
+    function testFakeIPv4Address() {
+        $this->assertParsing(
+            'http://333.123.32.123/',
+            'http', null, '333.123.32.123', null, '/', null, null
+        );
+    }
+    function testIPv6Address() {
+        $this->assertParsing(
+            'http://[2001:db8::7]/c=GB?objectClass?one',
+            'http', null, '[2001:db8::7]', null, '/c=GB', 'objectClass?one', null
+        );
+    }
+    function testInternationalizedDomainName() {
+        $this->assertParsing(
+            "http://t\xC5\xABdali\xC5\x86.lv",
+            'http', null, "t\xC5\xABdali\xC5\x86.lv", null, '', null, null
+        );
+    }
+    function testInvalidPort() {
+        $this->assertParsing(
+            'http://example.com:foobar',
+            'http', null, 'example.com', null, '', null, null
+        );
+    }
+    function testPathAbsolute() {
+        $this->assertParsing(
+            'http:/this/is/path',
+            'http', null, null, null, '/this/is/path', null, null
+        );
+    }
+    function testPathRootless() {
+        // this should not be used but is allowed
+        $this->assertParsing(
+            'http:this/is/path',
+            'http', null, null, null, 'this/is/path', null, null
+        );
+    }
+    function testPathEmpty() {
+        $this->assertParsing(
+            'http:',
+            'http', null, null, null, '', null, null
+        );
+    }
+    function testRelativeURI() {
+        $this->assertParsing(
+            '/a/b',
+            null, null, null, null, '/a/b', null, null
+        );
+    }
+    function testMalformedTag() {
+        $this->assertParsing(
+            'http://www.example.com/\'>"',
+            'http', null, 'www.example.com', null, '/', null, null
+        );
+    }
+    function testEmpty() {
+        $this->assertParsing(
+            '',
+            null, null, null, null, '', null, null
+        );
+    }
diff --git a/tests/HTMLPurifier/URISchemeRegistryTest.php b/tests/HTMLPurifier/URISchemeRegistryTest.php
index 02b938e4..497d0f66 100644
--- a/tests/HTMLPurifier/URISchemeRegistryTest.php
+++ b/tests/HTMLPurifier/URISchemeRegistryTest.php
@@ -2,7 +2,7 @@
 require_once 'HTMLPurifier/URISchemeRegistry.php';
-class HTMLPurifier_URISchemeRegistryTest extends UnitTestCase
+class HTMLPurifier_URISchemeRegistryTest extends HTMLPurifier_Harness
     function test() {
diff --git a/tests/HTMLPurifier/URISchemeTest.php b/tests/HTMLPurifier/URISchemeTest.php
index 1e236a80..5b1f99a3 100644
--- a/tests/HTMLPurifier/URISchemeTest.php
+++ b/tests/HTMLPurifier/URISchemeTest.php
@@ -1,6 +1,10 @@
+require_once 'HTMLPurifier/URI.php';
+require_once 'HTMLPurifier/URIHarness.php';
 require_once 'HTMLPurifier/URIScheme.php';
+require_once 'HTMLPurifier/URISchemeRegistry.php';
 require_once 'HTMLPurifier/URIScheme/http.php';
 require_once 'HTMLPurifier/URIScheme/ftp.php';
@@ -12,145 +16,133 @@ require_once 'HTMLPurifier/URIScheme/nntp.php';
 // WARNING: All the URI schemes are far to relaxed, we need to tighten
 // the checks.
-class HTMLPurifier_URISchemeTest extends UnitTestCase
+class HTMLPurifier_URISchemeTest extends HTMLPurifier_URIHarness
-    function test_http() {
-        $scheme = new HTMLPurifier_URIScheme_http();
-        $config = HTMLPurifier_Config::createDefault();
-        $context = new HTMLPurifier_Context();
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, 'www.example.com', null, '/', 's=foobar', $config, $context),
-          array(null, 'www.example.com', null, '/', 's=foobar')
-        );
-        // absorb default port and userinfo
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                'user', 'www.example.com', 80, '/', 's=foobar', $config, $context),
-          array(null, 'www.example.com', null, '/', 's=foobar')
-        );
-        // do not absorb non-default port
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, 'www.example.com', 8080, '/', 's=foobar', $config, $context),
-          array(null, 'www.example.com', 8080, '/', 's=foobar')
-        );
-        // https is basically the same
-        $scheme = new HTMLPurifier_URIScheme_https();
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                'user', 'www.example.com', 443, '/', 's=foobar', $config, $context),
-          array(null, 'www.example.com', null, '/', 's=foobar')
-        );
+    function assertValidation($uri, $expect_uri = true) {
+        $this->prepareURI($uri, $expect_uri);
+        // convenience hack: the scheme should be explicitly specified
+        $scheme = $uri->getSchemeObj($this->config, $this->context);
+        $result = $scheme->validate($uri, $this->config, $this->context);
+        $this->assertEitherFailOrIdentical($result, $uri, $expect_uri);
-    function test_ftp() {
-        $scheme = new HTMLPurifier_URIScheme_ftp();
-        $config = HTMLPurifier_Config::createDefault();
-        $context = new HTMLPurifier_Context();
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                'user', 'www.example.com', 21, '/', 's=foobar', $config, $context),
-          array('user', 'www.example.com', null, '/', null)
-        );
-        // valid typecode
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, 'www.example.com', null, '/file.txt;type=a', null, $config, $context),
-          array(null, 'www.example.com', null, '/file.txt;type=a', null)
-        );
-        // remove invalid typecode
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, 'www.example.com', null, '/file.txt;type=z', null, $config, $context),
-          array(null, 'www.example.com', null, '/file.txt', null)
-        );
-        // encode errant semicolons
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, 'www.example.com', null, '/too;many;semicolons=1', null, $config, $context),
-          array(null, 'www.example.com', null, '/too%3Bmany%3Bsemicolons=1', null)
-        );
-    }
-    function test_news() {
-        $scheme = new HTMLPurifier_URIScheme_news();
-        $config = HTMLPurifier_Config::createDefault();
-        $context = new HTMLPurifier_Context();
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, null, null, 'gmane.science.linguistics', null, $config, $context),
-          array(null, null, null, 'gmane.science.linguistics', null)
-        );
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, null, null, '642@eagle.ATT.COM', null, $config, $context),
-          array(null, null, null, '642@eagle.ATT.COM', null)
-        );
-        // test invalid field removal
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                'user', 'www.google.com', 80, 'rec.music', 'path=foo', $config, $context),
-          array(null, null, null, 'rec.music', null)
-        );
-    }
-    function test_nntp() {
-        $scheme = new HTMLPurifier_URIScheme_nntp();
-        $config = HTMLPurifier_Config::createDefault();
-        $context = new HTMLPurifier_Context();
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, 'news.example.com', null, '/alt.misc/12345', null, $config, $context),
-          array(null, 'news.example.com', null, '/alt.misc/12345', null)
-        );
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                'user', 'news.example.com', 119, '/alt.misc/12345', 'foo=asdf', $config, $context),
-          array(null, 'news.example.com', null,  '/alt.misc/12345', null)
+    function test_http_regular() {
+        $this->assertValidation(
+            'http://example.com/?s=q#fragment'
-    function test_mailto() {
-        $scheme = new HTMLPurifier_URIScheme_mailto();
-        $config = HTMLPurifier_Config::createDefault();
-        $context = new HTMLPurifier_Context();
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                null, null, null, 'bob@example.com', null, $config, $context),
-          array(null, null, null, 'bob@example.com', null)
+    function test_http_removeDefaultPort() {
+        $this->assertValidation(
+            'http://example.com:80',
+            'http://example.com'
-        $this->assertIdentical(
-          $scheme->validateComponents(
-                'user', 'example.com', 80, 'bob@example.com', 'subject=Foo!', $config, $context),
-          array(null, null, null, 'bob@example.com', 'subject=Foo!')
+    }
+    function test_http_removeUserInfo() {
+        $this->assertValidation(
+            'http://bob@example.com',
+            'http://example.com'
+        );
+    }
+    function test_http_preserveNonDefaultPort() {
+        $this->assertValidation(
+            'http://example.com:8080'
+        );
+    }
+    function test_https_regular() {
+        $this->assertValidation(
+            'https://user@example.com:443/?s=q#frag',
+            'https://example.com/?s=q#frag'
+        );
+    }
+    function test_ftp_regular() {
+        $this->assertValidation(
+            'ftp://user@example.com/path'
+        );
+    }
+    function test_ftp_removeDefaultPort() {
+        $this->assertValidation(
+            'ftp://example.com:21',
+            'ftp://example.com'
+        );
+    }
+    function test_ftp_removeQueryString() {
+        $this->assertValidation(
+            'ftp://example.com?s=q',
+            'ftp://example.com'
+        );
+    }
+    function test_ftp_preserveValidTypecode() {
+        $this->assertValidation(
+            'ftp://example.com/file.txt;type=a'
+        );
+    }
+    function test_ftp_removeInvalidTypecode() {
+        $this->assertValidation(
+            'ftp://example.com/file.txt;type=z',
+            'ftp://example.com/file.txt'
+        );
+    }
+    function test_ftp_encodeExtraSemicolons() {
+        $this->assertValidation(
+            'ftp://example.com/too;many;semicolons=1',
+            'ftp://example.com/too%3Bmany%3Bsemicolons=1'
+        );
+    }
+    function test_news_regular() {
+        $this->assertValidation(
+            'news:gmane.science.linguistics'
+        );
+    }
+    function test_news_explicit() {
+        $this->assertValidation(
+            'news:642@eagle.ATT.COM'
+        );
+    }
+    function test_news_removeNonPathComponents() {
+        $this->assertValidation(
+            'news://user@example.com:80/rec.music?path=foo#frag',
+            'news:/rec.music#frag'
+        );
+    }
+    function test_nntp_regular() {
+        $this->assertValidation(
+            'nntp://news.example.com/alt.misc/42#frag'
+        );
+    }
+    function test_nntp_removalOfRedundantOrUselessComponents() {
+        $this->assertValidation(
+            'nntp://user@news.example.com:119/alt.misc/42?s=q#frag',
+            'nntp://news.example.com/alt.misc/42#frag'
+        );
+    }
+    function test_mailto_regular() {
+        $this->assertValidation(
+            'mailto:bob@example.com'
+        );
+    }
+    function test_mailto_removalOfRedundantOrUselessComponents() {
+        $this->assertValidation(
+            'mailto://user@example.com:80/bob@example.com?subject=Foo#frag',
+            'mailto:/bob@example.com?subject=Foo#frag'
diff --git a/tests/HTMLPurifier/URITest.php b/tests/HTMLPurifier/URITest.php
new file mode 100644
index 00000000..9da37a7a
--- /dev/null
+++ b/tests/HTMLPurifier/URITest.php
@@ -0,0 +1,166 @@
+require_once 'HTMLPurifier/URI.php';
+require_once 'HTMLPurifier/URIParser.php';
+class HTMLPurifier_URITest extends HTMLPurifier_URIHarness
+    /**
+     * Generates a URI object from the corresponding string.
+     * Delegates to HTMLPurifier_URIHarness::createURI(), which already
+     * performs exactly this parse, instead of duplicating it here.
+     * @param $uri String URI to parse
+     */
+    function createURI($uri) {
+        return parent::createURI($uri);
+    }
+    function test_construct() {
+        $uri1 = new HTMLPurifier_URI('HTTP', 'bob', 'example.com', '23', '/foo', 'bar=2', 'slash');
+        $uri2 = new HTMLPurifier_URI('http', 'bob', 'example.com',  23,  '/foo', 'bar=2', 'slash');
+        $this->assertIdentical($uri1, $uri2);
+    }
+    var $oldRegistry;
+    /**
+     * Generates mock classes for the scheme and the scheme registry, and
+     * hands a registry mock to HTMLPurifier_URISchemeRegistry::instance().
+     * The current instance is stored in $this->oldRegistry first so that
+     * tearDownSchemeRegistryMock() can pass it back.
+     * @return Reference to whatever instance() returns for the mock
+     */
+    function &setUpSchemeRegistryMock() {
+        $this->oldRegistry = HTMLPurifier_URISchemeRegistry::instance();
+        generate_mock_once('HTMLPurifier_URIScheme');
+        generate_mock_once('HTMLPurifier_URISchemeRegistry');
+        // NOTE(review): passing an object to instance() presumably installs
+        // it as the shared registry -- confirm in HTMLPurifier_URISchemeRegistry
+        $registry =& HTMLPurifier_URISchemeRegistry::instance(
+          new HTMLPurifier_URISchemeRegistryMock()
+        );
+        return $registry;
+    }
+    /**
+     * Sets up the registry mock and makes its getScheme() return a scheme
+     * mock when the first argument is $name.
+     * @param $name Scheme name the scheme mock is returned for
+     * @return The scheme mock
+     */
+    function &setUpSchemeMock($name) {
+        $registry =& $this->setUpSchemeRegistryMock();
+        $scheme_mock = new HTMLPurifier_URISchemeMock();
+        // '*' is SimpleTest's wildcard: the 2nd and 3rd arguments may be anything
+        $registry->setReturnValue('getScheme', $scheme_mock, array($name, '*', '*'));
+        return $scheme_mock;
+    }
+    /**
+     * Sets up the registry mock so that getScheme() returns false for
+     * every combination of arguments.
+     */
+    function setUpNoValidSchemes() {
+        $registry =& $this->setUpSchemeRegistryMock();
+        $registry->setReturnValue('getScheme', false, array('*', '*', '*'));
+    }
+    /**
+     * Passes the registry saved in $this->oldRegistry back to instance();
+     * call this at the end of every test that set up a registry mock.
+     */
+    function tearDownSchemeRegistryMock() {
+        HTMLPurifier_URISchemeRegistry::instance($this->oldRegistry);
+    }
+    function test_getSchemeObj() {
+        // the registry mock is primed to answer 'http' with $expected
+        $expected =& $this->setUpSchemeMock('http');
+        $uri = $this->createURI('http:');
+        $actual = $uri->getSchemeObj($this->config, $this->context);
+        $this->assertIdentical($actual, $expected);
+        $this->tearDownSchemeRegistryMock();
+    }
+    function test_getSchemeObj_invalidScheme() {
+        // with no scheme available from the registry, false must come back
+        $this->setUpNoValidSchemes();
+        $uri = $this->createURI('http:');
+        $scheme_obj = $uri->getSchemeObj($this->config, $this->context);
+        $this->assertIdentical($scheme_obj, false);
+        $this->tearDownSchemeRegistryMock();
+    }
+    function test_getSchemaObj_defaultScheme() {
+        // a URI without a scheme should be resolved via %URI.DefaultScheme
+        $default = 'foobar';
+        $expected =& $this->setUpSchemeMock($default);
+        $this->config->set('URI', 'DefaultScheme', $default);
+        $uri = $this->createURI('hmm');
+        $actual = $uri->getSchemeObj($this->config, $this->context);
+        $this->assertIdentical($actual, $expected);
+        $this->tearDownSchemeRegistryMock();
+    }
+    function test_getSchemaObj_invalidDefaultScheme() {
+        $this->setUpNoValidSchemes();
+        $this->config->set('URI', 'DefaultScheme', 'foobar');
+        $uri = $this->createURI('hmm');
+        // the expectation has to be registered before the call that triggers it
+        $this->expectError('Default scheme object "foobar" was not readable');
+        $scheme_obj = $uri->getSchemeObj($this->config, $this->context);
+        $this->assertIdentical($scheme_obj, false);
+        $this->tearDownSchemeRegistryMock();
+    }
+    /**
+     * Builds a HTMLPurifier_URI from its components and asserts that its
+     * toString() output is identical to $expect_uri
+     */
+    function assertToString($expect_uri, $scheme, $userinfo, $host, $port, $path, $query, $fragment) {
+        $subject = new HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
+        $this->assertIdentical($subject->toString(), $expect_uri);
+    }
+    function test_toString_full() {
+        $this->assertToString(
+            'http://bob@example.com:300/foo?bar=baz#fragment',
+            'http', 'bob', 'example.com', 300, '/foo', 'bar=baz', 'fragment'
+        ); 
+    }
+    function test_toString_scheme() {
+        $this->assertToString(
+            'http:',
+            'http', null, null, null, '', null, null
+        ); 
+    }
+    function test_toString_authority() {
+        $this->assertToString(
+            '//bob@example.com:8080',
+            null, 'bob', 'example.com', 8080, '', null, null
+        ); 
+    }
+    function test_toString_path() {
+        $this->assertToString(
+            '/path/to',
+            null, null, null, null, '/path/to', null, null
+        ); 
+    }
+    function test_toString_query() {
+        $this->assertToString(
+            '?q=string',
+            null, null, null, null, '', 'q=string', null
+        ); 
+    }
+    function test_toString_fragment() {
+        $this->assertToString(
+            '#fragment',
+            null, null, null, null, '', null, 'fragment'
+        ); 
+    }
+    /**
+     * Parses a URI string, validates it and checks the outcome.
+     * @param $uri String URI to validate
+     * @param $expect_uri String the validated URI must serialize to; true
+     *        (the default) means "same as $uri", false means validate()
+     *        is expected to return a false value
+     */
+    function assertValidation($uri, $expect_uri = true) {
+        if ($expect_uri === true) {
+            $expect_uri = $uri;
+        }
+        $uri = $this->createURI($uri);
+        $is_valid = $uri->validate($this->config, $this->context);
+        if ($expect_uri === false) {
+            $this->assertFalse($is_valid);
+            return;
+        }
+        $this->assertTrue($is_valid);
+        $this->assertIdentical($uri->toString(), $expect_uri);
+    }
+    function test_validate_overlongPort() {
+        $this->assertValidation('http://example.com:65536', 'http://example.com');
+    }
+    function test_validate_zeroPort() {
+        $this->assertValidation('http://example.com:00', 'http://example.com');
+    }
+    function test_validate_invalidHostThatLooksLikeIPv6() {
+        $this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', 'http:');
+    }
diff --git a/tests/HTMLPurifierTest.php b/tests/HTMLPurifierTest.php
index ba338fba..3ad307bb 100644
--- a/tests/HTMLPurifierTest.php
+++ b/tests/HTMLPurifierTest.php
@@ -4,7 +4,7 @@ require_once 'HTMLPurifier.php';
 // integration test
-class HTMLPurifierTest extends UnitTestCase
+class HTMLPurifierTest extends HTMLPurifier_Harness
     var $purifier;
@@ -132,5 +132,22 @@ alert("<This is compatible with XHTML>");
+    function testGetInstance() {
+        $purifier  =& HTMLPurifier::getInstance();
+        $purifier2 =& HTMLPurifier::getInstance();
+        $this->assertReference($purifier, $purifier2);
+    }
+    function testMakeAbsolute() {
+        $this->assertPurification(
+            '<a href="foo.txt">Foobar</a>',
+            '<a href="http://example.com/bar/foo.txt">Foobar</a>',
+            array(
+                'URI.Base' => 'http://example.com/bar/baz.php',
+                'URI.MakeAbsolute' => true
+            )
+        );
+    }
diff --git a/tests/index.php b/tests/index.php
old mode 100644
new mode 100755
index aad6a94c..69be2981
--- a/tests/index.php
+++ b/tests/index.php
@@ -5,6 +5,7 @@
 error_reporting(E_ALL | E_STRICT);
 define('HTMLPurifierTest', 1);
 // wishlist: automated calling of this file from multiple PHP versions so we
 // don't have to constantly switch around
@@ -38,7 +39,14 @@ if ( is_string($GLOBALS['HTMLPurifierTest']['PEAR']) ) {
 // initialize and load HTML Purifier
-require_once '../library/HTMLPurifier.auto.php';
+// use ?standalone to load the alternative standalone stub
+if (isset($_GET['standalone']) || (isset($argv[1]) && $argv[1] == 'standalone')) {
+    set_include_path(realpath('blanks') . PATH_SEPARATOR . get_include_path());
+    require_once '../library/HTMLPurifier.standalone.php';
+} else {
+    require_once '../library/HTMLPurifier.auto.php';
+require_once 'HTMLPurifier/Harness.php';
 // setup special DefinitionCacheFactory decorator
 $factory =& HTMLPurifier_DefinitionCacheFactory::instance();
diff --git a/tests/test_files.php b/tests/test_files.php
index 5920981e..f9fa71c1 100644
--- a/tests/test_files.php
+++ b/tests/test_files.php
@@ -79,6 +79,7 @@ $test_files[] = 'HTMLPurifier/GeneratorTest.php';
 $test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModuleTest.php';
+$test_files[] = 'HTMLPurifier/HTMLModule/RubyTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModule/ScriptingTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModule/TidyTest.php';
 $test_files[] = 'HTMLPurifier/IDAccumulatorTest.php';
@@ -102,8 +103,15 @@ $test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php';
 $test_files[] = 'HTMLPurifier/Strategy/ValidateAttributesTest.php';
 $test_files[] = 'HTMLPurifier/TagTransformTest.php';
 $test_files[] = 'HTMLPurifier/TokenTest.php';
+$test_files[] = 'HTMLPurifier/URIDefinitionTest.php';
+$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php';
+$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php';
+$test_files[] = 'HTMLPurifier/URIFilter/HostBlacklistTest.php';
+$test_files[] = 'HTMLPurifier/URIFilter/MakeAbsoluteTest.php';
+$test_files[] = 'HTMLPurifier/URIParserTest.php';
 $test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php';
 $test_files[] = 'HTMLPurifier/URISchemeTest.php';
+$test_files[] = 'HTMLPurifier/URITest.php';
 $test_files[] = 'HTMLPurifierTest.php';
 if (version_compare(PHP_VERSION, '5', '>=')) {