From b3f0e6c86c5748887bf7d1985e8e8a0e3a155d51 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang" <edwardzyang@thewritingpot.com>
Date: Mon, 3 Sep 2007 15:40:43 +0000
Subject: [PATCH] Release 2.1.2, merged in 1368 to HEAD.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1404 48356398-32a2-884e-a903-53898d9a118a
---
 Doxyfile                                      |    2 +-
 NEWS                                          |   28 +
 TODO                                          |   15 +-
 VERSION                                       |    2 +-
 WHATSNEW                                      |   18 +-
 docs/dev-progress.html                        |    3 +-
 docs/enduser-customize.html                   |   30 +-
 docs/enduser-id.html                          |    6 +-
 docs/enduser-tidy.html                        |    8 +-
 docs/enduser-utf8.html                        |   78 +-
 library/HTMLPurifier.php                      |    4 +-
 library/HTMLPurifier/AttrDef/URI.php          |    1 +
 library/HTMLPurifier/AttrDef/URI/Email.php    |    1 +
 library/HTMLPurifier/AttrTypes.php            |    3 +
 library/HTMLPurifier/CSSDefinition.php        |    4 +-
 library/HTMLPurifier/Config.php               |    2 +-
 library/HTMLPurifier/HTMLDefinition.php       |    4 +-
 library/HTMLPurifier/HTMLModule/Object.php    |   47 +
 library/HTMLPurifier/HTMLModuleManager.php    |    3 +-
 library/HTMLPurifier/Lexer.php                |    3 +
 library/HTMLPurifier/Lexer/DOMLex.php         |   29 +-
 library/HTMLPurifier/Lexer/DirectLex.php      |    2 +-
 library/HTMLPurifier/Lexer/PH5P.php           | 3886 +++++++++++++++++
 library/HTMLPurifier/Printer/ConfigForm.php   |   17 +-
 .../HTMLPurifier/Printer/HTMLDefinition.php   |    4 +-
 library/HTMLPurifier/URISchemeRegistry.php    |   21 +-
 maintenance/PH5P.patch                        |   45 +
 maintenance/common.php                        |  134 +
 .../compat-function-file-put-contents.php     |  107 +
 maintenance/merge-library.php                 |  199 +-
 smoketests/all.php                            |    2 +-
 smoketests/common.php                         |    6 +-
 tests/Debugger.php                            |    6 +-
 tests/HTMLPurifier/AttrDef/CSSTest.php        |    3 +
 .../HTMLPurifier/AttrTransform/BdoDirTest.php |   17 +-
 .../AttrTransform/BgColorTest.php             |   24 +-
 .../AttrTransform/BoolToCSSTest.php           |   18 +-
 .../HTMLPurifier/AttrTransform/BorderTest.php |   18 +-
 .../AttrTransform/EnumToCSSTest.php           |   42 +-
 .../AttrTransform/ImgRequiredTest.php         |   38 +-
 .../AttrTransform/ImgSpaceTest.php            |   24 +-
 tests/HTMLPurifier/AttrTransform/LangTest.php |   27 +-
 .../HTMLPurifier/AttrTransform/LengthTest.php |   15 +-
 tests/HTMLPurifier/AttrTransform/NameTest.php |    8 +-
 tests/HTMLPurifier/AttrTransformHarness.php   |    1 +
 .../HTMLPurifier/AttrValidator_ErrorsTest.php |    2 +-
 tests/HTMLPurifier/ChildDef/ChameleonTest.php |   32 +-
 tests/HTMLPurifier/ChildDef/OptionalTest.php  |   12 +-
 tests/HTMLPurifier/ChildDef/RequiredTest.php  |   49 +-
 .../ChildDef/StrictBlockquoteTest.php         |   51 +-
 tests/HTMLPurifier/ChildDef/TableTest.php     |   54 +-
 tests/HTMLPurifier/ChildDefHarness.php        |    1 +
 tests/HTMLPurifier/ComplexHarness.php         |   49 +-
 tests/HTMLPurifier/EntityLookupTest.php       |    2 +-
 tests/HTMLPurifier/HTMLModule/ObjectTest.php  |   39 +
 .../HTMLPurifier/HTMLModule/ScriptingTest.php |   46 +-
 .../Injector/AutoParagraphTest.php            |  176 +-
 tests/HTMLPurifier/Injector/LinkifyTest.php   |   21 +-
 .../Injector/PurifierLinkifyTest.php          |   32 +-
 tests/HTMLPurifier/LexerTest.php              |  803 ++--
 tests/HTMLPurifier/SimpleTest/Reporter.php    |    1 +
 tests/HTMLPurifier/Strategy/CoreTest.php      |   16 +-
 .../HTMLPurifier/Strategy/FixNestingTest.php  |  134 +-
 .../Strategy/MakeWellFormedTest.php           |   86 +-
 .../Strategy/MakeWellFormed_InjectorTest.php  |   65 +
 .../Strategy/RemoveForeignElementsTest.php    |  106 +-
 .../RemoveForeignElements_TidyTest.php        |   46 +
 .../Strategy/ValidateAttributesTest.php       |  431 +-
 .../Strategy/ValidateAttributes_IDTest.php    |   65 +
 .../Strategy/ValidateAttributes_TidyTest.php  |  353 ++
 tests/index.php                               |    6 +-
 tests/test_files.php                          |    5 +
 72 files changed, 6233 insertions(+), 1405 deletions(-)
 create mode 100644 library/HTMLPurifier/HTMLModule/Object.php
 create mode 100644 library/HTMLPurifier/Lexer/PH5P.php
 create mode 100644 maintenance/PH5P.patch
 create mode 100644 maintenance/compat-function-file-put-contents.php
 create mode 100644 tests/HTMLPurifier/HTMLModule/ObjectTest.php
 create mode 100644 tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
 create mode 100644 tests/HTMLPurifier/Strategy/RemoveForeignElements_TidyTest.php
 create mode 100644 tests/HTMLPurifier/Strategy/ValidateAttributes_IDTest.php
 create mode 100644 tests/HTMLPurifier/Strategy/ValidateAttributes_TidyTest.php

diff --git a/Doxyfile b/Doxyfile
index 9076573d..044217be 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -4,7 +4,7 @@
 # Project related configuration options
 #---------------------------------------------------------------------------
 PROJECT_NAME           = HTML Purifier
-PROJECT_NUMBER         = 2.1.1
+PROJECT_NUMBER         = 2.1.2
 OUTPUT_DIRECTORY       = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
 CREATE_SUBDIRS         = NO
 OUTPUT_LANGUAGE        = English
diff --git a/NEWS b/NEWS
index 04bfa37d..212edc94 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,34 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
     . Internal change
 ==========================
 
+2.1.2, released 2007-09-03
+! Implemented Object module for trusted users
+! Implemented experimental HTML5 parsing mode using PH5P. To use, add
+  this to your code:
+        require_once 'HTMLPurifier/Lexer/PH5P.php';
+        $config->set('Core', 'LexerImpl', 'PH5P');
+  Note that this Lexer introduces some classes not in the HTMLPurifier
+  namespace.  Also, this is PHP5 only.
+! CSS property border-spacing implemented
+- Fix non-visible parsing error in DirectLex with empty tags that have
+  slashes inside attribute values.
+- Fix typo in CSS definition: border-collapse:seperate; was incorrectly
+  accepted as valid CSS. Usually non-visible, because this styling is the
+  default for tables in most browsers. Thanks Brett Zamir for pointing
+  this out.
+- Fix validation errors in configuration form
+- Hammer out a bunch of edge-case bugs in the standalone distribution
+- Inclusion reflection removed from URISchemeRegistry; you must manually
+  include any new schema files you wish to use
+- Numerous typo fixes in documentation thanks to Brett Zamir
+. Unit test refactoring for one logical test per test function
+. Config and context parameters in ComplexHarness deprecated: instead, edit
+  the $config and $context member variables
+. HTML wrapper in DOMLex now takes DTD identifiers into account; doesn't
+  really make a difference, but is good for completeness' sake
+. merge-library.php script refactored for greater code reusability and
+  PHP4 compatibility
+
 2.1.1, released 2007-08-04
 - Fix show-stopper bug in %URI.MakeAbsolute functionality
 - Fix PHP4 syntax error in standalone version
diff --git a/TODO b/TODO
index 0fa3eb08..0c0a5669 100644
--- a/TODO
+++ b/TODO
@@ -28,23 +28,22 @@ afraid to cast your vote for the next feature to be implemented!
  - Remove empty inline tags<i></i>
  - Append something to duplicate IDs so they're still usable (impl. note: the
    dupe detector would also need to detect the suffix as well)
+ - Externalize inline CSS to promote clean HTML
 
 2.4 release [It's All About Trust] (floating)
  # Implement untrusted, dangerous elements/attributes
  # Implement IDREF support (harder than it seems, since you cannot have
    IDREFs to non-existent IDs)
+ # Frameset XHTML 1.0 and HTML 4.01 doctypes
 
 3.0 release [Beyond HTML]
  # Legit token based CSS parsing (will require revamping almost every
-   AttrDef class)
+   AttrDef class). Probably will use CSSTidy class
  # More control over allowed CSS properties (maybe modularize it in the
    same fashion!)
  # Formatters for plaintext
     - Smileys
  - Standardize token armor for all areas of processing
- - Fixes for Firefox's inability to handle COL alignment props (Bug 915)
- - Automatically add non-breaking spaces to empty table cells when
-   empty-cells:show is applied to have compatibility with Internet Explorer
  - Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
    Also, enable disabling of directionality
 
@@ -63,25 +62,27 @@ Ongoing
  - Complete basic smoketests
 
 Unknown release (on a scratch-an-itch basis)
- ? Semi-lossy dumb alternate character encoding transfor
+ # CHMOD install script for PEAR installs
  ? Have 'lang' attribute be checked against official lists, achieved by
    encoding all characters that have string entity equivalents
  - Abstract ChildDef_BlockQuote to work with all elements that only
    allow blocks in them, required or optional
  - Reorganize Unit Tests
-    - Refactor loop tests: Lexer
  - Reorganize configuration directives (Create more namespaces! Get messy!)
  - Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
  - Implement lenient <ruby> child validation
  - Explain how to use HTML Purifier in non-PHP languages / create
    a simple command line stub (or complicated?)
+ - Fixes for Firefox's inability to handle COL alignment props (Bug 915)
+ - Automatically add non-breaking spaces to empty table cells when
+   empty-cells:show is applied to have compatibility with Internet Explorer
 
 Requested
 
 Wontfix
  - Non-lossy smart alternate character encoding transformations (unless
    patch provided)
- - Pretty-printing HTML, users can use Tidy on the output on entire page
+ - Pretty-printing HTML: users can use Tidy on the output on entire page
  - Native content compression, whitespace stripping (don't rely on Tidy, make
    sure we don't remove from <pre> or related tags): use gzip if this is
    really important
diff --git a/VERSION b/VERSION
index 7c327287..8f9174b4 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.1.1
\ No newline at end of file
+2.1.2
\ No newline at end of file
diff --git a/WHATSNEW b/WHATSNEW
index a08edbb5..e9a40184 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -1,10 +1,8 @@
-In version 2.1, HTML Purifier's URI validation and filtering handling
-system has been revamped with a new, extensible URIFilter system. Also
-notable features include preservation of emoticons in PHP5 with
-%Core.AggressivelyFixLt, standalone and lite download versions,
-transforming relative URIs to absolute URIs, Ruby in XHTML 1.1, a Phorum
-mod, and UTF-8 font names.  Notable bug-fixes include refinement of
-the auto-paragraphing algorithm (no longer experimental), better XHTML
-1.1 support and the removal of the contents of <style> elements. Version
-2.1.1 amends a few bugs in some of newly introduced features, namely
-running the standalone download version in PHP4 and %URI.MakeAbsolute.
+Version 2.1.2 is a mix of experimental features and stability updates.
+Among new features: an Object module for trusted users, support for the
+CSS property 'border-spacing', and HTML 5 style parsing using PH5P.
+Bug fixes have resolved a few obscure issues including border-collapse:seperate,
+a DirectLex parsing error, broken HTML in printDefinition.php, and problems
+with the experimental standalone distribution. Also, there were large
+amounts of behind-the-scenes refactoring and the removal of URIScheme
+inclusion reflection.
diff --git a/docs/dev-progress.html b/docs/dev-progress.html
index b3109df2..9eef6a09 100644
--- a/docs/dev-progress.html
+++ b/docs/dev-progress.html
@@ -39,7 +39,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
 <table cellspacing="0"><tbody>
 <tr><td class="impl-yes">Implemented</td></tr>
 <tr><td class="impl-partial">Partially implemented</td></tr>
-<tr><td class="impl-no">Will not implement</td></tr>
+<tr><td class="impl-no">Not a priority to implement</td></tr>
 <tr><td class="danger">Dangerous attribute/property</td></tr>
 <tr><td class="css1">Present in CSS1</td></tr>
 <tr><td class="feature">Feature, requires extra work</td></tr>
@@ -118,6 +118,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
 <tbody>
 <tr><th colspan="2">Table</th></tr>
 <tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, seperate)</td></tr>
+<tr class="impl-yes"><td>border-spacing</td><td>MULTIPLE</td></tr>
 <tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
 <tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
     possible fix with &amp;nbsp;? Unknown release milestone.</td></tr>
diff --git a/docs/enduser-customize.html b/docs/enduser-customize.html
index 8e9fe1dd..8634021c 100644
--- a/docs/enduser-customize.html
+++ b/docs/enduser-customize.html
@@ -32,7 +32,7 @@
   Before we even write any code, it is paramount to consider whether or
   not the code we're writing is necessary or not. HTML Purifier, by default,
   contains a large set of elements and attributes: large enough so that
-  <em>any</em> element or attribute in XHTML 1.0 (and its HTML variant)
+  <em>any</em> element or attribute in XHTML 1.0 or 1.1 (and its HTML variants)
   that can be safely used by the general public is implemented.
 </p>
 
@@ -76,11 +76,12 @@
 <h3>XHTML 1.1</h3>
 
 <p>
-  We have not implemented the
+  As of HTMLPurifier 2.1.0, we have implemented the
   <a href="http://www.w3.org/TR/2001/REC-ruby-20010531/">Ruby module</a>,
   which defines a set of tags
   for publishing short annotations for text, used mostly in Japanese
-  and Chinese school texts.
+  and Chinese school texts, but applicable for positioning any text (not
+  limited to translations) above or below other corresponding text.
 </p>
 
 <h3>XHTML 2.0</h3>
@@ -492,10 +493,11 @@ $def =& $config->getHTMLDefinition(true);
 <p>
   The <code>(%flow;)*</code> indicates the allowed children of the
   <code>li</code> tag: <code>li</code> allows any number of flow
-  elements as its children. In HTML Purifier, we'd write it like
-  <code>Flow</code> (here's where the content sets we were
-  discussing earlier come into play). There are three shorthand content models you
-  can specify:
+  elements as its children. (The <code>- O</code> allows the closing tag to be 
+  omitted, though in XML this is not allowed.) In HTML Purifier, 
+  we'd write it like <code>Flow</code> (here's where the content sets 
+  we were discussing earlier come into play). There are three shorthand
+  content models you can specify:
 </p>
 
 <table class="table">
@@ -668,12 +670,22 @@ $def =& $config->getHTMLDefinition(true);
   Common is a combination of the above-mentioned collections.
 </p>
 
+<p class="aside">
+  Readers familiar with the modularization may have noticed that the Core
+  attribute collection differs from that specified by the <a
+  href="http://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_commonatts">abstract
+  modules of the XHTML Modularization 1.1</a>. We believe this section
+  to be in error, as <code>br</code> permits the use of the <code>style</code>
+  attribute even though it uses the <code>Core</code> collection, and 
+  the DTD and XML Schemas supplied by W3C support our interpretation.
+</p>
+
 <h3>Attributes</h3>
 
 <p>
-  If you didn't read the <a href="#addAttribute">previous section on
+  If you didn't read the <a href="#addAttribute">earlier section on
   adding attributes</a>, read it now.  The last parameter is simply
-  array of attribute names to attribute implementations, in the exact
+  an array of attribute names to attribute implementations, in the exact
   same format as <code>addAttribute()</code>.
 </p>
 
diff --git a/docs/enduser-id.html b/docs/enduser-id.html
index 8321a0a2..051ae7ca 100644
--- a/docs/enduser-id.html
+++ b/docs/enduser-id.html
@@ -58,7 +58,7 @@ appear elsewhere on the document.  The method is simple:</p>
 
 <pre>$config->set('HTML', 'EnableAttrID', true);
 $config->set('Attr', 'IDBlacklist' array(
-    'list', 'of', 'attributes', 'that', 'are', 'forbidden'
+    'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
 ));</pre>
 
 <p>That being said, there are some notable drawbacks.  First of all, you have to
@@ -71,9 +71,9 @@ to possible standards-compliance issues.</p>
 <p>Furthermore, this position becomes untenable when a single web page must hold
 multiple portions of user-submitted content.  Since there's obviously no way
 to find out before-hand what IDs users will use, the blacklist is helpless.
-And even since HTML Purifier validates each segment seperately, perhaps doing
+And since HTML Purifier validates each segment separately, perhaps doing
 so at different times, it would be extremely difficult to dynamically update
-the blacklist inbetween runs.</p>
+the blacklist in between runs.</p>
 
 <p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
 all, they might have simply specified a duplicate ID by accident.</p>
diff --git a/docs/enduser-tidy.html b/docs/enduser-tidy.html
index b3f79f60..56c9b288 100644
--- a/docs/enduser-tidy.html
+++ b/docs/enduser-tidy.html
@@ -22,7 +22,7 @@ out:</p>
 
 <p class="emphasis">This ain't HTML Tidy!</p>
 
-<p>Rather, Tidy stands for a cool set of Tidy-inspired in HTML Purifier
+<p>Rather, Tidy stands for a cool set of Tidy-inspired features in HTML Purifier
 that allows users to submit deprecated elements and attributes and get
 valid strict markup back. For example:</p>
 
@@ -33,8 +33,8 @@ valid strict markup back. For example:</p>
 <pre>&lt;div style=&quot;text-align:center;&quot;&gt;Centered&lt;/div&gt;</pre>
 
 <p>...when this particular fix is run on the HTML. This tutorial will give
-you down the lowdown of what exactly HTML Purifier will do when Tidy
-is on, and how to fine tune this behavior. Once again, <strong>you do
+you the lowdown of what exactly HTML Purifier will do when Tidy
+is on, and how to fine-tune this behavior. Once again, <strong>you do
 not need Tidy installed on your PHP to use these features!</strong></p>
 
 <h2>What does it do?</h2>
@@ -221,7 +221,7 @@ general syntax:</p>
 
 <p>The lowdown is, quite frankly, HTML Purifier's default settings are
 probably good enough. The next step is to bump the level up to heavy,
-and if that still doesn't satisfy your appetite, do some fine tuning.
+and if that still doesn't satisfy your appetite, do some fine-tuning.
 Other than that, don't worry about it: this all works silently and
 effectively in the background.</p>
 
diff --git a/docs/enduser-utf8.html b/docs/enduser-utf8.html
index b8cee57d..062eed7b 100644
--- a/docs/enduser-utf8.html
+++ b/docs/enduser-utf8.html
@@ -96,7 +96,7 @@ which can be a rewarding (but difficult) task.</p>
 <h2 id="findcharset">Finding the real encoding</h2>
 
 <p>In the beginning, there was ASCII, and things were simple. But they
-weren't good, for no one could write in Cryllic or Thai. So there
+weren't good, for no one could write in Cyrillic or Thai. So there
 exploded a proliferation of character encodings to remedy the problem
 by extending the characters ASCII could express. This ridiculously
 simplified version of the history of character encodings shows us that
@@ -138,7 +138,7 @@ browser:</p>
     <dd>View &gt; Encoding: bulleted item is unofficial name</dd>
 </dl>
 
-<p>Internet Explorer won't give you the mime (i.e. useful/real) name of the
+<p>Internet Explorer won't give you the MIME (i.e. useful/real) name of the
 character encoding, so you'll have to look it up using their description.
 Some common ones:</p>
 
@@ -216,6 +216,12 @@ if your <code>META</code> tag claims that either:</p>
 
 <h2 id="fixcharset">Fixing the encoding</h2>
 
+<p class="aside">The advice given here is for pages being served as
+vanilla <code>text/html</code>.  Different practices must be used
+for <code>application/xml</code> or <code>application/xhtml+xml</code>; see
+<a href="http://www.w3.org/TR/2002/NOTE-xhtml-media-types-20020430/">W3C's
+document on XHTML media types</a> for more information.</p>
+
 <p>If your <code>META</code> encoding and your real encoding match,
 savvy! You can skip this section. If they don't...</p>
 
@@ -302,7 +308,8 @@ languages</a>. The appropriate code is:</p>
 
 <p>...replacing UTF-8 with whatever your embedded encoding is.
 This code must come before any output, so be careful about
-stray whitespace in your application.</p>
+stray whitespace in your application (i.e., any whitespace before 
+output excluding whitespace within &lt;?php ?&gt; tags).</p>
 
 <h4 id="fixcharset-server-phpini">PHP ini directive</h4>
 
@@ -313,8 +320,8 @@ header call: <code><a href="http://php.net/ini.core#ini.default-charset">default
 
 <p>...will also do the trick. If PHP is running as an Apache module (and
 not as FastCGI, consult
-<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess do apply this property
-globally:</p>
+<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess to apply this property
+across many PHP files:</p>
 
 <pre><a href="http://php.net/configuration.changes#configuration.changes.apache">php_value</a> default_charset &quot;UTF-8&quot;</pre>
 
@@ -360,10 +367,11 @@ to send anything at all:</p>
 
 <pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
 
-<p>...making your <code>META</code> tags the sole source of
-character encoding information. In these cases, it is
-<em>especially</em> important to make sure you have valid <code>META</code>
-tags on your pages and all the text before them is ASCII.</p>
+<p>...making your internal charset declaration (usually the <code>META</code> tags)
+the sole source of character encoding 
+information. In these cases, it is <em>especially</em> important to make 
+sure you have valid <code>META</code> tags on your pages and all the 
+text before them is ASCII.</p>
 
 <blockquote class="aside"><p>These directives can also be
 placed in httpd.conf file for Apache, but
@@ -428,28 +436,30 @@ IIS to change character encodings, I'd be grateful.</p>
 
 <p><code>META</code> tags are the most common source of embedded
 encodings, but they can also come from somewhere else: XML
-processing instructions. They look like:</p>
+Declarations. They look like:</p>
 
 <pre>&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;</pre>
 
 <p>...and are most often found in XML documents (including XHTML).</p>
 
-<p>For XHTML, this processing instruction theoretically
+<p>For XHTML, this XML Declaration theoretically
 overrides the <code>META</code> tag. In reality, this happens only when the
 XHTML is actually served as legit XML and not HTML, which is almost always
 never due to Internet Explorer's lack of support for 
 <code>application/xhtml+xml</code> (even though doing so is often
-argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>
+argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good 
+practice</a> and is required by the XHTML 1.1 specification).</p>
 
-<p>For XML, however, this processing instruction is extremely important.
+<p>For XML, however, this XML Declaration is extremely important.
 Since most webservers are not configured to send charsets for .xml files,
 this is the only thing a parser has to go on. Furthermore, the default
 for XML files is UTF-8, which often butts heads with more common
 ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
 
 <p>In short, if you use XHTML and have gone through the
-trouble of adding the XML header, make sure it jives
-with your <code>META</code> tags and HTTP headers.</p>
+trouble of adding the XML Declaration, make sure it jives
+with your <code>META</code> tags (which should only be present 
+if served in text/html) and HTTP headers.</p>
 
 <h3 id="fixcharset-internals">Inside the process</h3>
 
@@ -506,7 +516,7 @@ usage in one language sometimes requires the occasional special character
 that, without surprise, is not available in your character set. Sometimes
 developers get around this by adding support for multiple encodings: when
 using Chinese, use Big5, when using Japanese, use Shift-JIS, when
-using Greek, etc. Other times, they use character entities with great
+using Greek, etc. Other times, they use character references with great
 zeal.</p>
 
 <p>UTF-8, however, obviates the need for any of these complicated
@@ -520,14 +530,14 @@ you don't have to use those user-unfriendly entities.</p>
 
 <p>Websites encoded in Latin-1 (ISO-8859-1) which ocassionally need
 a special character outside of their scope often will use a character
-entity to achieve the desired effect. For instance, &theta; can be
+entity reference to achieve the desired effect. For instance, &theta; can be
 written <code>&amp;theta;</code>, regardless of the character encoding's
 support of Greek letters.</p>
 
 <p>This works nicely for limited use of special characters, but
 say you wanted this sentence of Chinese text: &#28608;&#20809;,
 &#36889;&#20841;&#20491;&#23383;&#26159;&#29978;&#40636;&#24847;&#24605;.
-The entity-ized version would look like this:</p>
+The ampersand encoded version would look like this:</p>
 
 <pre>&amp;#28608;&amp;#20809;, &amp;#36889;&amp;#20841;&amp;#20491;&amp;#23383;&amp;#26159;&amp;#29978;&amp;#40636;&amp;#24847;&amp;#24605;</pre>
 
@@ -545,7 +555,7 @@ an application that originally used ISO-8859-1 but switched to UTF-8
 when it became far to cumbersome to support foreign languages. Bots
 will now actually go through articles and convert character entities
 to their corresponding real characters for the sake of user-friendliness
-and searcheability. See
+and searchability. See
 <a href="http://meta.wikimedia.org/wiki/Help:Special_characters">Meta's
 page on special characters</a> for more details.
 </p></blockquote>
@@ -593,7 +603,7 @@ browser you're using, they might:</p>
 <ul>
     <li>Replace the unsupported characters with useless question marks,</li>
     <li>Attempt to fix the characters (example: smart quotes to regular quotes),</li>
-    <li>Replace the character with a character entity, or</li>
+    <li>Replace the character with a character entity reference, or</li>
     <li>Send it anyway as a different character encoding mixed in
         with the original encoding (usually Windows-1252 rather than
         iso-8859-1 or UTF-8 interspersed in 8-bit)</li>
@@ -609,7 +619,7 @@ since UTF-8 supports every character.</p>
 
 <h4 id="whyutf8-forms-multipart"><code>multipart/form-data</code></h4>
 
-<p>Multipart form submission takes a way a lot of the ambiguity
+<p>Multipart form submission takes away a lot of the ambiguity
 that percent-encoding had: the server now can explicitly ask for
 certain encodings, and the client can explicitly tell the server
 during the form submission what encoding the fields are in.</p>
@@ -622,9 +632,9 @@ Each method has deficiencies, especially the former.</p>
 <p>If you tell the browser to send the form in the same encoding as
 the page, you still have the trouble of what to do with characters
 that are outside of the character encoding's range. The behavior, once
-again, varies: Firefox 2.0 entity-izes them while Internet Explorer
-7.0 mangles them beyond intelligibility. For serious internationalization purposes,
-this is not an option.</p>
+again, varies: Firefox 2.0 converts them to character entity references
+while Internet Explorer 7.0 mangles them beyond intelligibility. For
+serious internationalization purposes, this is not an option.</p>
 
 <p>The other possibility is to set Accept-Encoding to UTF-8, which
 begs the question: Why aren't you using UTF-8 for everything then?
@@ -664,12 +674,12 @@ it up to the module iconv to do the dirty work.</p>
 <p>This approach, however, is not perfect. iconv is blithely unaware
 of HTML character entities. HTML Purifier, in order to
 protect against sophisticated escaping schemes, normalizes all character
-and numeric entities before processing the text. This leads to
+and numeric entity references before processing the text. This leads to
 one important ramification:</p>
 
 <p><strong>Any character that is not supported by the target character
 set, regardless of whether or not it is in the form of a character
-entity or a raw character, will be silently ignored.</strong></p>
+entity reference or a raw character, will be silently ignored.</strong></p>
 
 <p>Example of this principle at work: say you have <code>&amp;theta;</code>
 in your HTML, but the output is in Latin-1 (which, understandably,
@@ -678,7 +688,7 @@ set the encoding correctly using %Core.Encoding):</p>
 
 <ul>
     <li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
-        (note that theta is preserved since it doesn't actually use
+        (note that theta is preserved here since it doesn't actually use
         any non-ASCII characters): <code>&amp;theta;</code></li>
     <li>The <code>EntityParser</code> will transform all named and numeric
         character entities to their corresponding raw UTF-8 equivalents:
@@ -701,7 +711,7 @@ Purifier has provided a slightly more palatable workaround using
     <li>The <code>EntityParser</code> transforms entities: <code>&theta;</code></li>
     <li>HTML Purifier processes the code: <code>&theta;</code></li>
     <li>The <code>Encoder</code> replaces all non-ASCII characters
-        with numeric entities: <code>&amp;#952;</code></li>
+        with numeric entity references: <code>&amp;#952;</code></li>
     <li>For good measure, <code>Encoder</code> transforms encoding back to
         original (which is strictly unnecessary for 99% of encodings
         out there): <code>&amp;#952;</code> (remember, it's all ASCII!)</li>
@@ -711,19 +721,19 @@ Purifier has provided a slightly more palatable workaround using
 the land of Unicode characters, and is totally unacceptable for Chinese
 or Japanese texts. The even bigger kicker is that, supposing the
 input encoding was actually ISO-8859-7, which <em>does</em> support
-theta, the character would get entity-ized anyway! (The Encoder does
-not discriminate).</p>
+theta, the character would get converted into a character entity reference
+anyway! (The Encoder does not discriminate).</p>
 
 <p>The current functionality is about where HTML Purifier will be for
 the rest of eternity. HTML Purifier could attempt to preserve the original
-form of the entities so that they could be substituted back in, only the
+form of the character references so that they could be substituted back in, only the
 DOM extension kills them off irreversibly. HTML Purifier could also attempt
 to be smart and only convert non-ASCII characters that weren't supported
 by the target encoding, but that would require reimplementing iconv
 with HTML awareness, something I will not do.</p>
 
 <p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
-not being sarcastic here: some people could care less about other languages)</p>
+not being sarcastic here: some people could care less about other languages).</p>
 
 <h2 id="migrate">Migrate to UTF-8</h2>
 
@@ -985,7 +995,7 @@ and yes, it is variable width. Other traits:</p>
 in different ways. It is beyond the scope of this document to explain
 what precisely these implications are. PHPWact provides
 a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
-on what to expect from each functions, although coverage is spotty in
+on what to expect from each function, although coverage is spotty in
 some areas. Their more general notes on
 <a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
 are also worth looking at for information on UTF-8. Some rules of thumb
@@ -999,7 +1009,7 @@ when dealing with Unicode text:</p>
     <li>Think twice before using functions that:<ul>
         <li>...count characters (strlen will return bytes, not characters;
             str_split and word_wrap may corrupt)</li>
-        <li>...entity-ize things (UTF-8 doesn't need entities)</li>
+        <li>...convert characters to entity references (UTF-8 doesn't need entities)</li>
         <li>...do very complex string processing (*printf)</li>
     </ul></li>
 </ul>
diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php
index af61751b..43fe616b 100644
--- a/library/HTMLPurifier.php
+++ b/library/HTMLPurifier.php
@@ -22,7 +22,7 @@
  */
 
 /*
-    HTML Purifier 2.1.1 - Standards Compliant HTML Filtering
+    HTML Purifier 2.1.2 - Standards Compliant HTML Filtering
     Copyright (C) 2006 Edward Z. Yang
 
     This library is free software; you can redistribute it and/or
@@ -77,7 +77,7 @@ This directive has been available since 2.0.0.
 class HTMLPurifier
 {
     
-    var $version = '2.1.1';
+    var $version = '2.1.2';
     
     var $config;
     var $filters;
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php
index dcf9849c..365748c0 100644
--- a/library/HTMLPurifier/AttrDef/URI.php
+++ b/library/HTMLPurifier/AttrDef/URI.php
@@ -6,6 +6,7 @@ require_once 'HTMLPurifier/URIScheme.php';
 require_once 'HTMLPurifier/URISchemeRegistry.php';
 require_once 'HTMLPurifier/AttrDef/URI/Host.php';
 require_once 'HTMLPurifier/PercentEncoder.php';
+require_once 'HTMLPurifier/AttrDef/URI/Email.php';
 
 // special case filtering directives 
 
diff --git a/library/HTMLPurifier/AttrDef/URI/Email.php b/library/HTMLPurifier/AttrDef/URI/Email.php
index 5a7085db..aaec099a 100644
--- a/library/HTMLPurifier/AttrDef/URI/Email.php
+++ b/library/HTMLPurifier/AttrDef/URI/Email.php
@@ -1,6 +1,7 @@
 <?php
 
 require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
 
 class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
 {
diff --git a/library/HTMLPurifier/AttrTypes.php b/library/HTMLPurifier/AttrTypes.php
index 4cb70be7..93abb0d0 100644
--- a/library/HTMLPurifier/AttrTypes.php
+++ b/library/HTMLPurifier/AttrTypes.php
@@ -44,6 +44,9 @@ class HTMLPurifier_AttrTypes
         $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
         $this->info['Color']    = new HTMLPurifier_AttrDef_HTML_Color();
         
+        // unimplemented aliases
+        $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
+        
         // number is really a positive integer (one or more digits)
         // FIXME: ^^ not always, see start and value of list items
         $this->info['Number']   = new HTMLPurifier_AttrDef_Integer(false, false, true);
diff --git a/library/HTMLPurifier/CSSDefinition.php b/library/HTMLPurifier/CSSDefinition.php
index 8de2aa7b..2acf7cf8 100644
--- a/library/HTMLPurifier/CSSDefinition.php
+++ b/library/HTMLPurifier/CSSDefinition.php
@@ -204,7 +204,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
         $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
         
         $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
-            'collapse', 'seperate'));
+            'collapse', 'separate'));
         
         $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
             'top', 'bottom'));
@@ -219,6 +219,8 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
             new HTMLPurifier_AttrDef_CSS_Percentage()
         ));
         
+        $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
+        
         // partial support
         $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
         
diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php
index 73be27b1..7e330e47 100644
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@@ -42,7 +42,7 @@ class HTMLPurifier_Config
     /**
      * HTML Purifier's version
      */
-    var $version = '2.1.1';
+    var $version = '2.1.2';
     
     /**
      * Two-level associative array of configuration directives
diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index aaeb8bae..fe6bd141 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -330,7 +330,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
         if (isset($this->info_content_sets['Block'][$block_wrapper])) {
             $this->info_block_wrapper = $block_wrapper;
         } else {
-            trigger_error('Cannot use non-block element as block wrapper.',
+            trigger_error('Cannot use non-block element as block wrapper',
                 E_USER_ERROR);
         }
         
@@ -340,7 +340,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
             $this->info_parent = $parent;
             $this->info_parent_def = $def;
         } else {
-            trigger_error('Cannot use unrecognized element as parent.',
+            trigger_error('Cannot use unrecognized element as parent',
                 E_USER_ERROR);
             $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
         }
diff --git a/library/HTMLPurifier/HTMLModule/Object.php b/library/HTMLPurifier/HTMLModule/Object.php
new file mode 100644
index 00000000..33734772
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Object.php
@@ -0,0 +1,47 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 Object Module: registers the <object> and <param> elements used
+ * for generic object inclusion.
+ * @warning Users commonly add <embed> for legacy browsers; this module does not define it
+ */
+class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Object'; // same string as the 'Object' entry added to the module manager's $common list
+    
+    function HTMLPurifier_HTMLModule_Object() { // PHP4-style constructor (method named after the class)
+        
+        $this->addElement('object', false, 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', 
+            array(
+                'archive' => 'URI',
+                'classid' => 'URI',
+                'codebase' => 'URI',
+                'codetype' => 'Text',
+                'data' => 'URI',
+                'declare' => 'Bool#declare',
+                'height' => 'Length',
+                'name' => 'CDATA',
+                'standby' => 'Text',
+                'tabindex' => 'Number',
+                'type' => 'ContentType', // attribute type registered in HTMLPurifier_AttrTypes as a Text alias
+                'width' => 'Length'
+            )
+        );
+
+        $this->addElement('param', false, false, 'Empty', false,
+            array(
+                'id' => 'ID',
+                'name*' => 'Text', // NOTE(review): the trailing '*' presumably marks the attribute as required; confirm in addElement()
+                'type' => 'Text',
+                'value' => 'Text',
+                'valuetype' => 'Enum#data,ref,object'
+           )
+        );
+    
+    }
+    
+}
+
diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php
index d4f10d0c..74a233ff 100644
--- a/library/HTMLPurifier/HTMLModuleManager.php
+++ b/library/HTMLPurifier/HTMLModuleManager.php
@@ -29,6 +29,7 @@ require_once 'HTMLPurifier/HTMLModule/Scripting.php';
 require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
 require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
 require_once 'HTMLPurifier/HTMLModule/Ruby.php';
+require_once 'HTMLPurifier/HTMLModule/Object.php';
 
 // tidy modules
 require_once 'HTMLPurifier/HTMLModule/Tidy.php';
@@ -172,7 +173,7 @@ class HTMLPurifier_HTMLModuleManager
         $common = array(
             'CommonAttributes', 'Text', 'Hypertext', 'List',
             'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
-            'StyleAttribute', 'Scripting'
+            'StyleAttribute', 'Scripting', 'Object'
         );
         $transitional = array('Legacy', 'Target');
         $xml = array('XMLCommonAttributes');
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index 29295db7..f52579ab 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -189,6 +189,9 @@ class HTMLPurifier_Lexer
                 return new HTMLPurifier_Lexer_DOMLex();
             case 'DirectLex':
                 return new HTMLPurifier_Lexer_DirectLex();
+            case 'PH5P':
+                // experimental Lexer that must be manually included
+                return new HTMLPurifier_Lexer_PH5P();
             default:
                 trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
         }
diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php
index 17f23e34..1a21687b 100644
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -53,14 +53,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
         }
         
         // preprocess html, essential for UTF-8
-        $html =
-            '<!DOCTYPE html '.
-                'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
-                '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
-            '<html><head>'.
-            '<meta http-equiv="Content-Type" content="text/html;'.
-                ' charset=utf-8" />'.
-            '</head><body><div>'.$html.'</div></body></html>';
+        $html = $this->wrapHTML($html, $config, $context);
         
         $doc = new DOMDocument();
         $doc->encoding = 'UTF-8'; // theoretically, the above has this covered
@@ -177,5 +170,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
         return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
     }
     
+    /**
+     * Wraps an HTML fragment in a complete document: an optional DOCTYPE built from the
+     * HTML definition's doctype, a UTF-8 Content-Type <meta>, and <body><div>...</div></body>.
+     * $html is inserted verbatim; $context is accepted but not read. Returns the document string.
+     */
+    function wrapHTML($html, $config, &$context) {
+        $def = $config->getDefinition('HTML');
+        $ret = '';
+        
+        if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
+            $ret .= '<!DOCTYPE html ';
+            if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" ';
+            else $ret .= 'SYSTEM '; // only a system identifier is set (outer test): the SYSTEM keyword is required
+            if (!empty($def->doctype->dtdSystem)) $ret .= '"' . $def->doctype->dtdSystem . '" ';
+            $ret .= '>';
+        }
+        $ret .= '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>';
+        return $ret . '<body><div>'.$html.'</div></body></html>';
+    }
+    
 }
 
diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php
index cd7cb4c1..6f8c8ff6 100644
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -237,7 +237,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                 // trailing slash. Remember, we could have a tag like <br>, so
                 // any later token processing scripts must convert improperly
                 // classified EmptyTags from StartTags.
-                $is_self_closing= (strpos($segment,'/') === $strlen_segment-1);
+                $is_self_closing= (strrpos($segment,'/') === $strlen_segment-1);
                 if ($is_self_closing) {
                     $strlen_segment--;
                     $segment = substr($segment, 0, $strlen_segment);
diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php
new file mode 100644
index 00000000..5720c33a
--- /dev/null
+++ b/library/HTMLPurifier/Lexer/PH5P.php
@@ -0,0 +1,3886 @@
+<?php
+
+require_once 'HTMLPurifier/Lexer/DOMLex.php';
+
+/**
+ * Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
+ * Requires PHP5, and occupies space in the HTML5 pseudo-namespace (may
+ * cause conflicts, sorry).
+ */
+
+class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
+    
+    public function tokenizeHTML($html, $config, &$context) {
+        // Normalize the input, wrap it in a full document, then parse it with PH5P.
+        $wrapped = $this->wrapHTML($this->normalize($html, $config, $context), $config, $context);
+        $ph5p = new HTML5($wrapped);
+        $dom = $ph5p->save();
+        // Descend html > body > div and tokenize the first <div> found there.
+        $root = $dom->getElementsByTagName('html')->item(0);
+        $body = $root->getElementsByTagName('body')->item(0);
+        $div  = $body->getElementsByTagName('div')->item(0);
+        $tokens = array();
+        $this->tokenizeDOM($div, $tokens);
+        return $tokens;
+    }
+    
+}
+
+// begin PHP5P source code here
+
+/*
+
+Copyright 2007 Jeroen van der Meer <http://jero.net/> 
+
+Permission is hereby granted, free of charge, to any person obtaining a 
+copy of this software and associated documentation files (the 
+"Software"), to deal in the Software without restriction, including 
+without limitation the rights to use, copy, modify, merge, publish, 
+distribute, sublicense, and/or sell copies of the Software, and to 
+permit persons to whom the Software is furnished to do so, subject to 
+the following conditions: 
+
+The above copyright notice and this permission notice shall be included 
+in all copies or substantial portions of the Software. 
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+*/
+
+class HTML5 {
+    private $data;
+    private $char;
+    private $EOF;
+    private $state;
+    private $tree;
+    private $token;
+    private $content_model;
+    private $escape = false;
+    private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute',
+    'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;',
+    'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;',
+    'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;',
+    'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;',
+    'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;',
+    'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;',
+    'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;',
+    'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;',
+    'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN',
+    'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;',
+    'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;',
+    'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig',
+    'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;',
+    'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;',
+    'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil',
+    'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;',
+    'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;',
+    'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;',
+    'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth',
+    'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12',
+    'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt',
+    'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc',
+    'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;',
+    'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;',
+    'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;',
+    'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro',
+    'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;',
+    'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;',
+    'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;',
+    'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash',
+    'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;',
+    'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;',
+    'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;',
+    'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;',
+    'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;',
+    'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;',
+    'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;',
+    'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;',
+    'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc',
+    'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;',
+    'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;');
+
+    const PCDATA    = 0;
+    const RCDATA    = 1;
+    const CDATA     = 2;
+    const PLAINTEXT = 3;
+
+    const DOCTYPE  = 0;
+    const STARTTAG = 1;
+    const ENDTAG   = 2;
+    const COMMENT  = 3;
+    const CHARACTR = 4;
+    const EOF      = 5;
+
+    public function __construct($data) { // stores $data and runs the whole tokenizer state machine over it
+        // Newline normalization: CRLF and lone CR both become LF. (The second call used
+        // to assign to a misspelled $date, so its result was silently thrown away.)
+        $data = str_replace(array("\r\n", "\r"), "\n", $data);
+        $this->data = $data;
+        $this->char = -1; // dataState() increments before reading, so the first read is offset 0
+        $this->EOF  = strlen($data);
+        $this->tree = new HTML5TreeConstructer;
+        $this->content_model = self::PCDATA;
+        $this->state = 'data';
+
+        // Dispatch to the "<state>State" method named by $this->state until the state becomes null.
+        while($this->state !== null) {
+            $this->{$this->state.'State'}();
+        }
+    }
+
+    public function save() { // hands back whatever the tree constructor's save() produces
+        return $this->tree->save();
+    }
+
+    private function char() {
+        // Character under the input pointer, or false once the pointer is at/after the end.
+        if($this->char >= $this->EOF) return false;
+        return $this->data[$this->char];
+    }
+
+    private function character($s, $l = 0) {
+        // Out of range yields null. Note the test is on $s + $l, so a substring that
+        // ends exactly at the end of the input is treated as out of range as well.
+        if($s + $l >= $this->EOF) {
+            return null;
+        }
+        // $l === 0 selects the single character at offset $s; any other $l selects
+        // the $l-byte substring starting there.
+        return ($l === 0) ? $this->data[$s] : substr($this->data, $s, $l);
+    }
+
+    private function characters($char_class, $start) { // leading run of $char_class chars at offset $start, '' if none
+        return preg_match('#^['.$char_class.']+#', substr($this->data, $start), $run) ? $run[0] : ''; // preg_replace gave back the whole remaining input on no match
+    }
+
+    private function dataState() { // "data" tokenizer state: dispatches on the next input character
+        // Consume the next input character
+        $this->char++;
+        $char = $this->char();
+
+        if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
+            /* U+0026 AMPERSAND (&)
+            When the content model flag is set to one of the PCDATA or RCDATA
+            states: switch to the entity data state. Otherwise: treat it as per
+            the "anything else" entry below. */
+            $this->state = 'entityData';
+
+        } elseif($char === '-') {
+            /* If the content model flag is set to either the RCDATA state or
+            the CDATA state, and the escape flag is false, and there are at
+            least three characters before this one in the input stream, and the
+            last four characters in the input stream, including this one, are
+            U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
+            and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
+            if(($this->content_model === self::RCDATA || $this->content_model ===
+            self::CDATA) && $this->escape === false &&
+            $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') { // NOTE(review): this reads the four characters BEFORE the current one, not the last four including it; offset looks off by one (char - 3) - confirm
+                $this->escape = true;
+            }
+
+            /* In any case, emit the input character as a character token. Stay
+            in the data state. */
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => $char
+            ));
+
+        /* U+003C LESS-THAN SIGN (<) */
+        } elseif($char === '<' && ($this->content_model === self::PCDATA ||
+        (($this->content_model === self::RCDATA ||
+        $this->content_model === self::CDATA) && $this->escape === false))) {
+            /* When the content model flag is set to the PCDATA state: switch
+            to the tag open state.
+
+            When the content model flag is set to either the RCDATA state or
+            the CDATA state and the escape flag is false: switch to the tag
+            open state.
+
+            Otherwise: treat it as per the "anything else" entry below. */
+            $this->state = 'tagOpen';
+
+        /* U+003E GREATER-THAN SIGN (>) */
+        } elseif($char === '>') {
+            /* If the content model flag is set to either the RCDATA state or
+            the CDATA state, and the escape flag is true, and the last three
+            characters in the input stream including this one are U+002D
+            HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
+            set the escape flag to false. */
+            if(($this->content_model === self::RCDATA ||
+            $this->content_model === self::CDATA) && $this->escape === true &&
+            $this->character($this->char, 3) === '-->') { // NOTE(review): this substring starts at the '>' itself, so it can never equal '-->'; the text above implies offset char - 2 - confirm
+                $this->escape = false;
+            }
+
+            /* In any case, emit the input character as a character token.
+            Stay in the data state. */
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => $char
+            ));
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Emit an end-of-file token. */
+            $this->EOF();
+
+        } elseif($this->content_model === self::PLAINTEXT) {
+            /* When the content model flag is set to the PLAINTEXT state
+            THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
+            the text and emit it as a character token. */
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => substr($this->data, $this->char)
+            ));
+
+            $this->EOF();
+
+        } else {
+            /* Anything else
+            THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters as
+            would otherwise also be treated as character tokens and emit them
+            as a single character token. Stay in the data state. */
+            $len  = strcspn($this->data, '<&', $this->char); // length of the run up to the next '<' or '&'
+            $char = substr($this->data, $this->char, $len);
+            $this->char += $len - 1; // park on the last consumed character; the next dataState() call increments first
+
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => $char
+            ));
+
+            $this->state = 'data';
+        }
+    }
+
+    private function entityDataState() { // entered from the data state after an '&' in PCDATA or RCDATA
+        // Attempt to consume an entity.
+        $entity = $this->entity();
+
+        // If nothing is returned, emit a U+0026 AMPERSAND character token.
+        // Otherwise, emit the character token that was returned.
+        $char = (!$entity) ? '&' : $entity; // any falsy result from entity() falls back to a literal '&'
+        $this->emitToken(array(
+            'type' => self::CHARACTR,
+            'data' => $char
+        ));
+
+        // Finally, switch to the data state.
+        $this->state = 'data';
+    }
+
+    private function tagOpenState() { // entered from the data state after a '<'; behaviour depends on the content model flag
+        switch($this->content_model) {
+            case self::RCDATA:
+            case self::CDATA:
+                /* If the next input character is a U+002F SOLIDUS (/) character,
+                consume it and switch to the close tag open state. If the next
+                input character is not a U+002F SOLIDUS (/) character, emit a
+                U+003C LESS-THAN SIGN character token and switch to the data
+                state to process the next input character. */
+                if($this->character($this->char + 1) === '/') { // peek only; the pointer moves in the branch below
+                    $this->char++;
+                    $this->state = 'closeTagOpen';
+
+                } else {
+                    $this->emitToken(array(
+                        'type' => self::CHARACTR,
+                        'data' => '<'
+                    ));
+
+                    $this->state = 'data';
+                }
+            break;
+
+            case self::PCDATA:
+                // If the content model flag is set to the PCDATA state
+                // Consume the next input character:
+                $this->char++;
+                $char = $this->char();
+
+                if($char === '!') {
+                    /* U+0021 EXCLAMATION MARK (!)
+                    Switch to the markup declaration open state. */
+                    $this->state = 'markupDeclarationOpen';
+
+                } elseif($char === '/') {
+                    /* U+002F SOLIDUS (/)
+                    Switch to the close tag open state. */
+                    $this->state = 'closeTagOpen';
+
+                } elseif(preg_match('/^[A-Za-z]$/', $char)) { // accepts lower-case letters too, not only A-Z as the text below says
+                    /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
+                    Create a new start tag token, set its tag name to the lowercase
+                    version of the input character (add 0x0020 to the character's code
+                    point), then switch to the tag name state. (Don't emit the token
+                    yet; further details will be filled in before it is emitted.) */
+                    $this->token = array(
+                        'name'  => strtolower($char),
+                        'type'  => self::STARTTAG,
+                        'attr'  => array()
+                    );
+
+                    $this->state = 'tagName';
+
+                } elseif($char === '>') {
+                    /* U+003E GREATER-THAN SIGN (>)
+                    Parse error. Emit a U+003C LESS-THAN SIGN character token and a
+                    U+003E GREATER-THAN SIGN character token. Switch to the data state. */
+                    $this->emitToken(array(
+                        'type' => self::CHARACTR,
+                        'data' => '<>' // both characters go out in a single character token
+                    ));
+
+                    $this->state = 'data';
+
+                } elseif($char === '?') {
+                    /* U+003F QUESTION MARK (?)
+                    Parse error. Switch to the bogus comment state. */
+                    $this->state = 'bogusComment';
+
+                } else {
+                    /* Anything else
+                    Parse error. Emit a U+003C LESS-THAN SIGN character token and
+                    reconsume the current input character in the data state. */
+                    $this->emitToken(array(
+                        'type' => self::CHARACTR,
+                        'data' => '<'
+                    ));
+
+                    $this->char--; // rewind one position so the data state reads this character again
+                    $this->state = 'data';
+                }
+            break;
+        }
+    }
+
+    private function closeTagOpenState() { // entered from the tag open state once a '/' follows the '<'
+        $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); // candidate tag name read from just after the current position
+        $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; // compared with the last entry on the tree constructor's stack
+
+        if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
+        (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/',
+        $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { // NOTE(review): the text below lists EOF as an allowed follower, yet this EOF test pushes towards the parse-error branch - confirm
+            /* If the content model flag is set to the RCDATA or CDATA states then
+            examine the next few characters. If they do not match the tag name of
+            the last start tag token emitted (case insensitively), or if they do but
+            they are not immediately followed by one of the following characters:
+                * U+0009 CHARACTER TABULATION
+                * U+000A LINE FEED (LF)
+                * U+000B LINE TABULATION
+                * U+000C FORM FEED (FF)
+                * U+0020 SPACE
+                * U+003E GREATER-THAN SIGN (>)
+                * U+002F SOLIDUS (/)
+                * EOF
+            ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
+            token, a U+002F SOLIDUS character token, and switch to the data state
+            to process the next input character. */
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => '</'
+            ));
+
+            $this->state = 'data';
+
+        } else {
+            /* Otherwise, if the content model flag is set to the PCDATA state,
+            or if the next few characters do match that tag name, consume the
+            next input character: */
+            $this->char++;
+            $char = $this->char();
+
+            if(preg_match('/^[A-Za-z]$/', $char)) {
+                /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
+                Create a new end tag token, set its tag name to the lowercase version
+                of the input character (add 0x0020 to the character's code point), then
+                switch to the tag name state. (Don't emit the token yet; further details
+                will be filled in before it is emitted.) */
+                $this->token = array(
+                    'name'  => strtolower($char),
+                    'type'  => self::ENDTAG
+                );
+
+                $this->state = 'tagName';
+
+            } elseif($char === '>') {
+                /* U+003E GREATER-THAN SIGN (>)
+                Parse error. Switch to the data state. */
+                $this->state = 'data';
+
+            } elseif($this->char === $this->EOF) {
+                /* EOF
+                Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
+                SOLIDUS character token. Reconsume the EOF character in the data state. */
+                $this->emitToken(array(
+                    'type' => self::CHARACTR,
+                    'data' => '</'
+                ));
+
+                $this->char--; // rewind so the data state's own increment lands on EOF again
+                $this->state = 'data';
+
+            } else {
+                /* Parse error. Switch to the bogus comment state. */
+                $this->state = 'bogusComment';
+            }
+        }
+    }
+
+    private function tagNameState() { // extends the current tag token's name by one character per call
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char); // null (not false) once the pointer reaches the end of input
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Switch to the before attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the EOF
+            character in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--; // rewind so the data state's own increment lands on EOF again
+            $this->state = 'data';
+
+        } elseif($char === '/') {
+            /* U+002F SOLIDUS (/)
+            Parse error unless this is a permitted slash. Switch to the before
+            attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } else {
+            /* Anything else
+            Append the current input character to the current tag token's tag name.
+            Stay in the tag name state. */
+            $this->token['name'] .= strtolower($char); // names are stored lower-cased
+            $this->state = 'tagName';
+        }
+    }
+
+    private function beforeAttributeNameState() {
+        /* Before attribute name state: step to the next input character. */
+        $this->char++;
+        $c = $this->character($this->char);
+
+        /* Whitespace (TAB, LF, VT, FF, SPACE) and "/" (a parse error unless
+        it is a permitted slash) are both skipped without leaving this
+        state. */
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $c) || $c === '/') {
+            $this->state = 'beforeAttributeName';
+            return;
+        }
+
+        /* ">" finishes the tag: emit the current tag token and return to the
+        data state. */
+        if($c === '>') {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            return;
+        }
+
+        /* EOF (parse error): emit the current tag token and back up one
+        position so the EOF is reconsumed in the data state. */
+        if($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            $this->char--;
+            return;
+        }
+
+        /* Anything else opens a new attribute on the current tag token. Its
+        name starts with the lower-cased character and it has no value yet.
+        Continue in the attribute name state. */
+        $attribute = array(
+            'name'  => strtolower($c),
+            'value' => null
+        );
+        $this->token['attr'][] = $attribute;
+        $this->state = 'attributeName';
+    }
+
+    private function attributeNameState() {
+        /* Attribute name state: extends the name of the attribute most
+        recently added to the current tag token, or leaves the state when a
+        delimiter is met. */
+
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Switch to the after attribute name state. */
+            $this->state = 'afterAttributeName';
+
+        } elseif($char === '=') {
+            /* U+003D EQUALS SIGN (=)
+            Switch to the before attribute value state. */
+            $this->state = 'beforeAttributeValue';
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($char === '/') {
+            /* U+002F SOLIDUS (/)
+            Parse error unless this is a permitted slash. Switch to the before
+            attribute name state.
+
+            The slash is never part of the attribute name, whether or not it
+            is followed by ">". Previously a "/" directly before ">" fell
+            through to the "anything else" branch, so "<br foo/>" produced an
+            attribute called "foo/". */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the EOF
+            character in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Append the current input character (lower-cased) to the current
+            attribute's name. Stay in the attribute name state. */
+            $last = count($this->token['attr']) - 1;
+            $this->token['attr'][$last]['name'] .= strtolower($char);
+
+            $this->state = 'attributeName';
+        }
+    }
+
+    private function afterAttributeNameState() {
+        /* After attribute name state: entered once whitespace has followed
+        an attribute name. Decides whether a value, another attribute or the
+        end of the tag comes next. */
+
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Stay in the after attribute name state. */
+            $this->state = 'afterAttributeName';
+
+        } elseif($char === '=') {
+            /* U+003D EQUALS SIGN (=)
+            Switch to the before attribute value state. */
+            $this->state = 'beforeAttributeValue';
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($char === '/') {
+            /* U+002F SOLIDUS (/)
+            Parse error unless this is a permitted slash. Switch to the
+            before attribute name state.
+
+            The slash is skipped whether or not ">" follows. Previously a "/"
+            directly before ">" fell through to the "anything else" branch,
+            so "<a foo />" gained a bogus attribute named "/". */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the EOF
+            character in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Start a new attribute in the current tag token. Set that
+            attribute's name to the (lower-cased) current input character and
+            leave its value unset. Switch to the attribute name state. */
+            $this->token['attr'][] = array(
+                'name'  => strtolower($char),
+                'value' => null
+            );
+
+            $this->state = 'attributeName';
+        }
+    }
+
+    private function beforeAttributeValueState() {
+        /* Before attribute value state: an "=" has been seen; work out how
+        the value that follows is delimited. */
+        $this->char++;
+        $c = $this->character($this->char);
+
+        /* TAB, LF, VT, FF and SPACE are skipped; remain in this state. */
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $c)) {
+            $this->state = 'beforeAttributeValue';
+            return;
+        }
+
+        /* A double quote opens a double-quoted value. */
+        if($c === '"') {
+            $this->state = 'attributeValueDoubleQuoted';
+            return;
+        }
+
+        /* "&" belongs to an unquoted value: back up one position so the
+        unquoted state reconsumes it. */
+        if($c === '&') {
+            $this->state = 'attributeValueUnquoted';
+            $this->char--;
+            return;
+        }
+
+        /* An apostrophe opens a single-quoted value. */
+        if($c === '\'') {
+            $this->state = 'attributeValueSingleQuoted';
+            return;
+        }
+
+        /* ">" ends the tag with no value: emit the current tag token and
+        return to the data state. */
+        if($c === '>') {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            return;
+        }
+
+        /* Anything else is the first character of an unquoted value: append
+        it to the newest attribute's value and switch to the unquoted state. */
+        $newest = count($this->token['attr']) - 1;
+        $this->token['attr'][$newest]['value'] .= $c;
+        $this->state = 'attributeValueUnquoted';
+    }
+
+    private function attributeValueDoubleQuotedState() {
+        /* Attribute value (double-quoted) state: collect characters until
+        the closing double quote. */
+        $this->char++;
+        $c = $this->character($this->char);
+
+        /* Closing U+0022 QUOTATION MARK: the value is complete, so look for
+        the next attribute. */
+        if($c === '"') {
+            $this->state = 'beforeAttributeName';
+            return;
+        }
+
+        /* U+0026 AMPERSAND: let the entity handler try to decode a
+        reference and append the result to the value. */
+        if($c === '&') {
+            $this->entityInAttributeValueState('double');
+            return;
+        }
+
+        /* EOF (parse error): emit the current tag token and back up one
+        position so the data state reconsumes the EOF. */
+        if($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            $this->char--;
+            return;
+        }
+
+        /* Anything else is appended to the newest attribute's value; stay
+        in the double-quoted state. */
+        $newest = count($this->token['attr']) - 1;
+        $this->token['attr'][$newest]['value'] .= $c;
+        $this->state = 'attributeValueDoubleQuoted';
+    }
+
+    private function attributeValueSingleQuotedState() {
+        /* Attribute value (single-quoted) state: collect characters until
+        the closing apostrophe. */
+        $this->char++;
+        $c = $this->character($this->char);
+
+        /* Closing U+0027 APOSTROPHE: the value is complete, so look for the
+        next attribute. */
+        if($c === '\'') {
+            $this->state = 'beforeAttributeName';
+            return;
+        }
+
+        /* U+0026 AMPERSAND: let the entity handler try to decode a
+        reference and append the result to the value. */
+        if($c === '&') {
+            $this->entityInAttributeValueState('single');
+            return;
+        }
+
+        /* EOF (parse error): emit the current tag token and back up one
+        position so the data state reconsumes the EOF. */
+        if($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            $this->char--;
+            return;
+        }
+
+        /* Anything else is appended to the newest attribute's value; stay
+        in the single-quoted state. */
+        $newest = count($this->token['attr']) - 1;
+        $this->token['attr'][$newest]['value'] .= $c;
+        $this->state = 'attributeValueSingleQuoted';
+    }
+
+    private function attributeValueUnquotedState() {
+        /* Attribute value (unquoted) state: collect characters until
+        whitespace, ">" or the end of input terminates the value. */
+
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Switch to the before attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($char === '&') {
+            /* U+0026 AMPERSAND (&)
+            Switch to the entity in attribute value state. */
+            $this->entityInAttributeValueState();
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the EOF
+            character in the data state.
+
+            Without this branch nothing could leave the state once the input
+            ran out: an unterminated value such as "<a href=foo" kept
+            appending to the attribute forever. The handling mirrors the
+            quoted attribute value states. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Append the current input character to the current attribute's value.
+            Stay in the attribute value (unquoted) state. */
+            $last = count($this->token['attr']) - 1;
+            $this->token['attr'][$last]['value'] .= $char;
+
+            $this->state = 'attributeValueUnquoted';
+        }
+    }
+
+    private function entityInAttributeValueState() {
+        /* Entity in attribute value: called by the attribute value states
+        when they meet "&". Callers may pass a quoting hint ('double' or
+        'single'); it is not declared as a parameter and is not used here. */
+
+        // Attempt to consume an entity.
+        $entity = $this->entity();
+
+        // entity() returns false when no entity could be consumed; in that
+        // case append a literal U+0026 AMPERSAND to the current attribute's
+        // value. Otherwise append the decoded text. The comparison is strict
+        // so that a decoded "0", which is falsy in PHP, is kept instead of
+        // being replaced by "&".
+        $char = ($entity === false)
+            ? '&'
+            : $entity;
+
+        $last = count($this->token['attr']) - 1;
+        $this->token['attr'][$last]['value'] .= $char;
+    }
+
+    private function bogusCommentState() {
+        /* Bogus comment state: everything from the current position up to,
+        but not including, the next U+003E GREATER-THAN SIGN (>) is turned
+        into a single comment token. If no ">" follows, the comment runs to
+        the end of the input. */
+        $text = $this->characters('^>', $this->char);
+
+        $comment = array(
+            'data' => $text,
+            'type' => self::COMMENT
+        );
+        $this->emitToken($comment);
+
+        /* Skip over the text that went into the comment and resume in the
+        data state. */
+        $this->char += strlen($text);
+        $this->state = 'data';
+
+        /* If that skip landed exactly on the end of the input, step back one
+        position so the EOF is reconsumed by the data state. */
+        if($this->char === $this->EOF) {
+            $this->char = $this->EOF - 1;
+        }
+    }
+
+    private function markupDeclarationOpenState() {
+        /* Markup declaration open state: decides between a comment, a
+        DOCTYPE and a bogus comment. */
+
+        /* Two U+002D HYPHEN-MINUS characters open a real comment: consume
+        them, start a comment token with no data yet and switch to the
+        comment state. */
+        if($this->character($this->char + 1, 2) === '--') {
+            $this->token = array(
+                'data' => null,
+                'type' => self::COMMENT
+            );
+            $this->char += 2;
+            $this->state = 'comment';
+            return;
+        }
+
+        /* A case-insensitive "DOCTYPE" over the next seven characters:
+        consume it and switch to the DOCTYPE state. */
+        $keyword = strtolower($this->character($this->char + 1, 7));
+        if($keyword === 'doctype') {
+            $this->char += 7;
+            $this->state = 'doctype';
+            return;
+        }
+
+        /* Anything else is a parse error and is handled as a bogus comment.
+        Advancing one position makes the next character the first one that
+        ends up in that comment. */
+        $this->char++;
+        $this->state = 'bogusComment';
+    }
+
+    private function commentState() {
+        /* Comment state: accumulate comment data one character at a time. */
+        $this->char++;
+        $c = $this->char();
+
+        /* U+002D HYPHEN-MINUS may begin the closing "-->": switch to the
+        comment dash state. */
+        if($c === '-') {
+            $this->state = 'commentDash';
+            return;
+        }
+
+        /* EOF (parse error): emit the comment token and back up one position
+        so the data state reconsumes the EOF. */
+        if($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            $this->char--;
+            return;
+        }
+
+        /* Anything else is appended to the comment's data; the state does
+        not change. */
+        $this->token['data'] .= $c;
+    }
+
+    private function commentDashState() {
+        /* Comment dash state: one "-" has been seen inside a comment. */
+        $this->char++;
+        $c = $this->char();
+
+        /* A second U+002D HYPHEN-MINUS: switch to the comment end state. */
+        if($c === '-') {
+            $this->state = 'commentEnd';
+            return;
+        }
+
+        /* EOF (parse error): emit the comment token and back up one position
+        so the data state reconsumes the EOF. */
+        if($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            $this->char--;
+            return;
+        }
+
+        /* Anything else: the pending "-" was ordinary comment text. Append
+        it together with the current character and return to the comment
+        state. */
+        $this->state = 'comment';
+        $this->token['data'] .= '-'.$c;
+    }
+
+    private function commentEndState() {
+        /* Comment end state: "--" has been seen inside a comment. */
+        $this->char++;
+        $c = $this->char();
+
+        /* ">" completes "-->": emit the comment token and return to the data
+        state. */
+        if($c === '>') {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            return;
+        }
+
+        /* A further "-": append one "-" to the data and stay in this
+        state. */
+        if($c === '-') {
+            $this->token['data'] .= '-';
+            return;
+        }
+
+        /* EOF: emit the comment token and back up one position so the data
+        state reconsumes the EOF. */
+        if($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            $this->char--;
+            return;
+        }
+
+        /* Anything else: the "--" was ordinary comment text. Append it with
+        the current character and return to the comment state. */
+        $this->state = 'comment';
+        $this->token['data'] .= '--'.$c;
+    }
+
+    private function doctypeState() {
+        /* DOCTYPE state: the keyword has been consumed. One whitespace
+        character (TAB, LF, VT, FF or SPACE) is swallowed if present; any
+        other character is put back for the next state to reconsume. */
+        $this->char++;
+
+        if(!preg_match('/^[\t\n\x0b\x0c ]$/', $this->char())) {
+            $this->char--;
+        }
+
+        /* Either way, continue in the before DOCTYPE name state. */
+        $this->state = 'beforeDoctypeName';
+    }
+
+    private function beforeDoctypeNameState() {
+        /* Before DOCTYPE name state: skip whitespace, then start the DOCTYPE
+        token with the first character of its name. Every token created here
+        starts out flagged as being in error. */
+        $this->char++;
+        $c = $this->char();
+
+        /* TAB, LF, VT, FF and SPACE are skipped; the state does not
+        change. */
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $c)) {
+            return;
+        }
+
+        /* A lower-case ASCII letter starts the name in upper case. */
+        if(preg_match('/^[a-z]$/', $c)) {
+            $this->token = array(
+                'name' => strtoupper($c),
+                'type' => self::DOCTYPE,
+                'error' => true
+            );
+            $this->state = 'doctypeName';
+            return;
+        }
+
+        /* ">" or EOF before any name: emit a nameless DOCTYPE token and go
+        back to the data state. */
+        $nameless = array(
+            'name' => null,
+            'type' => self::DOCTYPE,
+            'error' => true
+        );
+
+        if($c === '>') {
+            $this->emitToken($nameless);
+            $this->state = 'data';
+            return;
+        }
+
+        if($this->char === $this->EOF) {
+            $this->emitToken($nameless);
+            /* Back up one position so the data state reconsumes the EOF. */
+            $this->char--;
+            $this->state = 'data';
+            return;
+        }
+
+        /* Any other character starts the name unchanged. */
+        $this->token = array(
+            'name' => $c,
+            'type' => self::DOCTYPE,
+            'error' => true
+        );
+        $this->state = 'doctypeName';
+    }
+
+    private function doctypeNameState() {
+        /* DOCTYPE name state: extends the name of the current DOCTYPE token
+        and keeps its error flag in step with the name collected so far. */
+
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* Whitespace ends the name. The state name is spelled like the
+            afterDoctypeNameState method and every other state string; the
+            former 'AfterDoctypeName' only worked because PHP resolves method
+            names case-insensitively. */
+            $this->state = 'afterDoctypeName';
+
+        } elseif($char === '>') {
+            /* Emit the DOCTYPE token and switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif(preg_match('/^[a-z]$/', $char)) {
+            /* Lower-case ASCII letters are appended in upper case. */
+            $this->token['name'] .= strtoupper($char);
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF: emit the DOCTYPE token and reconsume the EOF in the data
+            state. */
+            $this->emitToken($this->token);
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else is appended to the name unchanged. */
+            $this->token['name'] .= $char;
+        }
+
+        /* The token is in error unless its name is exactly "HTML". This runs
+        after any emit above, so an emitted token carries the flag computed
+        on the previous call. */
+        $this->token['error'] = ($this->token['name'] !== 'HTML');
+    }
+
+    private function afterDoctypeNameState() {
+        /* After DOCTYPE name state: only whitespace, ">" or EOF may follow
+        the name without marking the DOCTYPE as erroneous. */
+        $this->char++;
+        $c = $this->char();
+
+        /* TAB, LF, VT, FF and SPACE are skipped; stay in the after DOCTYPE
+        name state. */
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $c)) {
+            return;
+        }
+
+        /* ">" ends the DOCTYPE: emit the token and return to the data
+        state. */
+        if($c === '>') {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            return;
+        }
+
+        /* EOF: emit the token and back up one position so the data state
+        reconsumes the EOF. */
+        if($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+            $this->char--;
+            return;
+        }
+
+        /* Anything else: flag the DOCTYPE as being in error and skip the
+        rest of it in the bogus DOCTYPE state. */
+        $this->state = 'bogusDoctype';
+        $this->token['error'] = true;
+    }
+
+    private function bogusDoctypeState() {
+        /* Bogus DOCTYPE state: discard characters until ">" or EOF ends the
+        declaration. */
+        $this->char++;
+        $closed = ($this->char() === '>');
+
+        /* Every other character is ignored; stay in this state. */
+        if(!$closed && $this->char !== $this->EOF) {
+            return;
+        }
+
+        /* ">" or EOF: emit the DOCTYPE token and return to the data state. */
+        $this->emitToken($this->token);
+
+        if(!$closed) {
+            /* EOF: back up one position so the data state reconsumes it. */
+            $this->char--;
+        }
+
+        $this->state = 'data';
+    }
+
+    private function entity() {
+        // Consumes an entity (character reference). Used when parsing
+        // entities in text and in attribute values. On entry $this->char is
+        // the position of the U+0026 AMPERSAND that introduced the entity.
+        //
+        // Returns the decoded text on success, leaving $this->char on the
+        // last character consumed. Returns false when nothing matches, in
+        // which case no characters are consumed.
+        $start = $this->char;
+        $entity = null;
+
+        // The behaviour depends on the identity of the next character (the
+        // one immediately after the U+0026 AMPERSAND character):
+        if($this->character($start + 1) === '#') {
+            // U+0023 NUMBER SIGN (#): numeric reference. The behaviour
+            // further depends on the character AFTER the number sign, i.e.
+            // two positions past the ampersand. The previous code looked at
+            // the number sign again, so the hexadecimal branch could never
+            // be taken.
+            $marker = $this->character($start + 2);
+            $hex = ($marker === 'x' || $marker === 'X');
+
+            // Hexadecimal references use 0-9, A-F, a-f after the "x";
+            // decimal references use just 0-9 directly after the "#".
+            $char_class = $hex ? '0-9A-Fa-f' : '0-9';
+            $digits_at = $start + ($hex ? 3 : 2);
+
+            // Consume as many characters as match the range of characters
+            // given above.
+            $e_name = $this->characters($char_class, $digits_at);
+
+            // NOTE(review): characters() is defined outside this chunk. The
+            // result is re-validated in case it hands back something other
+            // than a pure run of digits when nothing matches. Confirm
+            // against its implementation.
+            if(is_string($e_name)
+            && preg_match('/^['.$char_class.']+$/D', $e_name)) {
+                $entity = '#'.($hex ? 'x' : '').$e_name;
+
+                // Leave the cursor on the last digit, then also consume a
+                // terminating semicolon if there is one.
+                $this->char = $digits_at + strlen($e_name) - 1;
+
+                if($this->character($this->char + 1) === ';') {
+                    $this->char++;
+                }
+            }
+
+        } else {
+            // Anything else: named reference. Candidate names are tried in
+            // increasing length against the entities table and the first
+            // (shortest) prefix found there wins.
+            $e_name = $this->characters('0-9A-Za-z;', $start + 1);
+            $len = strlen($e_name);
+
+            for($c = 1; $c <= $len; $c++) {
+                $id = substr($e_name, 0, $c);
+                $this->char++;
+
+                if(in_array($id, $this->entities)) {
+                    if ($e_name[$c-1] !== ';') {
+                        if ($c < $len && $e_name[$c] == ';') {
+                            $this->char++; // consume extra semicolon
+                        }
+                    }
+                    $entity = $id;
+                    break;
+                }
+            }
+        }
+
+        if($entity === null) {
+            // If no match can be made, then this is a parse error. No
+            // characters are consumed, and nothing is returned.
+            $this->char = $start;
+            return false;
+        }
+
+        // Return the character corresponding to the entity. The reference
+        // is rebuilt as "&name;" and decoded as UTF-8.
+        return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
+    }
+
+    private function emitToken($token) {
+        /* Hand the token to the tree constructor and update the content
+        model from the outcome. */
+        $result = $this->tree->emitToken($token);
+
+        /* An integer result is the content model to use from now on. */
+        if(is_int($result)) {
+            $this->content_model = $result;
+            return;
+        }
+
+        /* Otherwise an end tag resets the content model to PCDATA. */
+        if($token['type'] === self::ENDTAG) {
+            $this->content_model = self::PCDATA;
+        }
+    }
+
+    private function EOF() {
+        /* End of input: clear the tokenizer state and pass an end-of-file
+        token straight to the tree constructor. */
+        $this->state = null;
+
+        $eof_token = array(
+            'type' => self::EOF
+        );
+        $this->tree->emitToken($eof_token);
+    }
+}
+
+class HTML5TreeConstructer {
+    public $stack = array();
+
+    private $phase;
+    private $mode;
+    private $dom;
+    private $foster_parent = null;
+    private $a_formatting  = array();
+
+    private $head_pointer = null;
+    private $form_pointer = null;
+
+    private $scoping = array('button','caption','html','marquee','object','table','td','th');
+    private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u');
+    private $special = array('address','area','base','basefont','bgsound',
+    'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
+    'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
+    'h6','head','hr','iframe','image','img','input','isindex','li','link',
+    'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
+    'option','p','param','plaintext','pre','script','select','spacer','style',
+    'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
+
+    // The different phases.
+    const INIT_PHASE = 0;
+    const ROOT_PHASE = 1;
+    const MAIN_PHASE = 2;
+    const END_PHASE  = 3;
+
+    // The different insertion modes for the main phase.
+    const BEFOR_HEAD = 0;
+    const IN_HEAD    = 1;
+    const AFTER_HEAD = 2;
+    const IN_BODY    = 3;
+    const IN_TABLE   = 4;
+    const IN_CAPTION = 5;
+    const IN_CGROUP  = 6;
+    const IN_TBODY   = 7;
+    const IN_ROW     = 8;
+    const IN_CELL    = 9;
+    const IN_SELECT  = 10;
+    const AFTER_BODY = 11;
+    const IN_FRAME   = 12;
+    const AFTR_FRAME = 13;
+
+    // The different types of elements.
+    const SPECIAL    = 0;
+    const SCOPING    = 1;
+    const FORMATTING = 2;
+    const PHRASING   = 3;
+
+    const MARKER     = 0;
+
+    public function __construct() {
+        /* Start in the initial phase with the "before head" insertion
+        mode. */
+        $this->phase = self::INIT_PHASE;
+        $this->mode = self::BEFOR_HEAD;
+
+        /* Configure the output document before storing it: UTF-8 encoding,
+        whitespace preserved, entities substituted and strict error checking
+        turned off. */
+        $document = new DOMDocument;
+        $document->encoding = 'UTF-8';
+        $document->preserveWhiteSpace = true;
+        $document->substituteEntities = true;
+        $document->strictErrorChecking = false;
+
+        $this->dom = $document;
+    }
+
+    // Process tag tokens
+    public function emitToken($token) {
+        /* Pick the handler for the current phase of tree construction, then
+        return whatever that handler returns. */
+        switch($this->phase) {
+            case self::INIT_PHASE: $handler = 'initPhase'; break;
+            case self::ROOT_PHASE: $handler = 'rootElementPhase'; break;
+            case self::MAIN_PHASE: $handler = 'mainPhase'; break;
+            case self::END_PHASE : $handler = 'trailingEndPhase'; break;
+
+            /* Unknown phase: nothing to do, nothing returned. */
+            default: return;
+        }
+
+        return $this->$handler($token);
+    }
+
+    private function initPhase($token) {
+        /* Initial phase of tree construction: handles tokens until the
+        document's DOCTYPE, or the lack of one, has been settled. */
+        $type = $token['type'];
+        $has_error_flag = isset($token['error']);
+
+        /* True when the token carries data made up solely of TAB, LF, VT,
+        FF and SPACE characters. */
+        $only_space = isset($token['data'])
+            && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']);
+
+        /* The tokens that end the initial phase without a usable DOCTYPE:
+        a DOCTYPE marked as being in error, a comment, a start tag, an end
+        tag, an end-of-file token, or character data that is not pure
+        whitespace. */
+        $in_error = $has_error_flag && $token['error'];
+        $structural = in_array($type, array(
+            HTML5::COMMENT,
+            HTML5::STARTTAG,
+            HTML5::ENDTAG,
+            HTML5::EOF
+        ), true);
+        $other_text = $type === HTML5::CHARACTR
+            && isset($token['data']) && !$only_space;
+
+        if($in_error || $structural || $other_text) {
+            /* The specification leaves this case undefined (user agents may
+            use backwards-compatible parse modes). Here the token is simply
+            reprocessed in the root element phase. */
+            $this->phase = self::ROOT_PHASE;
+            return $this->rootElementPhase($token);
+        }
+
+        /* A DOCTYPE token marked as being correct. */
+        if($has_error_flag && !$token['error']) {
+            /* A DocumentType object named "HTML" is created.
+            NOTE(review): it is never attached to the document in this
+            method; kept as in the original. */
+            $doctype = new DOMDocumentType(null, null, 'HTML');
+
+            /* Then switch to the root element phase. */
+            $this->phase = self::ROOT_PHASE;
+            return;
+        }
+
+        /* Pure whitespace character data is appended to the Document node
+        as a text node. */
+        if($only_space) {
+            $text = $this->dom->createTextNode($token['data']);
+            $this->dom->appendChild($text);
+        }
+    }
+
+    private function rootElementPhase($token) {
+        /* Root element phase: runs after the initial phase until the <html>
+        root element has been created. */
+        $type = $token['type'];
+
+        /* A DOCTYPE token is a parse error here and is ignored. */
+        if($type === HTML5::DOCTYPE) {
+            return;
+        }
+
+        /* A comment token becomes a Comment node appended to the Document
+        object. */
+        if($type === HTML5::COMMENT) {
+            $comment = $this->dom->createComment($token['data']);
+            $this->dom->appendChild($comment);
+            return;
+        }
+
+        if($type === HTML5::CHARACTR) {
+            /* Character data made up solely of TAB, LF, VT, FF and SPACE is
+            appended to the Document node as a text node. Any other
+            character data falls through and creates the root element. */
+            if(preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+                $text = $this->dom->createTextNode($token['data']);
+                $this->dom->appendChild($text);
+                return;
+            }
+
+        } elseif(!in_array($type, array(
+            HTML5::STARTTAG,
+            HTML5::ENDTAG,
+            HTML5::EOF
+        ), true)) {
+            /* No other token type is handled in this phase. */
+            return;
+        }
+
+        /* Non-whitespace character data, a start tag, an end tag or
+        end-of-file: create the html element, append it to the Document
+        object and push it onto the stack of open elements. Then switch to
+        the main phase and reprocess the current token there. */
+        $root = $this->dom->createElement('html');
+        $this->dom->appendChild($root);
+        $this->stack[] = $root;
+
+        $this->phase = self::MAIN_PHASE;
+        return $this->mainPhase($token);
+    }
+
+    private function mainPhase($token) {
+        /* Entry point of the main phase: DOCTYPE, <html> start tag and
+        end-of-file tokens are dealt with here; every other token goes to
+        the handler of the current insertion mode, whose result is returned. */
+        $type = $token['type'];
+
+        /* A DOCTYPE token: parse error, the token is ignored. */
+        if($type === HTML5::DOCTYPE) {
+            return;
+        }
+
+        /* A start tag token with the tag name "html" (a parse error unless
+        it is the first start tag token): give the first element on the
+        stack of open elements each attribute it does not carry yet. */
+        if($type === HTML5::STARTTAG && $token['name'] === 'html') {
+            foreach($token['attr'] as $attribute) {
+                if(!$this->stack[0]->hasAttribute($attribute['name'])) {
+                    $this->stack[0]->setAttribute($attribute['name'], $attribute['value']);
+                }
+            }
+
+            return;
+        }
+
+        /* An end-of-file token: generate implied end tags. */
+        if($type === HTML5::EOF) {
+            $this->generateImpliedEndTags();
+            return;
+        }
+
+        /* Anything else depends on the insertion mode. */
+        switch($this->mode) {
+            case self::BEFOR_HEAD: return $this->beforeHead($token);
+            case self::IN_HEAD:    return $this->inHead($token);
+            case self::AFTER_HEAD: return $this->afterHead($token);
+            case self::IN_BODY:    return $this->inBody($token);
+            case self::IN_TABLE:   return $this->inTable($token);
+            case self::IN_CAPTION: return $this->inCaption($token);
+            case self::IN_CGROUP:  return $this->inColumnGroup($token);
+            case self::IN_TBODY:   return $this->inTableBody($token);
+            case self::IN_ROW:     return $this->inRow($token);
+            case self::IN_CELL:    return $this->inCell($token);
+            case self::IN_SELECT:  return $this->inSelect($token);
+            case self::AFTER_BODY: return $this->afterBody($token);
+            case self::IN_FRAME:   return $this->inFrameset($token);
+            case self::AFTR_FRAME: return $this->afterFrameset($token);
+            case self::END_PHASE:  return $this->trailingEndPhase($token);
+        }
+    }
+
+    private function beforeHead($token) {
+        /* Handler for the "before head" insertion mode. A value (the result
+        of inHead()) is returned only when a head start tag is implied. */
+        $type = $token['type'];
+
+        /* A character token that consists only of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE: append the characters to the current node. */
+        if($type === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            $this->insertText($token['data']);
+            return;
+        }
+
+        /* A comment token: append a Comment node to the current node with
+        the data attribute set to the data given in the comment token. */
+        if($type === HTML5::COMMENT) {
+            $this->insertComment($token['data']);
+            return;
+        }
+
+        /* A start tag token with the tag name "head": insert an element for
+        the token, set the head element pointer to that element and change
+        the insertion mode to "in head". */
+        if($type === HTML5::STARTTAG && $token['name'] === 'head') {
+            $this->head_pointer = $this->insertElement($token);
+            $this->mode = self::IN_HEAD;
+            return;
+        }
+
+        /* Any other start tag token, an end tag with the tag name "html", or
+        a character token that the single-character whitespace pattern below
+        does not match. */
+        $implies_head = $type === HTML5::STARTTAG ||
+        ($type === HTML5::ENDTAG && $token['name'] === 'html') ||
+        ($type === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/',
+        $token['data']));
+
+        if($implies_head) {
+            /* Act as if a start tag token with the tag name "head" and no
+            attributes had been seen, then reprocess the current token. */
+            $head_start = array(
+                'name' => 'head',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            );
+
+            $this->beforeHead($head_start);
+            return $this->inHead($token);
+        }
+
+        /* Any other end tag: parse error. Ignore the token. */
+    }
+
+    private function inHead($token) {
+        /* Handles a token in the "in head" insertion mode. The head element
+        pointer is null in the innerHTML case, so it is compared against null
+        before every use. */
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE.
+
+        THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
+        or script element, append the character to the current node regardless
+        of its content. */
+        if(($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
+        $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
+        array('title', 'style', 'script'), true))) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node holding the token's data to the current node. */
+            $this->insertComment($token['data']);
+
+        /* An end tag with the tag name "title", "style" or "script" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('title', 'style', 'script'), true)) {
+            /* Pop the current node and return the PCDATA content model flag. */
+            array_pop($this->stack);
+            return HTML5::PCDATA;
+
+        /* A start tag with the tag name "title" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
+            /* Create an element for the token and append the new element to the
+            node pointed to by the head element pointer, or, if that is null
+            (innerHTML case), to the current node. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+            } else {
+                $this->insertElement($token);
+            }
+
+            /* Switch the tokeniser's content model flag to the RCDATA state. */
+            return HTML5::RCDATA;
+
+        /* A start tag with the tag name "style" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
+            /* Create and place the element exactly as for "title" above. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+            } else {
+                $this->insertElement($token);
+            }
+
+            /* Switch the tokeniser's content model flag to the CDATA state. */
+            return HTML5::CDATA;
+
+        /* A start tag with the tag name "script" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
+            /* Create an element for the token and place it like "title" and
+            "style" above; without the null check the innerHTML case would
+            call appendChild() on a null head element pointer. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+            } else {
+                $this->insertElement($token);
+            }
+
+            /* Switch the tokeniser's content model flag to the CDATA state. */
+            return HTML5::CDATA;
+
+        /* A start tag with the tag name "base", "link", or "meta" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('base', 'link', 'meta'), true)) {
+            /* Create an element for the token and append it to the node pointed
+            to by the head element pointer, or, if null, to the current node. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+                array_pop($this->stack);
+            } else {
+                $this->insertElement($token);
+            }
+
+        /* An end tag with the tag name "head" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
+            /* Pop the current node if it is the head element; otherwise (also
+            when the head element pointer is null) this is a parse error. */
+            if($this->head_pointer !== null &&
+            $this->head_pointer->isSameNode(end($this->stack))) {
+                array_pop($this->stack);
+            }
+
+            /* Change the insertion mode to "after head". */
+            $this->mode = self::AFTER_HEAD;
+
+        /* A start tag with the tag name "head" or an end tag except "html". */
+        } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
+        ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) {
+            // Parse error. Ignore the token.
+
+        /* Anything else */
+        } else {
+            /* If the current node is a head element, act as if an end tag
+            token with the tag name "head" had been seen. */
+            if($this->head_pointer !== null &&
+            $this->head_pointer->isSameNode(end($this->stack))) {
+                $this->inHead(array(
+                    'name' => 'head',
+                    'type' => HTML5::ENDTAG
+                ));
+            /* Otherwise, change the insertion mode to "after head". */
+            } else {
+                $this->mode = self::AFTER_HEAD;
+            }
+
+            /* Then, reprocess the current token. */
+            return $this->afterHead($token);
+        }
+    }
+
+    private function afterHead($token) {
+        /* Handler for the "after head" insertion mode. A value is returned
+        only when the token is passed on to another handler, namely inHead()
+        or inBody(). */
+        $type = $token['type'];
+
+        /* A character token that consists only of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE: append the characters to the current node. */
+        if($type === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            $this->insertText($token['data']);
+            return;
+        }
+
+        /* A comment token: append a Comment node to the current node with
+        the data attribute set to the data given in the comment token. */
+        if($type === HTML5::COMMENT) {
+            $this->insertComment($token['data']);
+            return;
+        }
+
+        /* Start tag tokens are told apart by their tag name. */
+        if($type === HTML5::STARTTAG) {
+            $name = $token['name'];
+
+            /* "body" or "frameset": insert an element for the token and
+            change the insertion mode to "in body" or "in frameset". */
+            if($name === 'body' || $name === 'frameset') {
+                $this->insertElement($token);
+                $this->mode = $name === 'body' ? self::IN_BODY : self::IN_FRAME;
+                return;
+            }
+
+            /* "base", "link", "meta", "script", "style", "title": parse
+            error. Switch the insertion mode back to "in head" and reprocess
+            the token. */
+            if(in_array($name, array('base', 'link', 'meta', 'script', 'style', 'title'))) {
+                $this->mode = self::IN_HEAD;
+                return $this->inHead($token);
+            }
+        }
+
+        /* Anything else: act as if a start tag token with the tag name "body"
+        and no attributes had been seen, then reprocess the current token. */
+        $body_start = array(
+            'name' => 'body',
+            'type' => HTML5::STARTTAG,
+            'attr' => array()
+        );
+
+        $this->afterHead($body_start);
+
+        return $this->inBody($token);
+    }
+
+    private function inBody($token) {
+        /* Handle the token as follows: */
+
+        switch($token['type']) {
+            /* A character token */
+            case HTML5::CHARACTR:
+                /* Reconstruct the active formatting elements, if any. */
+                $this->reconstructActiveFormattingElements();
+
+                /* Append the token's character to the current node. */
+                $this->insertText($token['data']);
+            break;
+
+            /* A comment token */
+            case HTML5::COMMENT:
+                /* Append a Comment node to the current node with the data
+                attribute set to the data given in the comment token. */
+                $this->insertComment($token['data']);
+            break;
+
+            case HTML5::STARTTAG:
+            switch($token['name']) {
+                /* A start tag token whose tag name is one of: "script",
+                "style" */
+                case 'script': case 'style':
+                    /* Process the token as if the insertion mode had been "in
+                    head". */
+                    return $this->inHead($token);
+                break;
+
+                /* A start tag token whose tag name is one of: "base", "link",
+                "meta", "title" */
+                case 'base': case 'link': case 'meta': case 'title':
+                    /* Parse error. Process the token as if the insertion mode
+                    had    been "in head". */
+                    return $this->inHead($token);
+                break;
+
+                /* A start tag token with the tag name "body" */
+                case 'body':
+                    /* Parse error. If the second element on the stack of open
+                    elements is not a body element, or, if the stack of open
+                    elements has only one node on it, then ignore the token.
+                    (innerHTML case) */
+                    if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
+                        // Ignore
+
+                    /* Otherwise, for each attribute on the token, check to see
+                    if the attribute is already present on the body element (the
+                    second element)    on the stack of open elements. If it is not,
+                    add the attribute and its corresponding value to that
+                    element. */
+                    } else {
+                        foreach($token['attr'] as $attr) {
+                            if(!$this->stack[1]->hasAttribute($attr['name'])) {
+                                $this->stack[1]->setAttribute($attr['name'], $attr['value']);
+                            }
+                        }
+                    }
+                break;
+
+                /* A start tag whose tag name is one of: "address",
+                "blockquote", "center", "dir", "div", "dl", "fieldset",
+                "listing", "menu", "ol", "p", "ul" */
+                case 'address': case 'blockquote': case 'center': case 'dir':
+                case 'div': case 'dl': case 'fieldset': case 'listing':
+                case 'menu': case 'ol': case 'p': case 'ul':
+                    /* If the stack of open elements has a p element in scope,
+                    then act as if an end tag with the tag name p had been
+                    seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+                break;
+
+                /* A start tag whose tag name is "form" */
+                case 'form':
+                    /* If the form element pointer is not null, ignore the
+                    token with a parse error. */
+                    if($this->form_pointer !== null) {
+                        // Ignore.
+
+                    /* Otherwise: */
+                    } else {
+                        /* If the stack of open elements has a p element in
+                        scope, then act as if an end tag with the tag name p
+                        had been seen. */
+                        if($this->elementInScope('p')) {
+                            $this->emitToken(array(
+                                'name' => 'p',
+                                'type' => HTML5::ENDTAG
+                            ));
+                        }
+
+                        /* Insert an HTML element for the token, and set the
+                        form element pointer to point to the element created. */
+                        $element = $this->insertElement($token);
+                        $this->form_pointer = $element;
+                    }
+                break;
+
+                /* A start tag whose tag name is "li", "dd" or "dt" */
+                case 'li': case 'dd': case 'dt':
+                    /* If the stack of open elements has a p  element in scope,
+                    then act as if an end tag with the tag name p had been
+                    seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    $stack_length = count($this->stack) - 1;
+
+                    for($n = $stack_length; 0 <= $n; $n--) {
+                        /* 1. Initialise node to be the current node (the
+                        bottommost node of the stack). */
+                        $stop = false;
+                        $node = $this->stack[$n];
+                        $cat  = $this->getElementCategory($node->tagName);
+
+                        /* 2. If node is an li, dd or dt element, then pop all
+                        the    nodes from the current node up to node, including
+                        node, then stop this algorithm. */
+                        if($token['name'] === $node->tagName ||    ($token['name'] !== 'li'
+                        && ($node->tagName === 'dd' || $node->tagName === 'dt'))) {
+                            for($x = $stack_length; $x >= $n ; $x--) {
+                                array_pop($this->stack);
+                            }
+
+                            break;
+                        }
+
+                        /* 3. If node is not in the formatting category, and is
+                        not    in the phrasing category, and is not an address or
+                        div element, then stop this algorithm. */
+                        if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
+                        $node->tagName !== 'address' && $node->tagName !== 'div') {
+                            break;
+                        }
+                    }
+
+                    /* Finally, insert an HTML element with the same tag
+                    name as the    token's. */
+                    $this->insertElement($token);
+                break;
+
+                /* A start tag token whose tag name is "plaintext" */
+                case 'plaintext':
+                    /* If the stack of open elements has a p  element in scope,
+                    then act as if an end tag with the tag name p had been
+                    seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    return HTML5::PLAINTEXT;
+                break;
+
+                /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
+                "h5", "h6" */
+                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
+                    /* If the stack of open elements has a p  element in scope,
+                    then act as if an end tag with the tag name p had been seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* If the stack of open elements has in scope an element whose
+                    tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+                    this is a parse error; pop elements from the stack until an
+                    element with one of those tag names has been popped from the
+                    stack. */
+                    while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
+                        array_pop($this->stack);
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+                break;
+
+                /* A start tag whose tag name is "a" */
+                case 'a':
+                    /* If the list of active formatting elements contains
+                    an element whose tag name is "a" between the end of the
+                    list and the last marker on the list (or the start of
+                    the list if there is no marker on the list), then this
+                    is a parse error; act as if an end tag with the tag name
+                    "a" had been seen, then remove that element from the list
+                    of active formatting elements and the stack of open
+                    elements if the end tag didn't already remove it (it
+                    might not have if the element is not in table scope). */
+                    $leng = count($this->a_formatting);
+
+                    for($n = $leng - 1; $n >= 0; $n--) {
+                        if($this->a_formatting[$n] === self::MARKER) {
+                            break;
+
+                        } elseif($this->a_formatting[$n]->nodeName === 'a') {
+                            $this->emitToken(array(
+                                'name' => 'a',
+                                'type' => HTML5::ENDTAG
+                            ));
+                            break;
+                        }
+                    }
+
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $el = $this->insertElement($token);
+
+                    /* Add that element to the list of active formatting
+                    elements. */
+                    $this->a_formatting[] = $el;
+                break;
+
+                /* A start tag whose tag name is one of: "b", "big", "em", "font",
+                "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+                case 'b': case 'big': case 'em': case 'font': case 'i':
+                case 'nobr': case 's': case 'small': case 'strike':
+                case 'strong': case 'tt': case 'u':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $el = $this->insertElement($token);
+
+                    /* Add that element to the list of active formatting
+                    elements. */
+                    $this->a_formatting[] = $el;
+                break;
+
+                /* A start tag token whose tag name is "button" */
+                case 'button':
+                    /* If the stack of open elements has a button element in scope,
+                    then this is a parse error; act as if an end tag with the tag
+                    name "button" had been seen, then reprocess the token. (We don't
+                    do that. Unnecessary.) */
+                    if($this->elementInScope('button')) {
+                        $this->inBody(array(
+                            'name' => 'button',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Insert a marker at the end of the list of active
+                    formatting elements. */
+                    $this->a_formatting[] = self::MARKER;
+                break;
+
+                /* A start tag token whose tag name is one of: "marquee", "object" */
+                case 'marquee': case 'object':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Insert a marker at the end of the list of active
+                    formatting elements. */
+                    $this->a_formatting[] = self::MARKER;
+                break;
+
+                /* A start tag token whose tag name is "xmp" */
+                case 'xmp':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Switch the content model flag to the CDATA state. */
+                    return HTML5::CDATA;
+                break;
+
+                /* A start tag whose tag name is "table" */
+                case 'table':
+                    /* If the stack of open elements has a p element in scope,
+                    then act as if an end tag with the tag name p had been seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Change the insertion mode to "in table". */
+                    $this->mode = self::IN_TABLE;
+                break;
+
+                /* A start tag whose tag name is one of: "area", "basefont",
+                "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
+                case 'area': case 'basefont': case 'bgsound': case 'br':
+                case 'embed': case 'img': case 'param': case 'spacer':
+                case 'wbr':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Immediately pop the current node off the stack of open elements. */
+                    array_pop($this->stack);
+                break;
+
+                /* A start tag whose tag name is "hr" */
+                case 'hr':
+                    /* If the stack of open elements has a p element in scope,
+                    then act as if an end tag with the tag name p had been seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Immediately pop the current node off the stack of open elements. */
+                    array_pop($this->stack);
+                break;
+
+                /* A start tag whose tag name is "image" */
+                case 'image':
+                    /* Parse error. Change the token's tag name to "img" and
+                    reprocess it. (Don't ask.) */
+                    $token['name'] = 'img';
+                    return $this->inBody($token);
+                break;
+
+                /* A start tag whose tag name is "input" */
+                case 'input':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an input element for the token. */
+                    $element = $this->insertElement($token, false);
+
+                    /* If the form element pointer is not null, then associate the
+                    input element with the form element pointed to by the form
+                    element pointer. */
+                    $this->form_pointer !== null
+                        ? $this->form_pointer->appendChild($element)
+                        : end($this->stack)->appendChild($element);
+
+                    /* Pop that input element off the stack of open elements. */
+                    array_pop($this->stack);
+                break;
+
+                /* A start tag whose tag name is "isindex" */
+                case 'isindex':
+                    /* Parse error. */
+                    // w/e
+
+                    /* If the form element pointer is not null,
+                    then ignore the token. */
+                    if($this->form_pointer === null) {
+                        /* Act as if a start tag token with the tag name "form" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'body',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => array()
+                        ));
+
+                        /* Act as if a start tag token with the tag name "hr" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'hr',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => array()
+                        ));
+
+                        /* Act as if a start tag token with the tag name "p" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'p',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => array()
+                        ));
+
+                        /* Act as if a start tag token with the tag name "label"
+                        had been seen. */
+                        $this->inBody(array(
+                            'name' => 'label',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => array()
+                        ));
+
+                        /* Act as if a stream of character tokens had been seen. */
+                        $this->insertText('This is a searchable index. '.
+                        'Insert your search keywords here: ');
+
+                        /* Act as if a start tag token with the tag name "input"
+                        had been seen, with all the attributes from the "isindex"
+                        token, except with the "name" attribute set to the value
+                        "isindex" (ignoring any explicit "name" attribute). */
+                        $attr = $token['attr'];
+                        $attr[] = array('name' => 'name', 'value' => 'isindex');
+
+                        $this->inBody(array(
+                            'name' => 'input',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => $attr
+                        ));
+
+                        /* Act as if a stream of character tokens had been seen
+                        (see below for what they should say). */
+                        $this->insertText('This is a searchable index. '.
+                        'Insert your search keywords here: ');
+
+                        /* Act as if an end tag token with the tag name "label"
+                        had been seen. */
+                        $this->inBody(array(
+                            'name' => 'label',
+                            'type' => HTML5::ENDTAG
+                        ));
+
+                        /* Act as if an end tag token with the tag name "p" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+
+                        /* Act as if a start tag token with the tag name "hr" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'hr',
+                            'type' => HTML5::ENDTAG
+                        ));
+
+                        /* Act as if an end tag token with the tag name "form" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'form',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+                break;
+
+                /* A start tag whose tag name is "textarea" */
+                case 'textarea':
+                    $this->insertElement($token);
+
+                    /* Switch the tokeniser's content model flag to the
+                    RCDATA state. */
+                    return HTML5::RCDATA;
+                break;
+
+                /* A start tag whose tag name is one of: "iframe", "noembed",
+                "noframes" */
+                case 'iframe': case 'noembed': case 'noframes':
+                    $this->insertElement($token);
+
+                    /* Switch the tokeniser's content model flag to the CDATA state. */
+                    return HTML5::CDATA;
+                break;
+
+                /* A start tag whose tag name is "select" */
+                case 'select':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Change the insertion mode to "in select". */
+                    $this->mode = self::IN_SELECT;
+                break;
+
+                /* A start or end tag whose tag name is one of: "caption", "col",
+                "colgroup", "frame", "frameset", "head", "option", "optgroup",
+                "tbody", "td", "tfoot", "th", "thead", "tr". */
+                case 'caption': case 'col': case 'colgroup': case 'frame':
+                case 'frameset': case 'head': case 'option': case 'optgroup':
+                case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead':
+                case 'tr':
+                    // Parse error. Ignore the token.
+                break;
+
+                /* A start or end tag whose tag name is one of: "event-source",
+                "section", "nav", "article", "aside", "header", "footer",
+                "datagrid", "command" */
+                case 'event-source': case 'section': case 'nav': case 'article':
+                case 'aside': case 'header': case 'footer': case 'datagrid':
+                case 'command':
+                    // Work in progress!
+                break;
+
+                /* A start tag token not covered by the previous entries */
+                default:
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    $this->insertElement($token);
+                break;
+            }
+            break;
+
+            case HTML5::ENDTAG:
+            switch($token['name']) {
+                /* An end tag with the tag name "body" */
+                case 'body':
+                    /* If the second element in the stack of open elements is
+                    not a body element, this is a parse error. Ignore the token.
+                    (innerHTML case) */
+                    if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
+                        // Ignore.
+
+                    /* If the current node is not the body element, then this
+                    is a parse error. */
+                    } elseif(end($this->stack)->nodeName !== 'body') {
+                        // Parse error.
+                    }
+
+                    /* Change the insertion mode to "after body". */
+                    $this->mode = self::AFTER_BODY;
+                break;
+
+                /* An end tag with the tag name "html" */
+                case 'html':
+                    /* Act as if an end tag with tag name "body" had been seen,
+                    then, if that token wasn't ignored, reprocess the current
+                    token. */
+                    $this->inBody(array(
+                        'name' => 'body',
+                        'type' => HTML5::ENDTAG
+                    ));
+
+                    return $this->afterBody($token);
+                break;
+
+                /* An end tag whose tag name is one of: "address", "blockquote",
+                "center", "dir", "div", "dl", "fieldset", "listing", "menu",
+                "ol", "pre", "ul" */
+                case 'address': case 'blockquote': case 'center': case 'dir':
+                case 'div': case 'dl': case 'fieldset': case 'listing':
+                case 'menu': case 'ol': case 'pre': case 'ul':
+                    /* If the stack of open elements has an element in scope
+                    with the same tag name as that of the token, then generate
+                    implied end tags. */
+                    if($this->elementInScope($token['name'])) {
+                        $this->generateImpliedEndTags();
+
+                        /* Now, if the current node is not an element with
+                        the same tag name as that of the token, then this
+                        is a parse error. */
+                        // w/e
+
+                        /* If the stack of open elements has an element in
+                        scope with the same tag name as that of the token,
+                        then pop elements from this stack until an element
+                        with that tag name has been popped from the stack. */
+                        for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                            if($this->stack[$n]->nodeName === $token['name']) {
+                                $n = -1;
+                            }
+
+                            array_pop($this->stack);
+                        }
+                    }
+                break;
+
+                /* An end tag whose tag name is "form" */
+                case 'form':
+                    /* If the stack of open elements has an element in scope
+                    with the same tag name as that of the token, then generate
+                    implied end tags. */
+                    if($this->elementInScope($token['name'])) {
+                        $this->generateImpliedEndTags();
+
+                    } 
+
+                    if(end($this->stack)->nodeName !== $token['name']) {
+                        /* Now, if the current node is not an element with the
+                        same tag name as that of the token, then this is a parse
+                        error. */
+                        // w/e
+
+                    } else {
+                        /* Otherwise, if the current node is an element with
+                        the same tag name as that of the token pop that element
+                        from the stack. */
+                        array_pop($this->stack);
+                    }
+
+                    /* In any case, set the form element pointer to null. */
+                    $this->form_pointer = null;
+                break;
+
+                /* An end tag whose tag name is "p" */
+                case 'p':
+                    /* If the stack of open elements has a p element in scope,
+                    then generate implied end tags, except for p elements. */
+                    if($this->elementInScope('p')) {
+                        $this->generateImpliedEndTags(array('p'));
+
+                        /* If the current node is not a p element, then this is
+                        a parse error. */
+                        // k
+
+                        /* If the stack of open elements has a p element in
+                        scope, then pop elements from this stack until the stack
+                        no longer has a p element in scope. */
+                        for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                            if($this->elementInScope('p')) {
+                                array_pop($this->stack);
+
+                            } else {
+                                break;
+                            }
+                        }
+                    }
+                break;
+
+                /* An end tag whose tag name is "dd", "dt", or "li" */
+                case 'dd': case 'dt': case 'li':
+                    /* If the stack of open elements has an element in scope
+                    whose tag name matches the tag name of the token, then
+                    generate implied end tags, except for elements with the
+                    same tag name as the token. */
+                    if($this->elementInScope($token['name'])) {
+                        $this->generateImpliedEndTags(array($token['name']));
+
+                        /* If the current node is not an element with the same
+                        tag name as the token, then this is a parse error. */
+                        // w/e
+
+                        /* If the stack of open elements has an element in scope
+                        whose tag name matches the tag name of the token, then
+                        pop elements from this stack until an element with that
+                        tag name has been popped from the stack. */
+                        for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                            if($this->stack[$n]->nodeName === $token['name']) {
+                                $n = -1;
+                            }
+
+                            array_pop($this->stack);
+                        }
+                    }
+                break;
+
+                /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
+                "h5", "h6" */
+                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
+                    $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
+
+                    /* If the stack of open elements has in scope an element whose
+                    tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+                    generate implied end tags. */
+                    if($this->elementInScope($elements)) {
+                        $this->generateImpliedEndTags();
+
+                        /* Now, if the current node is not an element with the same
+                        tag name as that of the token, then this is a parse error. */
+                        // w/e
+
+                        /* If the stack of open elements has in scope an element
+                        whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
+                        "h6", then pop elements from the stack until an element
+                        with one of those tag names has been popped from the stack. */
+                        while($this->elementInScope($elements)) {
+                            array_pop($this->stack);
+                        }
+                    }
+                break;
+
+                /* An end tag whose tag name is one of: "a", "b", "big", "em",
+                "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+                case 'a': case 'b': case 'big': case 'em': case 'font':
+                case 'i': case 'nobr': case 's': case 'small': case 'strike':
+                case 'strong': case 'tt': case 'u':
+                    /* 1. Let the formatting element be the last element in
+                    the list of active formatting elements that:
+                        * is between the end of the list and the last scope
+                        marker in the list, if any, or the start of the list
+                        otherwise, and
+                        * has the same tag name as the token.
+                    */
+                    while(true) {
+                        for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
+                            if($this->a_formatting[$a] === self::MARKER) {
+                                break;
+
+                            } elseif($this->a_formatting[$a]->tagName === $token['name']) {
+                                $formatting_element = $this->a_formatting[$a];
+                                $in_stack = in_array($formatting_element, $this->stack, true);
+                                $fe_af_pos = $a;
+                                break;
+                            }
+                        }
+
+                        /* If there is no such node, or, if that node is
+                        also in the stack of open elements but the element
+                        is not in scope, then this is a parse error. Abort
+                        these steps. The token is ignored. */
+                        if(!isset($formatting_element) || ($in_stack &&
+                        !$this->elementInScope($token['name']))) {
+                            break;
+
+                        /* Otherwise, if there is such a node, but that node
+                        is not in the stack of open elements, then this is a
+                        parse error; remove the element from the list, and
+                        abort these steps. */
+                        } elseif(isset($formatting_element) && !$in_stack) {
+                            unset($this->a_formatting[$fe_af_pos]);
+                            $this->a_formatting = array_merge($this->a_formatting);
+                            break;
+                        }
+
+                        /* 2. Let the furthest block be the topmost node in the
+                        stack of open elements that is lower in the stack
+                        than the formatting element, and is not an element in
+                        the phrasing or formatting categories. There might
+                        not be one. */
+                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
+                        $length = count($this->stack);
+
+                        for($s = $fe_s_pos + 1; $s < $length; $s++) {
+                            $category = $this->getElementCategory($this->stack[$s]->nodeName);
+
+                            if($category !== self::PHRASING && $category !== self::FORMATTING) {
+                                $furthest_block = $this->stack[$s];
+                            }
+                        }
+
+                        /* 3. If there is no furthest block, then the UA must
+                        skip the subsequent steps and instead just pop all
+                        the nodes from the bottom of the stack of open
+                        elements, from the current node up to the formatting
+                        element, and remove the formatting element from the
+                        list of active formatting elements. */
+                        if(!isset($furthest_block)) {
+                            for($n = $length - 1; $n >= $fe_s_pos; $n--) {
+                                array_pop($this->stack);
+                            }
+
+                            unset($this->a_formatting[$fe_af_pos]);
+                            $this->a_formatting = array_merge($this->a_formatting);
+                            break;
+                        }
+
+                        /* 4. Let the common ancestor be the element
+                        immediately above the formatting element in the stack
+                        of open elements. */
+                        $common_ancestor = $this->stack[$fe_s_pos - 1];
+
+                        /* 5. If the furthest block has a parent node, then
+                        remove the furthest block from its parent node. */
+                        if($furthest_block->parentNode !== null) {
+                            $furthest_block->parentNode->removeChild($furthest_block);
+                        }
+
+                        /* 6. Let a bookmark note the position of the
+                        formatting element in the list of active formatting
+                        elements relative to the elements on either side
+                        of it in the list. */
+                        $bookmark = $fe_af_pos;
+
+                        /* 7. Let node and last node be the furthest block.
+                        Follow these steps: */
+                        $node = $furthest_block;
+                        $last_node = $furthest_block;
+
+                        while(true) {
+                            for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
+                                /* 7.1 Let node be the element immediately
+                                prior to node in the stack of open elements. */
+                                $node = $this->stack[$n];
+
+                                /* 7.2 If node is not in the list of active
+                                formatting elements, then remove node from
+                                the stack of open elements and then go back
+                                to step 1. */
+                                if(!in_array($node, $this->a_formatting, true)) {
+                                    unset($this->stack[$n]);
+                                    $this->stack = array_merge($this->stack);
+
+                                } else {
+                                    break;
+                                }
+                            }
+
+                            /* 7.3 Otherwise, if node is the formatting
+                            element, then go to the next step in the overall
+                            algorithm. */
+                            if($node === $formatting_element) {
+                                break;
+
+                            /* 7.4 Otherwise, if last node is the furthest
+                            block, then move the aforementioned bookmark to
+                            be immediately after the node in the list of
+                            active formatting elements. */
+                            } elseif($last_node === $furthest_block) {
+                                $bookmark = array_search($node, $this->a_formatting, true) + 1;
+                            }
+
+                            /* 7.5 If node has any children, perform a
+                            shallow clone of node, replace the entry for
+                            node in the list of active formatting elements
+                            with an entry for the clone, replace the entry
+                            for node in the stack of open elements with an
+                            entry for the clone, and let node be the clone. */
+                            if($node->hasChildNodes()) {
+                                $clone = $node->cloneNode();
+                                $s_pos = array_search($node, $this->stack, true);
+                                $a_pos = array_search($node, $this->a_formatting, true);
+
+                                $this->stack[$s_pos] = $clone;
+                                $this->a_formatting[$a_pos] = $clone;
+                                $node = $clone;
+                            }
+
+                            /* 7.6 Insert last node into node, first removing
+                            it from its previous parent node if any. */
+                            if($last_node->parentNode !== null) {
+                                $last_node->parentNode->removeChild($last_node);
+                            }
+
+                            $node->appendChild($last_node);
+
+                            /* 7.7 Let last node be node. */
+                            $last_node = $node;
+                        }
+
+                        /* 8. Insert whatever last node ended up being in
+                        the previous step into the common ancestor node,
+                        first removing it from its previous parent node if
+                        any. */
+                        if($last_node->parentNode !== null) {
+                            $last_node->parentNode->removeChild($last_node);
+                        }
+
+                        $common_ancestor->appendChild($last_node);
+
+                        /* 9. Perform a shallow clone of the formatting
+                        element. */
+                        $clone = $formatting_element->cloneNode();
+
+                        /* 10. Take all of the child nodes of the furthest
+                        block and append them to the clone created in the
+                        last step. */
+                        while($furthest_block->hasChildNodes()) {
+                            $child = $furthest_block->firstChild;
+                            $furthest_block->removeChild($child);
+                            $clone->appendChild($child);
+                        }
+
+                        /* 11. Append that clone to the furthest block. */
+                        $furthest_block->appendChild($clone);
+
+                        /* 12. Remove the formatting element from the list
+                        of active formatting elements, and insert the clone
+                        into the list of active formatting elements at the
+                        position of the aforementioned bookmark. */
+                        $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
+                        unset($this->a_formatting[$fe_af_pos]);
+                        $this->a_formatting = array_merge($this->a_formatting);
+
+                        $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
+                        $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
+                        $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
+
+                        /* 13. Remove the formatting element from the stack
+                        of open elements, and insert the clone into the stack
+                        of open elements immediately after (i.e. in a more
+                        deeply nested position than) the position of the
+                        furthest block in that stack. */
+                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
+                        $fb_s_pos = array_search($furthest_block, $this->stack, true);
+                        unset($this->stack[$fe_s_pos]);
+
+                        $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
+                        $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
+                        $this->stack = array_merge($s_part1, array($clone), $s_part2);
+
+                        /* 14. Jump back to step 1 in this series of steps. */
+                        unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
+                    }
+                break;
+
+                /* An end tag token whose tag name is one of: "button",
+                "marquee", "object" */
+                case 'button': case 'marquee': case 'object':
+                    /* If the stack of open elements has an element in scope whose
+                    tag name matches the tag name of the token, then generate implied
+                    end tags. */
+                    if($this->elementInScope($token['name'])) {
+                        $this->generateImpliedEndTags();
+
+                        /* Now, if the current node is not an element with the same
+                        tag name as the token, then this is a parse error. */
+                        // k
+
+                        /* Now, if the stack of open elements has an element in scope
+                        whose tag name matches the tag name of the token, then pop
+                        elements from the stack until that element has been popped from
+                        the stack, and clear the list of active formatting elements up
+                        to the last marker. */
+                        for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                            if($this->stack[$n]->nodeName === $token['name']) {
+                                $n = -1;
+                            }
+
+                            array_pop($this->stack);
+                        }
+
+                        $marker = end(array_keys($this->a_formatting, self::MARKER, true));
+
+                        for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
+                            array_pop($this->a_formatting);
+                        }
+                    }
+                break;
+
+                /* Or an end tag whose tag name is one of: "area", "basefont",
+                "bgsound", "br", "embed", "hr", "iframe", "image", "img",
+                "input", "isindex", "noembed", "noframes", "param", "select",
+                "spacer", "table", "textarea", "wbr" */
+                case 'area': case 'basefont': case 'bgsound': case 'br':
+                case 'embed': case 'hr': case 'iframe': case 'image':
+                case 'img': case 'input': case 'isindex': case 'noembed':
+                case 'noframes': case 'param': case 'select': case 'spacer':
+                case 'table': case 'textarea': case 'wbr':
+                    // Parse error. Ignore the token.
+                break;
+
+                /* An end tag token not covered by the previous entries */
+                default:
+                    for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                        /* Initialise node to be the current node (the bottommost
+                        node of the stack). */
+                        $node = end($this->stack);
+
+                        /* If node has the same tag name as the end tag token,
+                        then: */
+                        if($token['name'] === $node->nodeName) {
+                            /* Generate implied end tags. */
+                            $this->generateImpliedEndTags();
+
+                            /* If the tag name of the end tag token does not
+                            match the tag name of the current node, this is a
+                            parse error. */
+                            // k
+
+                            /* Pop all the nodes from the current node up to
+                            node, including node, then stop this algorithm. */
+                            for($x = count($this->stack) - $n; $x >= $n; $x--) {
+                                array_pop($this->stack);
+                            }
+                                    
+                        } else {
+                            $category = $this->getElementCategory($node);
+
+                            if($category !== self::SPECIAL && $category !== self::SCOPING) {
+                                /* Otherwise, if node is in neither the formatting
+                                category nor the phrasing category, then this is a
+                                parse error. Stop this algorithm. The end tag token
+                                is ignored. */
+                                return false;
+                            }
+                        }
+                    }
+                break;
+            }
+            break;
+        }
+    }
+
+    private function inTable($token) {
+        $clear = array('html', 'table');
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $text = $this->dom->createTextNode($token['data']);
+            end($this->stack)->appendChild($text);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            end($this->stack)->appendChild($comment);
+
+        /* A start tag whose tag name is "caption" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'caption') {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert a marker at the end of the list of active
+            formatting elements. */
+            $this->a_formatting[] = self::MARKER;
+
+            /* Insert an HTML element for the token, then switch the
+            insertion mode to "in caption". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CAPTION;
+
+        /* A start tag whose tag name is "colgroup" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'colgroup') {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the
+            insertion mode to "in column group". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CGROUP;
+
+        /* A start tag whose tag name is "col" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'col') {
+            $this->inTable(array(
+                'name' => 'colgroup',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            $this->inColumnGroup($token);
+
+        /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('tbody', 'tfoot', 'thead'))) {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the insertion
+            mode to "in table body". */
+            $this->insertElement($token);
+            $this->mode = self::IN_TBODY;
+
+        /* A start tag whose tag name is one of: "td", "th", "tr" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        in_array($token['name'], array('td', 'th', 'tr'))) {
+            /* Act as if a start tag token with the tag name "tbody" had been
+            seen, then reprocess the current token. */
+            $this->inTable(array(
+                'name' => 'tbody',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inTableBody($token);
+
+        /* A start tag whose tag name is "table" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'table') {
+            /* Parse error. Act as if an end tag token with the tag name "table"
+            had been seen, then, if that token wasn't ignored, reprocess the
+            current token. */
+            $this->inTable(array(
+                'name' => 'table',
+                'type' => HTML5::ENDTAG
+            ));
+
+            return $this->mainPhase($token);
+
+        /* An end tag whose tag name is "table" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'table') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                return false;
+
+            /* Otherwise: */
+            } else {
+                /* Generate implied end tags. */
+                $this->generateImpliedEndTags();
+
+                /* Now, if the current node is not a table element, then this
+                is a parse error. */
+                // w/e
+
+                /* Pop elements from this stack until a table element has been
+                popped from the stack. */
+                while(true) {
+                    $current = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($current === 'table') {
+                        break;
+                    }
+                }
+
+                /* Reset the insertion mode appropriately. */
+                $this->resetInsertionMode();
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
+        'tfoot', 'th', 'thead', 'tr'))) {
+            // Parse error. Ignore the token.
+
+        /* Anything else */
+        } else {
+            /* Parse error. Process the token as if the insertion mode was "in
+            body", with the following exception: */
+
+            /* If the current node is a table, tbody, tfoot, thead, or tr
+            element, then, whenever a node would be inserted into the current
+            node, it must instead be inserted into the foster parent element. */
+            if(in_array(end($this->stack)->nodeName,
+            array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+                /* The foster parent element is the parent element of the last
+                table element in the stack of open elements, if there is a
+                table element and it has such a parent element. If there is no
+                table element in the stack of open elements (innerHTML case),
+                then the foster parent element is the first element in the
+                stack of open elements (the html  element). Otherwise, if there
+                is a table element in the stack of open elements, but the last
+                table element in the stack of open elements has no parent, or
+                its parent node is not an element, then the foster parent
+                element is the element before the last table element in the
+                stack of open elements. */
+                for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                    if($this->stack[$n]->nodeName === 'table') {
+                        $table = $this->stack[$n];
+                        break;
+                    }
+                }
+
+                if(isset($table) && $table->parentNode !== null) {
+                    $this->foster_parent = $table->parentNode;
+
+                } elseif(!isset($table)) {
+                    $this->foster_parent = $this->stack[0];
+
+                } elseif(isset($table) && ($table->parentNode === null ||
+                $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
+                    $this->foster_parent = $this->stack[$n - 1];
+                }
+            }
+
+            $this->inBody($token);
+        }
+    }
+
+    /**
+     * Handles a token while the insertion mode is "in caption".
+     *
+     * @param array $token Token to process. 'type' is always read; 'name' is
+     *                     read for start and end tags.
+     * @return mixed Whatever inTable() returns when the token is reprocessed
+     *               there; null in every other case.
+     */
+    private function inCaption($token) {
+        /* An end tag whose tag name is "caption" */
+        if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore
+
+            /* Otherwise: */
+            } else {
+                /* Generate implied end tags. */
+                $this->generateImpliedEndTags();
+
+                /* Now, if the current node is not a caption element, then this
+                is a parse error. */
+                // w/e
+
+                /* Pop elements from this stack until a caption element has
+                been popped from the stack. */
+                while(true) {
+                    $node = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($node === 'caption') {
+                        break;
+                    }
+                }
+
+                /* Clear the list of active formatting elements up to the last
+                marker. */
+                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+                /* Switch the insertion mode to "in table". */
+                $this->mode = self::IN_TABLE;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
+        name is "table" */
+        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'table')) {
+            /* Parse error. Act as if an end tag with the tag name "caption"
+            had been seen, then, if that token wasn't ignored, reprocess the
+            current token. */
+            $this->inCaption(array(
+                'name' => 'caption',
+                'type' => HTML5::ENDTAG
+            ));
+
+            return $this->inTable($token);
+
+        /* An end tag whose tag name is one of: "body", "col", "colgroup",
+        "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'))) {
+            // Parse error. Ignore the token. "td" has to be listed here as
+            // well, otherwise a stray </td> would be handed on to inBody().
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in body". */
+            $this->inBody($token);
+        }
+    }
+
+    /**
+     * Processes a single token under the "in column group" insertion mode.
+     *
+     * @param array $token Token to process; 'type' is always inspected,
+     *                     'data' for character/comment tokens and 'name' for
+     *                     start/end tags.
+     * @return mixed Result of inTable() when the token is handed on to it,
+     *               otherwise null.
+     */
+    private function inColumnGroup($token) {
+        $type = $token['type'];
+
+        /* Character data consisting solely of U+0009, U+000A, U+000B, U+000C
+        or U+0020 is appended to the current node as a text node. */
+        if($type === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            $whitespace = $this->dom->createTextNode($token['data']);
+            end($this->stack)->appendChild($whitespace);
+            return;
+        }
+
+        /* A comment token becomes a Comment node under the current node. */
+        if($type === HTML5::COMMENT) {
+            $node = $this->dom->createComment($token['data']);
+            end($this->stack)->appendChild($node);
+            return;
+        }
+
+        // 'name' is only looked at once the token is known to be a tag.
+        $is_start = ($type === HTML5::STARTTAG);
+        $is_end = ($type === HTML5::ENDTAG);
+
+        /* Start tag "col": insertElement() is called for the token and the
+        top entry of the stack of open elements is popped right afterwards. */
+        if($is_start && $token['name'] === 'col') {
+            $this->insertElement($token);
+            array_pop($this->stack);
+            return;
+        }
+
+        /* End tag "colgroup": ignored when the current node is the root html
+        element (innerHTML case); otherwise the current node is popped and
+        the insertion mode becomes "in table". */
+        if($is_end && $token['name'] === 'colgroup') {
+            if(end($this->stack)->nodeName !== 'html') {
+                array_pop($this->stack);
+                $this->mode = self::IN_TABLE;
+            }
+            return;
+        }
+
+        /* End tag "col": parse error, the token is ignored. */
+        if($is_end && $token['name'] === 'col') {
+            return;
+        }
+
+        /* Anything else: behave as if an end tag "colgroup" had been seen,
+        then hand the current token to the "in table" handler. */
+        $this->inColumnGroup(array(
+            'name' => 'colgroup',
+            'type' => HTML5::ENDTAG
+        ));
+
+        return $this->inTable($token);
+    }
+
+    /**
+     * Handles a token while the insertion mode is "in table body".
+     *
+     * @param array $token Token to process. 'type' is always read; 'name' is
+     *                     read for start and end tags.
+     * @return mixed Result of inRow() or mainPhase() when the token is
+     *               reprocessed there; null in every other case.
+     */
+    private function inTableBody($token) {
+        // Element names passed to clearStackToTableContext() for a table
+        // body context.
+        $clear = array('tbody', 'tfoot', 'thead', 'html');
+
+        /* A start tag whose tag name is "tr" */
+        if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
+            /* Clear the stack back to a table body context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert a tr element for the token, then switch the insertion
+            mode to "in row". */
+            $this->insertElement($token);
+            $this->mode = self::IN_ROW;
+
+        /* A start tag whose tag name is one of: "th", "td" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        ($token['name'] === 'th' || $token['name'] === 'td')) {
+            /* Parse error. Act as if a start tag with the tag name "tr" had
+            been seen, then reprocess the current token. */
+            $this->inTableBody(array(
+                'name' => 'tr',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inRow($token);
+
+        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table body context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Pop the current node from the stack of open elements. Switch
+                the insertion mode to "in table". */
+                array_pop($this->stack);
+                $this->mode = self::IN_TABLE;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
+        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
+        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
+            /* If the stack of open elements does not have a tbody, thead, or
+            tfoot element in table scope, this is a parse error. Ignore the
+            token. (innerHTML case) */
+            if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table body context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Act as if an end tag with the same tag name as the current
+                node ("tbody", "tfoot", or "thead") had been seen, then
+                reprocess the current token. */
+                $this->inTableBody(array(
+                    'name' => end($this->stack)->nodeName,
+                    'type' => HTML5::ENDTAG
+                ));
+
+                // The synthetic end tag above switched the mode to "in
+                // table", so mainPhase() is used to reprocess the token.
+                return $this->mainPhase($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "td", "th", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
+            /* Parse error. Ignore the token. */
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in table". */
+            $this->inTable($token);
+        }
+    }
+
+    /**
+     * Handles a token while the insertion mode is "in row".
+     *
+     * @param array $token Token to process. 'type' is always read; 'name' is
+     *                     read for start and end tags.
+     * @return mixed Result of inTableBody() when the token is reprocessed
+     *               there after the row has been closed; null otherwise.
+     */
+    private function inRow($token) {
+        // Element names passed to clearStackToTableContext() for a table row
+        // context.
+        $clear = array('tr', 'html');
+
+        /* A start tag whose tag name is one of: "th", "td" */
+        if($token['type'] === HTML5::STARTTAG &&
+        ($token['name'] === 'th' || $token['name'] === 'td')) {
+            /* Clear the stack back to a table row context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the insertion
+            mode to "in cell". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CELL;
+
+            /* Insert a marker at the end of the list of active formatting
+            elements. */
+            $this->a_formatting[] = self::MARKER;
+
+        /* An end tag whose tag name is "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table row context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Pop the current node (which will be a tr element) from the
+                stack of open elements. Switch the insertion mode to "in table
+                body". */
+                array_pop($this->stack);
+                $this->mode = self::IN_TBODY;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
+        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
+        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
+            /* Act as if an end tag with the tag name "tr" had been seen, then,
+            if that token wasn't ignored, reprocess the current token. */
+            if(!$this->elementInScope('tr', true)) {
+                // The implied </tr> would be ignored (innerHTML case), so the
+                // current token is not reprocessed either.
+
+            } else {
+                $this->inRow(array(
+                    'name' => 'tr',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                // Closing the row switched the mode to "in table body", so
+                // that is the handler which has to see the token again.
+                return $this->inTableBody($token);
+            }
+
+        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Otherwise, act as if an end tag with the tag name "tr" had
+                been seen, then reprocess the current token. */
+                $this->inRow(array(
+                    'name' => 'tr',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                // Reprocess under "in table body"; handing the token to
+                // inCell() here would bounce it straight back to inRow().
+                return $this->inTableBody($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "td", "th" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
+            /* Parse error. Ignore the token. */
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in table". */
+            $this->inTable($token);
+        }
+    }
+
+    /**
+     * Handles a token while the insertion mode is "in cell".
+     *
+     * @param array $token Token to process. 'type' is always read; 'name' is
+     *                     read for start and end tags.
+     * @return mixed Result of inRow() when the cell is closed and the token
+     *               is reprocessed there; null in every other case.
+     */
+    private function inCell($token) {
+        /* An end tag whose tag name is one of: "td", "th" */
+        if($token['type'] === HTML5::ENDTAG &&
+        ($token['name'] === 'td' || $token['name'] === 'th')) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as that of the token, then this is a
+            parse error and the token must be ignored. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Generate implied end tags, except for elements with the same
+                tag name as the token. */
+                $this->generateImpliedEndTags(array($token['name']));
+
+                /* Now, if the current node is not an element with the same tag
+                name as the token, then this is a parse error. */
+                // k
+
+                /* Pop elements from this stack until an element with the same
+                tag name as the token has been popped from the stack. */
+                while(true) {
+                    $node = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($node === $token['name']) {
+                        break;
+                    }
+                }
+
+                /* Clear the list of active formatting elements up to the last
+                marker. */
+                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+                /* Switch the insertion mode to "in row". (The current node
+                will be a tr element at this point.) */
+                $this->mode = self::IN_ROW;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'))) {
+            /* If the stack of open elements does not have a td or th element
+            in table scope, then this is a parse error; ignore the token.
+            (innerHTML case) */
+            if(!$this->elementInScope(array('td', 'th'), true)) {
+                // Ignore.
+
+            /* Otherwise, close the cell (see below) and reprocess the current
+            token. */
+            } else {
+                $this->closeCell();
+                return $this->inRow($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html'))) {
+            /* Parse error. Ignore the token. */
+
+        /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
+        "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as that of the token (which can only
+            happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
+            then this is a parse error and the token must be ignored. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise, close the cell (see below) and reprocess the current
+            token. */
+            } else {
+                $this->closeCell();
+                return $this->inRow($token);
+            }
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in body". */
+            $this->inBody($token);
+        }
+    }
+
+    /**
+     * Handles a token while the insertion mode is "in select".
+     *
+     * @param array $token Token to process. 'type' is always read, 'data'
+     *                     for character and comment tokens, 'name' for start
+     *                     and end tags.
+     * @return void
+     */
+    private function inSelect($token) {
+        /* Handle the token as follows: */
+
+        /* A character token */
+        if($token['type'] === HTML5::CHARACTR) {
+            /* Append the token's character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag token whose tag name is "option" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'option') {
+            /* If the current node is an option element, act as if an end tag
+            with the tag name "option" had been seen. */
+            if(end($this->stack)->nodeName === 'option') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+        /* A start tag token whose tag name is "optgroup" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'optgroup') {
+            /* If the current node is an option element, act as if an end tag
+            with the tag name "option" had been seen. */
+            if(end($this->stack)->nodeName === 'option') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* If the current node is an optgroup element, act as if an end tag
+            with the tag name "optgroup" had been seen. */
+            if(end($this->stack)->nodeName === 'optgroup') {
+                $this->inSelect(array(
+                    'name' => 'optgroup',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+        /* An end tag token whose tag name is "optgroup" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'optgroup') {
+            /* First, if the current node is an option element, and the node
+            immediately before it in the stack of open elements is an optgroup
+            element, then act as if an end tag with the tag name "option" had
+            been seen. */
+            $elements_in_stack = count($this->stack);
+
+            // The size check keeps the [$elements_in_stack - 2] lookup inside
+            // the stack when it holds fewer than two elements.
+            if($elements_in_stack >= 2 &&
+            $this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
+            $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* If the current node is an optgroup element, then pop that node
+            from the stack of open elements. Otherwise, this is a parse error,
+            ignore the token. */
+            // The current node is looked up afresh: the implied </option>
+            // above may have shortened the stack, and it is the node's name,
+            // not the node object, that has to equal "optgroup".
+            if(end($this->stack)->nodeName === 'optgroup') {
+                array_pop($this->stack);
+            }
+
+        /* An end tag token whose tag name is "option" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'option') {
+            /* If the current node is an option element, then pop that node
+            from the stack of open elements. Otherwise, this is a parse error,
+            ignore the token. */
+            if(end($this->stack)->nodeName === 'option') {
+                array_pop($this->stack);
+            }
+
+        /* An end tag whose tag name is "select" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'select') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // w/e
+
+            /* Otherwise: */
+            } else {
+                /* Pop elements from the stack of open elements until a select
+                element has been popped from the stack. */
+                while(true) {
+                    $current = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($current === 'select') {
+                        break;
+                    }
+                }
+
+                /* Reset the insertion mode appropriately. */
+                $this->resetInsertionMode();
+            }
+
+        /* A start tag whose tag name is "select" */
+        } elseif($token['name'] === 'select' &&
+        $token['type'] === HTML5::STARTTAG) {
+            /* Parse error. Act as if the token had been an end tag with the
+            tag name "select" instead. */
+            $this->inSelect(array(
+                'name' => 'select',
+                'type' => HTML5::ENDTAG
+            ));
+
+        /* An end tag whose tag name is one of: "caption", "table", "tbody",
+        "tfoot", "thead", "tr", "td", "th" */
+        } elseif(in_array($token['name'], array('caption', 'table', 'tbody',
+        'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) {
+            /* Parse error. */
+            // w/e
+
+            /* If the stack of open elements has an element in table scope with
+            the same tag name as that of the token, then act as if an end tag
+            with the tag name "select" had been seen, and reprocess the token.
+            Otherwise, ignore the token. */
+            if($this->elementInScope($token['name'], true)) {
+                $this->inSelect(array(
+                    'name' => 'select',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                $this->mainPhase($token);
+            }
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    /**
+     * Processes a single token under the "after body" insertion mode.
+     *
+     * @param array $token Token to process; 'type' is always inspected,
+     *                     'data' for character/comment tokens and 'name' for
+     *                     end tags.
+     * @return mixed Result of inBody() when the token forces a switch back
+     *               to the "in body" mode, otherwise null.
+     */
+    private function afterBody($token) {
+        $kind = $token['type'];
+
+        /* Character data consisting solely of U+0009, U+000A, U+000B, U+000C
+        or U+0020 is processed exactly as it would be "in body". */
+        if($kind === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            $this->inBody($token);
+            return;
+        }
+
+        /* A comment is appended to the first element in the stack of open
+        elements (the html element) rather than to the current node. */
+        if($kind === HTML5::COMMENT) {
+            $root = $this->stack[0];
+            $root->appendChild($this->dom->createComment($token['data']));
+            return;
+        }
+
+        /* An end tag "html" switches to the trailing end phase. The innerHTML
+        case, in which the token should be ignored, is not distinguished. */
+        if($kind === HTML5::ENDTAG && $token['name'] === 'html') {
+            $this->phase = self::END_PHASE;
+            return;
+        }
+
+        /* Anything else is a parse error: go back to "in body" and reprocess
+        the token there. */
+        $this->mode = self::IN_BODY;
+        return $this->inBody($token);
+    }
+
+    /**
+     * Handles a token while the insertion mode is "in frameset".
+     *
+     * The token type is always tested before 'name' is read, so tokens that
+     * are not tags never have their 'name' key accessed (character tokens
+     * are presumably built without one; confirm against the tokenizer).
+     *
+     * @param array $token Token to process. 'type' is always read, 'data'
+     *                     for character and comment tokens, 'name' for start
+     *                     and end tags.
+     * @return void
+     */
+    private function inFrameset($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag with the tag name "frameset" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'frameset') {
+            $this->insertElement($token);
+
+        /* An end tag with the tag name "frameset" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'frameset') {
+            /* If the current node is the root html element, then this is a
+            parse error; ignore the token. (innerHTML case) */
+            if(end($this->stack)->nodeName === 'html') {
+                // Ignore
+
+            } else {
+                /* Otherwise, pop the current node from the stack of open
+                elements. */
+                array_pop($this->stack);
+
+                /* If the parser was not originally created in order to handle
+                the setting of an element's innerHTML attribute (innerHTML case),
+                and the current node is no longer a frameset element, then change
+                the insertion mode to "after frameset". */
+                // While an enclosing frameset is still the current node the
+                // mode stays "in frameset", so nested framesets keep working.
+                $current = end($this->stack);
+
+                if($current === false || $current->nodeName !== 'frameset') {
+                    $this->mode = self::AFTR_FRAME;
+                }
+            }
+
+        /* A start tag with the tag name "frame" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'frame') {
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+            /* Immediately pop the current node off the stack of open elements. */
+            array_pop($this->stack);
+
+        /* A start tag with the tag name "noframes" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'noframes') {
+            /* Process the token as if the insertion mode had been "in body". */
+            $this->inBody($token);
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    private function afterFrameset($token) {
+        /* Handle the token as follows. Token type is tested before the tag
+        name because only tag tokens are assumed to carry a 'name' key. */
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* An end tag with the tag name "html" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'html') {
+            /* Switch to the trailing end phase. */
+            $this->phase = self::END_PHASE;
+
+        /* A start tag with the tag name "noframes" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'noframes') {
+            /* Process the token as if the insertion mode had been "in body". */
+            $this->inBody($token);
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    private function trailingEndPhase($token) {
+        /* After the main phase, as each token is emitted from the tokenisation
+        stage, it must be processed as described in this section. */
+
+        /* A DOCTYPE token */
+        if($token['type'] === HTML5::DOCTYPE) {
+            // Parse error. Ignore the token.
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the Document object with the data
+            attribute set to the data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            $this->dom->appendChild($comment);
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        } elseif($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Process the token as it would be processed in the main phase. */
+            $this->mainPhase($token);
+
+        /* A character token that is not one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE. Or a start tag token. Or an end tag token. */
+        } elseif(($token['type'] === HTML5::CHARACTR &&
+        !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
+        $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) {
+            /* Parse error. Switch back to the main phase and reprocess the
+            token. */
+            $this->phase = self::MAIN_PHASE;
+            return $this->mainPhase($token);
+
+        /* An end-of-file token */
+        } elseif($token['type'] === HTML5::EOF) {
+            /* OMG DONE!! */
+        }
+    }
+
+    private function insertElement($token, $append = true) {
+        $element = $this->dom->createElement($token['name']);
+
+        /* Copy attributes over; the first value seen for a name is kept. */
+        foreach($token['attr'] as $attribute) {
+            if($element->hasAttribute($attribute['name'])) continue;
+            $element->setAttribute($attribute['name'], $attribute['value']);
+        }
+
+        /* $append is accepted but not consulted here. */
+        $this->appendToRealParent($element);
+        $this->stack[] = $element;
+        return $element;
+    }
+
+    private function insertText($data) {
+        /* Wrap the raw string in a DOM text node and attach it. */
+        $this->appendToRealParent($this->dom->createTextNode($data));
+    }
+
+    private function insertComment($data) {
+        /* Wrap the raw string in a DOM comment node and attach it. */
+        $this->appendToRealParent($this->dom->createComment($data));
+    }
+
+    private function appendToRealParent($node) {
+        if($this->foster_parent === null) {
+            end($this->stack)->appendChild($node);
+            return;
+        }
+        /* If the foster parent element is the parent element of the
+        last table element in the stack of open elements, then the new
+        node must be inserted immediately before the last table element
+        in the stack of open elements in the foster parent element;
+        otherwise, the new node must be appended to the foster parent
+        element. */
+        $table = null;
+        for($i = count($this->stack) - 1; $i >= 0 && $table === null; $i--) {
+            $candidate = $this->stack[$i];
+            if($candidate->nodeName === 'table' && $candidate->parentNode !== null) {
+                $table = $candidate;
+            }
+        }
+
+        if($table !== null && $this->foster_parent->isSameNode($table->parentNode)) {
+            $this->foster_parent->insertBefore($node, $table);
+        } else {
+            $this->foster_parent->appendChild($node);
+        }
+
+        $this->foster_parent = null;
+    }
+
+    private function elementInScope($el, $table = false) {
+        if(is_array($el)) {
+            foreach($el as $element) {
+                if($this->elementInScope($element, $table)) {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        $leng = count($this->stack);
+
+        for($n = 0; $n < $leng; $n++) {
+            /* 1. Initialise node to be the current node (the bottommost node of
+            the stack). */
+            $node = $this->stack[$leng - 1 - $n];
+
+            if($node->tagName === $el) {
+                /* 2. If node is the target node, terminate in a match state. */
+                return true;
+
+            } elseif($node->tagName === 'table') {
+                /* 3. Otherwise, if node is a table element, terminate in a failure
+                state. */
+                return false;
+
+            } elseif($table === false && in_array($node->tagName, array('caption', 'td',
+            'th', 'button', 'marquee', 'object'))) {
+                /* 4. Otherwise, if the algorithm is the "has an element in scope"
+                variant (rather than the "has an element in table scope" variant),
+                and node is one of the following, terminate in a failure state. */
+                return false;
+
+            } elseif($node === $node->ownerDocument->documentElement) {
+                /* 5. Otherwise, if node is an html element (root element), terminate
+                in a failure state. (This can only happen if the node is the topmost
+                node of the stack of open elements, and prevents the next step from
+                being invoked if there are no more elements in the stack.) */
+                return false;
+            }
+
+            /* Otherwise, set node to the previous entry in the stack of open
+            elements and return to step 2. */
+        }
+        /* Stack exhausted without a match: report failure explicitly. */
+        return false;
+    }
+
+    private function reconstructActiveFormattingElements() {
+        /* 1. If there are no entries in the list of active formatting elements,
+        then there is nothing to reconstruct; stop this algorithm. */
+        $formatting_elements = count($this->a_formatting);
+
+        if($formatting_elements === 0) {
+            return false;
+        }
+
+        /* 3. Let entry be the last (most recently added) element in the list
+        of active formatting elements. */
+        $entry = end($this->a_formatting);
+
+        /* 2. If the last (most recently added) entry in the list of active
+        formatting elements is a marker, or if it is an element that is in the
+        stack of open elements, then there is nothing to reconstruct; stop this
+        algorithm. */
+        if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+            return false;
+        }
+
+        for($a = $formatting_elements - 1; $a >= 0; true) {
+            /* 4. If there are no entries before entry in the list of active
+            formatting elements, then jump to step 8. */
+            if($a === 0) {
+                $step_seven = false;
+                break;
+            }
+
+            /* 5. Let entry be the entry one earlier than entry in the list of
+            active formatting elements. */
+            $a--;
+            $entry = $this->a_formatting[$a];
+            /* 6. If entry is neither a marker nor an element that is also in
+            the stack of open elements, go to step 4. Otherwise go on to step 7. */
+            if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+                $step_seven = true;
+                break;
+            }
+        }
+
+        while(true) {
+            /* 7. Let entry be the element one later than entry in the list of
+            active formatting elements. */
+            if(isset($step_seven) && $step_seven === true) {
+                $a++;
+                $entry = $this->a_formatting[$a];
+            }
+
+            /* 8. Perform a shallow clone of the element entry to obtain clone. */
+            $clone = $entry->cloneNode();
+
+            /* 9. Append clone to the current node and push it onto the stack
+            of open elements so that it is the new current node. */
+            end($this->stack)->appendChild($clone);
+            $this->stack[] = $clone;
+
+            /* 10. Replace the entry for entry in the list with an entry for
+            clone. */
+            $this->a_formatting[$a] = $clone;
+
+            /* 11. If the entry for clone in the list of active formatting
+            elements is not the last entry in the list, return to step 7. */
+            if(end($this->a_formatting) !== $clone) {
+                $step_seven = true;
+            } else {
+                break;
+            }
+        }
+    }
+
+    private function clearTheActiveFormattingElementsUpToTheLastMarker() {
+        /* When the steps below require the UA to clear the list of active
+        formatting elements up to the last marker, the UA must perform the
+        following steps: */
+        /* (Also stop once the list is empty, in case it holds no marker.) */
+        while(!empty($this->a_formatting)) {
+            /* 1. Let entry be the last (most recently added) entry in the list
+            of active formatting elements. */
+            $entry = end($this->a_formatting);
+
+            /* 2. Remove entry from the list of active formatting elements. */
+            array_pop($this->a_formatting);
+
+            /* 3. If entry was a marker, then stop the algorithm at this point.
+            The list has been cleared up to the last marker. */
+            if($entry === self::MARKER) {
+                break;
+            }
+        }
+    }
+
+    private function generateImpliedEndTags(array $exclude = array()) {
+        /* When the steps below require the UA to generate implied end tags,
+        then, if the current node is a dd element, a dt element, an li element,
+        a p element, a td element, a th element, or a tr element, the UA must
+        act as if an end tag with the respective tag name had been seen and
+        then generate implied end tags again. */
+        $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
+
+        /* Here the implied end tag is realised by popping the current node. */
+        while(in_array(end($this->stack)->nodeName, $elements)) {
+            array_pop($this->stack);
+        }
+    }
+
+    private function getElementCategory($name) {
+        /* Tested in order: special, scoping, formatting; otherwise phrasing. */
+        if(in_array($name, $this->special)) {
+            return self::SPECIAL;
+        }
+        if(in_array($name, $this->scoping)) {
+            return self::SCOPING;
+        }
+        if(in_array($name, $this->formatting)) {
+            return self::FORMATTING;
+        }
+        return self::PHRASING;
+    }
+
+    private function clearStackToTableContext($elements) {
+        /* When the steps above require the UA to clear the stack back to a
+        table context, the UA must pop elements from the stack of open
+        elements until the current node is one of the tag names listed in
+        $elements (the caller supplies the stop list). The loop also ends
+        when the stack is empty, so a missing stop element cannot hang it. */
+        while(!empty($this->stack)) {
+            $current = end($this->stack)->nodeName;
+
+            if(in_array($current, $elements)) {
+                break;
+            }
+
+            array_pop($this->stack);
+        }
+    }
+
+    private function resetInsertionMode() {
+        /* Reset mode from the open-element stack. 1. Let last be false. */
+        $last = false;
+        $leng = count($this->stack);
+
+        for($n = $leng - 1; $n >= 0; $n--) {
+            /* 2. Let node be the last node in the stack of open elements. */
+            $node = $this->stack[$n];
+
+            /* 3. If node is the first node in the stack of open elements, then
+            set last to true. (The spec's innerHTML-case substitution of node
+            by the element whose innerHTML attribute is being set is not
+            performed by this code.) */
+            if($this->stack[0]->isSameNode($node)) {
+                $last = true;
+            }
+
+            /* 4. If node is a select element, then switch the insertion mode to
+            "in select" and abort these steps. (innerHTML case) */
+            if($node->nodeName === 'select') {
+                $this->mode = self::IN_SELECT;
+                break;
+
+            /* 5. If node is a td or th element, then switch the insertion mode
+            to "in cell" and abort these steps. */
+            } elseif($node->nodeName === 'td' || $node->nodeName === 'th') {
+                $this->mode = self::IN_CELL;
+                break;
+
+            /* 6. If node is a tr element, then switch the insertion mode to
+            "in row" and abort these steps. */
+            } elseif($node->nodeName === 'tr') {
+                $this->mode = self::IN_ROW;
+                break;
+
+            /* 7. If node is a tbody, thead, or tfoot element, then switch the
+            insertion mode to "in table body" and abort these steps. */
+            } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) {
+                $this->mode = self::IN_TBODY;
+                break;
+
+            /* 8. If node is a caption element, then switch the insertion mode
+            to "in caption" and abort these steps. */
+            } elseif($node->nodeName === 'caption') {
+                $this->mode = self::IN_CAPTION;
+                break;
+
+            /* 9. If node is a colgroup element, then switch the insertion mode
+            to "in column group" and abort these steps. (innerHTML case) */
+            } elseif($node->nodeName === 'colgroup') {
+                $this->mode = self::IN_CGROUP;
+                break;
+
+            /* 10. If node is a table element, then switch the insertion mode
+            to "in table" and abort these steps. */
+            } elseif($node->nodeName === 'table') {
+                $this->mode = self::IN_TABLE;
+                break;
+
+            /* 11. If node is a head element, then switch the insertion mode
+            to "in body" ("in body"! not "in head"!) and abort these steps.
+            (innerHTML case) */
+            } elseif($node->nodeName === 'head') {
+                $this->mode = self::IN_BODY;
+                break;
+
+            /* 12. If node is a body element, then switch the insertion mode to
+            "in body" and abort these steps. */
+            } elseif($node->nodeName === 'body') {
+                $this->mode = self::IN_BODY;
+                break;
+
+            /* 13. If node is a frameset element, then switch the insertion
+            mode to "in frameset" and abort these steps. (innerHTML case) */
+            } elseif($node->nodeName === 'frameset') {
+                $this->mode = self::IN_FRAME;
+                break;
+
+            /* 14. If node is an html element, then: if the head element
+            pointer is null, switch the insertion mode to "before head",
+            otherwise, switch the insertion mode to "after head". In either
+            case, abort these steps. (innerHTML case) */
+            } elseif($node->nodeName === 'html') {
+                $this->mode = ($this->head_pointer === null)
+                    ? self::BEFOR_HEAD
+                    : self::AFTER_HEAD;
+
+                break;
+
+            /* 15. If last is true, then set the insertion mode to "in body"
+            and abort these steps. (innerHTML case) */
+            } elseif($last) {
+                $this->mode = self::IN_BODY;
+                break;
+            }
+        }
+    }
+
+    private function closeCell() {
+        /* If the stack of open elements has a td or th element in table scope,
+        then act as if an end tag token with that tag name had been seen. */
+        foreach(array('td', 'th') as $cell) {
+            if(!$this->elementInScope($cell, true)) {
+                continue;
+            }
+            $this->inCell(array(
+                'name' => $cell,
+                'type' => HTML5::ENDTAG
+            ));
+            break;
+        }
+    }
+
+    public function save() {
+        return $this->dom;
+    }
+}
diff --git a/library/HTMLPurifier/Printer/ConfigForm.php b/library/HTMLPurifier/Printer/ConfigForm.php
index 31da35f8..21b8314b 100644
--- a/library/HTMLPurifier/Printer/ConfigForm.php
+++ b/library/HTMLPurifier/Printer/ConfigForm.php
@@ -25,7 +25,9 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
     
     /**
      * Whether or not to compress directive names, clipping them off
-     * after a certain amount of letters
+     * after a certain amount of letters. False to disable or integer letters
+     * before clipping.
+     * @protected
      */
     var $compress = false;
     
@@ -41,11 +43,13 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
         $this->docURL = $doc_url;
         $this->name   = $name;
         $this->compress = $compress;
+        // initialize sub-printers
         $this->fields['default']    = new HTMLPurifier_Printer_ConfigForm_default();
         $this->fields['bool']       = new HTMLPurifier_Printer_ConfigForm_bool();
     }
     
     /**
+     * Sets default column and row size for textareas in sub-printers
      * @param $cols Integer columns of textarea, null to use default
      * @param $rows Integer rows of textarea, null to use default
      */
@@ -55,15 +59,14 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
     }
     
     /**
-     * Retrieves styling, in case the directory it's in is not publically
-     * available
+     * Retrieves styling, in case it is not accessible by webserver
      */
     function getCSS() {
         return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css');
     }
     
     /**
-     * Retrieves JavaScript, in case directory is not public
+     * Retrieves JavaScript, in case it is not accessible by webserver
      */
     function getJavaScript() {
         return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js');
@@ -97,14 +100,14 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
             $ret .= $this->renderNamespace($ns, $directives);
         }
         if ($render_controls) {
-             $ret .= $this->start('tfoot');
+             $ret .= $this->start('tbody');
              $ret .= $this->start('tr');
                  $ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls'));
-                     $ret .= $this->elementEmpty('input', array('type' => 'Submit', 'value' => 'Submit'));
+                     $ret .= $this->elementEmpty('input', array('type' => 'submit', 'value' => 'Submit'));
                      $ret .= '[<a href="?">Reset</a>]';
                  $ret .= $this->end('td');
              $ret .= $this->end('tr');
-             $ret .= $this->end('tfoot');
+             $ret .= $this->end('tbody');
         }
         $ret .= $this->end('table');
         return $ret;
diff --git a/library/HTMLPurifier/Printer/HTMLDefinition.php b/library/HTMLPurifier/Printer/HTMLDefinition.php
index 52650c63..4a0a90f1 100644
--- a/library/HTMLPurifier/Printer/HTMLDefinition.php
+++ b/library/HTMLPurifier/Printer/HTMLDefinition.php
@@ -102,6 +102,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
             $ret .= $this->element('td', $this->listifyTagLookup($lookup));
             $ret .= $this->end('tr');
         }
+        $ret .= $this->end('table');
         return $ret;
     }
     
@@ -179,7 +180,8 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
                     $def->validateChildren(array(), $this->config, $context);
                 }
                 $elements = $def->elements;
-            } elseif ($def->type == 'chameleon') {
+            }
+            if ($def->type == 'chameleon') {
                 $attr['rowspan'] = 2;
             } elseif ($def->type == 'empty') {
                 $elements = array();
diff --git a/library/HTMLPurifier/URISchemeRegistry.php b/library/HTMLPurifier/URISchemeRegistry.php
index 7716042d..a94b684a 100644
--- a/library/HTMLPurifier/URISchemeRegistry.php
+++ b/library/HTMLPurifier/URISchemeRegistry.php
@@ -1,5 +1,12 @@
 <?php
 
+require_once 'HTMLPurifier/URIScheme/http.php';
+require_once 'HTMLPurifier/URIScheme/https.php';
+require_once 'HTMLPurifier/URIScheme/mailto.php';
+require_once 'HTMLPurifier/URIScheme/ftp.php';
+require_once 'HTMLPurifier/URIScheme/nntp.php';
+require_once 'HTMLPurifier/URIScheme/news.php';
+
 HTMLPurifier_ConfigSchema::define(
     'URI', 'AllowedSchemes', array(
         'http'  => true, // "Hypertext Transfer Protocol", nuf' said
@@ -7,7 +14,6 @@ HTMLPurifier_ConfigSchema::define(
         // quite useful, but not necessary
         'mailto' => true,// Email
         'ftp'   => true, // "File Transfer Protocol"
-        'irc'   => true, // "Internet Relay Chat", usually needs another app
         // for Usenet, these two are similar, but distinct
         'nntp'  => true, // individual Netnews articles
         'news'  => true  // newsgroup or individual Netnews articles
@@ -54,12 +60,6 @@ class HTMLPurifier_URISchemeRegistry
      */
     var $schemes = array();
     
-    /**
-     * Directory where scheme objects can be found
-     * @private
-     */
-    var $_scheme_dir = null;
-    
     /**
      * Retrieves a scheme validator object
      * @param $scheme String scheme name like http or mailto
@@ -79,21 +79,16 @@ class HTMLPurifier_URISchemeRegistry
         }
         
         if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
-        if (empty($this->_dir)) $this->_dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/URIScheme/';
-        
         if (!isset($allowed_schemes[$scheme])) return $null;
         
-        // this bit of reflection is not very efficient, and a bit
-        // hacky too
         $class = 'HTMLPurifier_URIScheme_' . $scheme;
-        if (!class_exists($class)) include_once $this->_dir . $scheme . '.php';
         if (!class_exists($class)) return $null;
         $this->schemes[$scheme] = new $class();
         return $this->schemes[$scheme];
     }
     
     /**
-     * Registers a custom scheme to the cache.
+     * Registers a custom scheme to the cache, bypassing reflection.
      * @param $scheme Scheme name
      * @param $scheme_obj HTMLPurifier_URIScheme object
      */
diff --git a/maintenance/PH5P.patch b/maintenance/PH5P.patch
new file mode 100644
index 00000000..37e4dbf1
--- /dev/null
+++ b/maintenance/PH5P.patch
@@ -0,0 +1,45 @@
+--- old.php	2007-08-19 14:42:33.640625000 -0400
++++ new.php	2007-08-19 14:41:51.609375000 -0400
+@@ -211,7 +211,10 @@
+         // If nothing is returned, emit a U+0026 AMPERSAND character token.
+         // Otherwise, emit the character token that was returned.
+         $char = (!$entity) ? '&' : $entity;
+-        $this->emitToken($char);
++        $this->emitToken(array(
++            'type' => self::CHARACTR,
++            'data' => $char
++        ));
+ 
+         // Finally, switch to the data state.
+         $this->state = 'data';
+@@ -708,7 +711,7 @@
+         } elseif($char === '&') {
+             /* U+0026 AMPERSAND (&)
+             Switch to the entity in attribute value state. */
+-            $this->entityInAttributeValueState('non');
++            $this->entityInAttributeValueState();
+ 
+         } elseif($char === '>') {
+             /* U+003E GREATER-THAN SIGN (>)
+@@ -738,7 +741,8 @@
+             ? '&'
+             : $entity;
+ 
+-        $this->emitToken($char);
++        $last = count($this->token['attr']) - 1;
++        $this->token['attr'][$last]['value'] .= $char;
+     }
+ 
+     private function bogusCommentState() {
+@@ -1066,6 +1070,11 @@
+                     $this->char++;
+ 
+                     if(in_array($id, $this->entities)) {
++                        if ($e_name[$c-1] !== ';') {
++                            if ($c < $len && $e_name[$c] == ';') {
++                                $this->char++; // consume extra semicolon
++                            }
++                        }
+                         $entity = $id;
+                         break;
+                     }
diff --git a/maintenance/common.php b/maintenance/common.php
index d5437b77..c38d351b 100644
--- a/maintenance/common.php
+++ b/maintenance/common.php
@@ -1,5 +1,7 @@
 <?php
 
+require_once 'compat-function-file-put-contents.php';
+
 function assertCli() {
     if (php_sapi_name() != 'cli' && !getenv('PHP_IS_CLI')) {
         echo 'Script cannot be called from web-browser (if you are calling via cli,
@@ -7,3 +9,135 @@ set environment variable PHP_IS_CLI to work around this).';
         exit;
     }
 }
+
+/**
+ * Filesystem tools not provided by default; can recursively create, copy
+ * and delete folders. Some template methods are provided for extensibility.
+ * @note This class must be instantiated to be used, although it does
+ *       not maintain state.
+ */
+class FSTools
+{
+    
+    /**
+     * Recursively creates a directory
+     * @param string $folder Name of folder to create
+     * @note Adapted from the PHP manual comment 76612
+     */
+    function mkdir($folder) {
+        $folders = preg_split("#[\\\\/]#", $folder);
+        $base = '';
+        for($i = 0, $c = count($folders); $i < $c; $i++) {
+            if($folders[$i] === '') { // not empty(): a folder may be named "0"
+                if (!$i) {
+                    // special case for root level
+                    $base .= DIRECTORY_SEPARATOR;
+                }
+                continue;
+            }
+            $base .= $folders[$i];
+            if(!is_dir($base)){
+                mkdir($base);
+            }
+            $base .= DIRECTORY_SEPARATOR;
+        }
+    }
+    
+    /**
+     * Copy a file, or recursively copy a folder and its contents; files go
+     * through $this->copy() and entries are filtered by $this->copyable()
+     *
+     * @author      Aidan Lister <aidan@php.net>
+     * @version     1.0.1-modified
+     * @link        http://aidanlister.com/repos/v/function.copyr.php
+     * @param       string   $source    Source path
+     * @param       string   $dest      Destination path
+     * @return      bool     Result of copy() for a file; TRUE for a folder
+     */
+    function copyr($source, $dest) {
+        // Simple copy for a file
+        if (is_file($source)) {
+            return $this->copy($source, $dest);
+        }
+        // Make destination directory
+        if (!is_dir($dest)) {
+            mkdir($dest);
+        }
+        // Loop through the folder
+        $dir = dir($source);
+        while (false !== $entry = $dir->read()) {
+            // Skip pointers
+            if ($entry == '.' || $entry == '..') {
+                continue;
+            }
+            if (!$this->copyable($entry)) {
+                continue;
+            }
+            // Deep copy directories
+            if ($dest !== "$source/$entry") {
+                $this->copyr("$source/$entry", "$dest/$entry");
+            }
+        }
+        // Clean up
+        $dir->close();
+        return true;
+    }
+    
+    /**
+     * Stub for PHP's built-in copy function, can be used to overload
+     * functionality
+     */
+    function copy($source, $dest) {
+        return copy($source, $dest);
+    }
+    
+    /**
+     * Overloadable function that tests a filename for copyability. By
+     * default, everything should be copied; you can restrict things to
+     * ignore hidden files, unreadable files, etc.
+     */
+    function copyable($file) {
+        return true;
+    }
+    
+    /**
+     * Delete a file, or a folder and its contents
+     *
+     * @author      Aidan Lister <aidan@php.net>
+     * @version     1.0.3
+     * @link        http://aidanlister.com/repos/v/function.rmdirr.php
+     * @param       string   $dirname    Directory to delete
+     * @return      bool     Returns TRUE on success, FALSE on failure
+     */
+    function rmdirr($dirname)
+    {
+        // Sanity check
+        if (!file_exists($dirname)) {
+            return false;
+        }
+     
+        // Simple delete for a file
+        if (is_file($dirname) || is_link($dirname)) {
+            return unlink($dirname);
+        }
+     
+        // Loop through the folder
+        $dir = dir($dirname);
+        while (false !== $entry = $dir->read()) {
+            // Skip pointers
+            if ($entry == '.' || $entry == '..') {
+                continue;
+            }
+            // Recurse
+            $this->rmdirr($dirname . DIRECTORY_SEPARATOR . $entry);
+        }
+     
+        // Clean up
+        $dir->close();
+        return rmdir($dirname);
+    }
+    
+    
+}
+
+
diff --git a/maintenance/compat-function-file-put-contents.php b/maintenance/compat-function-file-put-contents.php
new file mode 100644
index 00000000..bf8effc9
--- /dev/null
+++ b/maintenance/compat-function-file-put-contents.php
@@ -0,0 +1,107 @@
+<?php
+// $Id: file_put_contents.php,v 1.27 2007/04/17 10:09:56 arpad Exp $
+
+// Define the flag constants used below, but only when they are not defined yet
+if (!defined('FILE_USE_INCLUDE_PATH')) {
+    define('FILE_USE_INCLUDE_PATH', 1);
+}
+
+if (!defined('LOCK_EX')) {
+    define('LOCK_EX', 2);
+}
+
+if (!defined('FILE_APPEND')) {
+    define('FILE_APPEND', 8);
+}
+
+
+/**
+ * Replace file_put_contents()
+ *
+ * @category    PHP
+ * @package     PHP_Compat
+ * @license     LGPL - http://www.gnu.org/licenses/lgpl.html
+ * @copyright   2004-2007 Aidan Lister <aidan@php.net>, Arpad Ray <arpad@php.net>
+ * @link        http://php.net/function.file_put_contents
+ * @author      Aidan Lister <aidan@php.net>
+ * @version     $Revision: 1.27 $
+ * @internal    resource_context is not supported
+ * @since       PHP 5
+ * @require     PHP 4.0.0 (user_error)
+ */
+function php_compat_file_put_contents($filename, $content, $flags = null, $resource_context = null)
+{
+    // If $content is an array, convert it to a string
+    if (is_array($content)) {
+        $content = implode('', $content);
+    }
+
+    // If we don't have a string, throw an error
+    if (!is_scalar($content)) {
+        user_error('file_put_contents() The 2nd parameter should be either a string or an array',
+            E_USER_WARNING);
+        return false;
+    }
+
+    // Get the length of data to write
+    $length = strlen($content);
+
+    // Check what mode we are using; append is binary too, matching 'wb'
+    $mode = ($flags & FILE_APPEND) ?
+                'ab' :
+                'wb';
+
+    // Check if we're using the include path
+    $use_inc_path = ($flags & FILE_USE_INCLUDE_PATH) ?
+                true :
+                false;
+
+    // Open the file for writing
+    if (($fh = @fopen($filename, $mode, $use_inc_path)) === false) {
+        user_error('file_put_contents() failed to open stream: Permission denied',
+            E_USER_WARNING);
+        return false;
+    }
+
+    // Attempt to get an exclusive lock; close the handle if locking fails
+    if ($flags & LOCK_EX) {
+        if (!flock($fh, LOCK_EX)) {
+            @fclose($fh);
+            return false;
+        }
+    }
+
+    // Write to the file, then close the handle straight away so that
+    // neither failure path below leaves it open
+    $bytes = @fwrite($fh, $content);
+    @fclose($fh);
+
+    if ($bytes === false) {
+        $errormsg = sprintf('file_put_contents() Failed to write %d bytes to %s',
+                        $length,
+                        $filename);
+        user_error($errormsg, E_USER_WARNING);
+        return false;
+    }
+
+    // Check all the data was written
+    if ($bytes != $length) {
+        $errormsg = sprintf('file_put_contents() Only %d of %d bytes written, possibly out of free disk space.',
+                        $bytes,
+                        $length);
+        user_error($errormsg, E_USER_WARNING);
+        return false;
+    }
+
+    // Return length
+    return $bytes;
+}
+
+
+// Define file_put_contents() only if no function of that name exists yet
+if (!function_exists('file_put_contents')) {
+    function file_put_contents($filename, $content, $flags = null, $resource_context = null)
+    {
+        return php_compat_file_put_contents($filename, $content, $flags, $resource_context);
+    }
+}
diff --git a/maintenance/merge-library.php b/maintenance/merge-library.php
index 46c3c891..a9299084 100755
--- a/maintenance/merge-library.php
+++ b/maintenance/merge-library.php
@@ -6,20 +6,38 @@ assertCli();
 
 /**
  * Compiles all of HTML Purifier's library files into one big file
- * named HTMLPurifier.standalone.php. Operates recursively, and will
- * barf if there are conditional includes.
- * 
- * Details: also creates blank "include" files in the test/blank directory
- * in order to simulate require_once's inside the test files.
+ * named HTMLPurifier.standalone.php.
  */
 
 /**
- * Global array that tracks already loaded includes
+ * Global hash that tracks already loaded includes
  */
 $GLOBALS['loaded'] = array('HTMLPurifier.php' => true);
 
 /**
- * @param $text Text to replace includes from
+ * Custom FSTools for this script that overloads some behavior
+ * @warning The overloading of copy() is not necessarily global for
+ *          this script. Watch out!
+ */
+class MergeLibraryFSTools extends FSTools
+{
+    // Hidden entries (names starting with a dot) are never copied
+    function copyable($entry) {
+        return $entry[0] != '.';
+    }
+    // Copies via copy_and_remove_includes() instead of PHP's copy(). Its
+    // result (file_put_contents(): byte count or false) is normalised to
+    // a boolean, which is what the stub being overridden returns.
+    function copy($source, $dest) {
+        return copy_and_remove_includes($source, $dest) !== false;
+    }
+}
+$FS = new MergeLibraryFSTools();
+
+/**
+ * Replaces the includes inside PHP source code with the corresponding
+ * source.
+ * @param string $text PHP source code to replace includes from
  */
 function replace_includes($text) {
     return preg_replace_callback(
@@ -32,6 +50,8 @@ function replace_includes($text) {
 /**
  * Removes leading PHP tags from included files. Assumes that there is
  * no trailing tag.
+ * @note This is safe for files that have internal <?php
+ * @param string $text Text to remove the leading PHP tag from
  */
 function remove_php_tags($text) {
     return substr($text, 5);
@@ -40,125 +60,48 @@ function remove_php_tags($text) {
 /**
  * Creates an appropriate blank file, recursively generating directories
  * if necessary
+ * @param string $file Filename to create blank for
  */
 function create_blank($file) {
+    global $FS;
     $dir = dirname($file);
     $base = realpath('../tests/blanks/') . DIRECTORY_SEPARATOR ;
-    if ($dir != '.') mkdir_deep($base . $dir);
+    if ($dir != '.') {
+        $FS->mkdir($base . $dir);
+    }
     file_put_contents($base . $file, '');
 }
 
 /**
- * Recursively creates a directory
- * @note Adapted from the PHP manual comment 76612
+ * Copies the contents of a directory to the standalone directory
+ * @param string $dir Directory to copy
  */
-function mkdir_deep($folder) {
-    $folders = preg_split("#[\\\\/]#", $folder);
-    $base = '';
-    for($i = 0, $c = count($folders); $i < $c; $i++) {
-        if(empty($folders[$i])) {
-            if (!$i) {
-                // special case for root level
-                $base .= DIRECTORY_SEPARATOR;
-            }
-            continue;
-        }
-        $base .= $folders[$i];
-        if(!is_dir($base)){
-            mkdir($base);
-        }
-        $base .= DIRECTORY_SEPARATOR;
-    }
+function make_dir_standalone($dir) {
+    global $FS;
+    return $FS->copyr($dir, 'standalone/' . $dir);
 }
 
 /**
- * Copy a file, or recursively copy a folder and its contents
- *
- * @author      Aidan Lister <aidan@php.net>
- * @version     1.0.1
- * @link        http://aidanlister.com/repos/v/function.copyr.php
- * @param       string   $source    Source path
- * @param       string   $dest      Destination path
- * @return      bool     Returns TRUE on success, FALSE on failure
+ * Copies the contents of a file to the standalone directory
+ * @param string $file File to copy
  */
-function copyr($source, $dest) {
-    // Simple copy for a file
-    if (is_file($source)) {
-        return copy($source, $dest);
-    }
-    // Make destination directory
-    if (!is_dir($dest)) {
-        mkdir($dest);
-    }
-    // Loop through the folder
-    $dir = dir($source);
-    while (false !== $entry = $dir->read()) {
-        // Skip pointers
-        if ($entry == '.' || $entry == '..') {
-            continue;
-        }
-        // Skip hidden files
-        if ($entry[0] == '.') {
-            continue;
-        }
-        // Deep copy directories
-        if ($dest !== "$source/$entry") {
-            copyr("$source/$entry", "$dest/$entry");
-        }
-    }
-    // Clean up
-    $dir->close();
+function make_file_standalone($file) {
+    global $FS;
+    $FS->mkdir('standalone/' . dirname($file));
+    copy_and_remove_includes($file, 'standalone/' . $file);
     return true;
 }
 
 /**
- * Delete a file, or a folder and its contents
- *
- * @author      Aidan Lister <aidan@php.net>
- * @version     1.0.3
- * @link        http://aidanlister.com/repos/v/function.rmdirr.php
- * @param       string   $dirname    Directory to delete
- * @return      bool     Returns TRUE on success, FALSE on failure
+ * Copies a file to another location; if it is a PHP file, its contents
+ * are passed through replace_includes() before being written
+ * @param string $file Original file
+ * @param string $sfile New location of file
  */
-function rmdirr($dirname)
-{
-    // Sanity check
-    if (!file_exists($dirname)) {
-        return false;
-    }
- 
-    // Simple delete for a file
-    if (is_file($dirname) || is_link($dirname)) {
-        return unlink($dirname);
-    }
- 
-    // Loop through the folder
-    $dir = dir($dirname);
-    while (false !== $entry = $dir->read()) {
-        // Skip pointers
-        if ($entry == '.' || $entry == '..') {
-            continue;
-        }
- 
-        // Recurse
-        rmdirr($dirname . DIRECTORY_SEPARATOR . $entry);
-    }
- 
-    // Clean up
-    $dir->close();
-    return rmdir($dirname);
-}
-
-/**
- * Copies the contents of a directory to the standalone directory
- */
-function make_dir_standalone($dir) {
-    return copyr($dir, 'standalone/' . $dir);
-}
-
-function make_file_standalone($file) {
-    mkdir_deep('standalone/' . dirname($file));
-    return copy($file, 'standalone/' . $file);
+function copy_and_remove_includes($file, $sfile) {
+    $contents = file_get_contents($file);
+    if (strrchr($file, '.') === '.php') $contents = replace_includes($contents);
+    return file_put_contents($sfile, $contents);
 }
 
 /**
@@ -167,8 +110,14 @@ function make_file_standalone($file) {
  */
 function replace_includes_callback($matches) {
     $file = $matches[1];
-    // PHP 5 only file
-    if ($file == 'HTMLPurifier/Lexer/DOMLex.php') {
+    $preserve = array(
+      // PHP 5 only
+      'HTMLPurifier/Lexer/DOMLex.php' => 1,
+      'HTMLPurifier/Printer.php' => 1,
+      // PEAR (external)
+      'XML/HTMLSax3.php' => 1
+    );
+    if (isset($preserve[$file])) {
         return $matches[0];
     }
     if (isset($GLOBALS['loaded'][$file])) return '';
@@ -192,16 +141,22 @@ file_put_contents('HTMLPurifier.standalone.php', $contents);
 echo ' done!' . PHP_EOL;
 
 echo 'Creating standalone directory...';
-rmdirr('standalone'); // ensure a clean copy
-mkdir_deep('standalone/HTMLPurifier/DefinitionCache/Serializer');
-make_dir_standalone('HTMLPurifier/EntityLookup');
-make_dir_standalone('HTMLPurifier/Language');
-make_file_standalone('HTMLPurifier/Printer/ConfigForm.js');
-make_file_standalone('HTMLPurifier/Printer/ConfigForm.css');
-make_dir_standalone('HTMLPurifier/URIScheme');
-// PHP 5 only file
-mkdir_deep('standalone/HTMLPurifier/Lexer');
-make_file_standalone('HTMLPurifier/Lexer/DOMLex.php');
-make_file_standalone('HTMLPurifier/TokenFactory.php');
-echo ' done!' . PHP_EOL;
+$FS->rmdirr('standalone'); // ensure a clean copy
 
+// data files
+$FS->mkdir('standalone/HTMLPurifier/DefinitionCache/Serializer');
+make_dir_standalone('HTMLPurifier/EntityLookup');
+
+// non-standard inclusion setup
+make_dir_standalone('HTMLPurifier/Language');
+
+// optional components
+make_file_standalone('HTMLPurifier/Printer.php'); 
+make_dir_standalone('HTMLPurifier/Printer');
+make_dir_standalone('HTMLPurifier/Filter');
+make_file_standalone('HTMLPurifier/Lexer/PEARSax3.php');
+
+// PHP 5 only files
+make_file_standalone('HTMLPurifier/Lexer/DOMLex.php');
+make_file_standalone('HTMLPurifier/Lexer/PH5P.php');
+echo ' done!' . PHP_EOL;
diff --git a/smoketests/all.php b/smoketests/all.php
index 3f514e84..b17ce22b 100644
--- a/smoketests/all.php
+++ b/smoketests/all.php
@@ -31,7 +31,7 @@ while (false !== ($filename = readdir($dh))) {
     if ($filename == 'all.php') continue;
     if ($filename == 'testSchema.php') continue;
     ?>
-    <iframe src="<?php echo escapeHTML($filename); ?>"></iframe>
+    <iframe src="<?php echo escapeHTML($filename); if (isset($_GET['standalone'])) {echo '?standalone';} ?>"></iframe>
     <?php
 }
 
diff --git a/smoketests/common.php b/smoketests/common.php
index 8b3785a9..965ea1c9 100644
--- a/smoketests/common.php
+++ b/smoketests/common.php
@@ -2,7 +2,11 @@
 
 header('Content-type: text/html; charset=UTF-8');
 
-require_once '../library/HTMLPurifier.auto.php';
+if (!isset($_GET['standalone'])) {
+    require_once '../library/HTMLPurifier.auto.php';
+} else {
+    require_once '../library/HTMLPurifier.standalone.php';
+}
 error_reporting(E_ALL | E_STRICT);
 
 function escapeHTML($string) {
diff --git a/tests/Debugger.php b/tests/Debugger.php
index 0fccd041..55443a0a 100644
--- a/tests/Debugger.php
+++ b/tests/Debugger.php
@@ -54,14 +54,14 @@ function isInScopes($array = array()) {
 }
 /**#@-*/
 
-function printTokens($tokens, $index) {
+function printTokens($tokens, $index = null) {
     $string = '<pre>';
     $generator = new HTMLPurifier_Generator();
     foreach ($tokens as $i => $token) {
-        if ($index == $i) $string .= '[<strong>';
+        if ($index === $i) $string .= '[<strong>';
         $string .= "<sup>$i</sup>";
         $string .= $generator->escape($generator->generateFromToken($token));
-        if ($index == $i) $string .= '</strong>]';
+        if ($index === $i) $string .= '</strong>]';
     }
     $string .= '</pre>';
     echo $string;
diff --git a/tests/HTMLPurifier/AttrDef/CSSTest.php b/tests/HTMLPurifier/AttrDef/CSSTest.php
index 2d1b4c60..994fad12 100644
--- a/tests/HTMLPurifier/AttrDef/CSSTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSSTest.php
@@ -67,6 +67,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
         $this->assertDef('border:1px solid #000;');
         $this->assertDef('border-bottom:2em double #FF00FA;');
         $this->assertDef('border-collapse:collapse;');
+        $this->assertDef('border-collapse:separate;');
         $this->assertDef('caption-side:top;');
         $this->assertDef('vertical-align:middle;');
         $this->assertDef('vertical-align:12px;');
@@ -79,6 +80,8 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
         $this->assertDef('background-repeat:repeat-y;');
         $this->assertDef('background-attachment:fixed;');
         $this->assertDef('background-position:left 90%;');
+        $this->assertDef('border-spacing:1em;');
+        $this->assertDef('border-spacing:1em 2em;');
         
         // duplicates
         $this->assertDef('text-align:right;text-align:left;',
diff --git a/tests/HTMLPurifier/AttrTransform/BdoDirTest.php b/tests/HTMLPurifier/AttrTransform/BdoDirTest.php
index d1ef2468..631e019c 100644
--- a/tests/HTMLPurifier/AttrTransform/BdoDirTest.php
+++ b/tests/HTMLPurifier/AttrTransform/BdoDirTest.php
@@ -11,18 +11,19 @@ class HTMLPurifier_AttrTransform_BdoDirTest extends HTMLPurifier_AttrTransformHa
         $this->obj = new HTMLPurifier_AttrTransform_BdoDir();
     }
     
-    function test() {
-        
+    function testAddDefaultDir() {
         $this->assertResult( array(), array('dir' => 'ltr') );
-        
-        // leave existing dir alone
+    }
+    
+    function testPreserveExistingDir() {
         $this->assertResult( array('dir' => 'rtl') );
-        
-        // use a different default
+    }
+    
+    function testAlternateDefault() {
+        $this->config->set('Attr', 'DefaultTextDir', 'rtl');
         $this->assertResult(
             array(),
-            array('dir' => 'rtl'),
-            array('Attr.DefaultTextDir' => 'rtl')
+            array('dir' => 'rtl')
         );
         
     }
diff --git a/tests/HTMLPurifier/AttrTransform/BgColorTest.php b/tests/HTMLPurifier/AttrTransform/BgColorTest.php
index 69429ad8..ac46a312 100644
--- a/tests/HTMLPurifier/AttrTransform/BgColorTest.php
+++ b/tests/HTMLPurifier/AttrTransform/BgColorTest.php
@@ -3,6 +3,10 @@
 require_once 'HTMLPurifier/AttrTransform/BgColor.php';
 require_once 'HTMLPurifier/AttrTransformHarness.php';
 
+// we currently rely on the CSS validator to fix any problems.
+// This means that this transform, strictly speaking, supports
+// a superset of the functionality.
+
 class HTMLPurifier_AttrTransform_BgColorTest extends HTMLPurifier_AttrTransformHarness
 {
     
@@ -11,31 +15,31 @@ class HTMLPurifier_AttrTransform_BgColorTest extends HTMLPurifier_AttrTransformH
         $this->obj = new HTMLPurifier_AttrTransform_BgColor();
     }
     
-    function test() {
-        
+    function testEmptyInput() {
         $this->assertResult( array() );
-        
-        // we currently rely on the CSS validator to fix any problems.
-        // This means that this transform, strictly speaking, supports
-        // a superset of the functionality.
-        
+    }
+    
+    function testBasicTransform() {
         $this->assertResult(
             array('bgcolor' => '#000000'),
             array('style' => 'background-color:#000000;')
         );
-        
+    }
+    
+    function testPrependNewCSS() {
         $this->assertResult(
             array('bgcolor' => '#000000', 'style' => 'font-weight:bold'),
             array('style' => 'background-color:#000000;font-weight:bold')
         );
-        
+    }
+    
+    function testLenientTreatmentOfInvalidInput() {
         // this may change when we natively support the datatype and
         // validate its contents before forwarding it on
         $this->assertResult(
             array('bgcolor' => '#F00'),
             array('style' => 'background-color:#F00;')
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/AttrTransform/BoolToCSSTest.php b/tests/HTMLPurifier/AttrTransform/BoolToCSSTest.php
index 70b4fbdb..6c6ce40b 100644
--- a/tests/HTMLPurifier/AttrTransform/BoolToCSSTest.php
+++ b/tests/HTMLPurifier/AttrTransform/BoolToCSSTest.php
@@ -11,27 +11,29 @@ class HTMLPurifier_AttrTransform_BoolToCSSTest extends HTMLPurifier_AttrTransfor
         $this->obj = new HTMLPurifier_AttrTransform_BoolToCSS('foo', 'bar:3in;');
     }
     
-    function test() {
-        
+    function testEmptyInput() {
         $this->assertResult( array() );
-        
+    }
+    
+    function testBasicTransform() {
         $this->assertResult(
             array('foo' => 'foo'),
             array('style' => 'bar:3in;')
         );
-        
-        // boolean attribute just has to be set: we don't care about
-        // anything else
+    }
+    
+    function testIgnoreValueOfBooleanAttribute() {
         $this->assertResult(
             array('foo' => 'no'),
             array('style' => 'bar:3in;')
         );
-        
+    }
+    
+    function testPrependCSS() {
         $this->assertResult(
             array('foo' => 'foo', 'style' => 'background-color:#F00;'),
             array('style' => 'bar:3in;background-color:#F00;')
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/AttrTransform/BorderTest.php b/tests/HTMLPurifier/AttrTransform/BorderTest.php
index 25fb3c66..a89c3b3d 100644
--- a/tests/HTMLPurifier/AttrTransform/BorderTest.php
+++ b/tests/HTMLPurifier/AttrTransform/BorderTest.php
@@ -12,27 +12,29 @@ class HTMLPurifier_AttrTransform_BorderTest extends HTMLPurifier_AttrTransformHa
         $this->obj = new HTMLPurifier_AttrTransform_Border();
     }
     
-    function test() {
-        
+    function testEmptyInput() {
         $this->assertResult( array() );
-        
+    }
+    
+    function testBasicTransform() {
         $this->assertResult(
             array('border' => '1'),
             array('style' => 'border:1px solid;')
         );
-        
-        // once again, no validation done here, we expect CSS validator
-        // to catch it
+    }
+    
+    function testLenientTreatmentOfInvalidInput() {
         $this->assertResult(
             array('border' => '10%'),
             array('style' => 'border:10%px solid;')
         );
-        
+    }
+    
+    function testPrependNewCSS() {
         $this->assertResult(
             array('border' => '23', 'style' => 'font-weight:bold;'),
             array('style' => 'border:23px solid;font-weight:bold;')
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/AttrTransform/EnumToCSSTest.php b/tests/HTMLPurifier/AttrTransform/EnumToCSSTest.php
index 38bc0f14..6bb013f0 100644
--- a/tests/HTMLPurifier/AttrTransform/EnumToCSSTest.php
+++ b/tests/HTMLPurifier/AttrTransform/EnumToCSSTest.php
@@ -6,38 +6,44 @@ require_once 'HTMLPurifier/AttrTransformHarness.php';
 class HTMLPurifier_AttrTransform_EnumToCSSTest extends HTMLPurifier_AttrTransformHarness
 {
     
-    function testRegular() {
-        
+    function setUp() {
+        parent::setUp();
         $this->obj = new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
             'left'  => 'text-align:left;',
             'right' => 'text-align:right;'
         ));
-        
-        // leave empty arrays alone
+    }
+    
+    function testEmptyInput() {
         $this->assertResult( array() );
-        
-        // leave arrays without interesting stuff alone
+    }
+    
+    function testPreserveArraysWithoutInterestingAttributes() {
         $this->assertResult( array('style' => 'font-weight:bold;') );
-        
-        // test each of the conversions
-        
+    }
+    
+    function testConvertAlignLeft() {
         $this->assertResult(
             array('align' => 'left'),
             array('style' => 'text-align:left;')
         );
-        
+    }
+    
+    function testConvertAlignRight() {
         $this->assertResult(
             array('align' => 'right'),
             array('style' => 'text-align:right;')
         );
-        
-        // drop garbage value
+    }
+    
+    function testRemoveInvalidAlign() {
         $this->assertResult(
             array('align' => 'invalid'),
             array()
         );
-        
-        // test CSS munging
+    }
+    
+    function testPrependNewCSS() {
         $this->assertResult(
             array('align' => 'left', 'style' => 'font-weight:bold;'),
             array('style' => 'text-align:left;font-weight:bold;')
@@ -46,31 +52,23 @@ class HTMLPurifier_AttrTransform_EnumToCSSTest extends HTMLPurifier_AttrTransfor
     }
     
     function testCaseInsensitive() {
-        
         $this->obj = new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
             'right' => 'text-align:right;'
         ));
-        
-        // test case insensitivity
         $this->assertResult(
             array('align' => 'RIGHT'),
             array('style' => 'text-align:right;')
         );
-        
     }
     
     function testCaseSensitive() {
-        
         $this->obj = new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
             'right' => 'text-align:right;'
         ), true);
-        
-        // test case insensitivity
         $this->assertResult(
             array('align' => 'RIGHT'),
             array()
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/AttrTransform/ImgRequiredTest.php b/tests/HTMLPurifier/AttrTransform/ImgRequiredTest.php
index b1d871b4..5746f8d6 100644
--- a/tests/HTMLPurifier/AttrTransform/ImgRequiredTest.php
+++ b/tests/HTMLPurifier/AttrTransform/ImgRequiredTest.php
@@ -11,39 +11,37 @@ class HTMLPurifier_AttrTransform_ImgRequiredTest extends HTMLPurifier_AttrTransf
         $this->obj = new HTMLPurifier_AttrTransform_ImgRequired();
     }
     
-    function test() {
-        
+    function testAddMissingAttr() {
+        $this->config->set('Core', 'RemoveInvalidImg', false);
         $this->assertResult(
             array(),
-            array('src' => '', 'alt' => 'Invalid image'),
-            array(
-                'Core.RemoveInvalidImg' => false
-            )
+            array('src' => '', 'alt' => 'Invalid image')
         );
-        
+    }
+    
+    function testAlternateDefaults() {
+        $this->config->set('Attr', 'DefaultInvalidImage', 'blank.png');
+        $this->config->set('Attr', 'DefaultInvalidImageAlt', 'Pawned!');
+        $this->config->set('Core', 'RemoveInvalidImg', false);
         $this->assertResult(
             array(),
-            array('src' => 'blank.png', 'alt' => 'Pawned!'),
-            array(
-                'Attr.DefaultInvalidImage' => 'blank.png',
-                'Attr.DefaultInvalidImageAlt' => 'Pawned!',
-                'Core.RemoveInvalidImg' => false
-            )
+            array('src' => 'blank.png', 'alt' => 'Pawned!')
         );
-        
+    }
+    
+    function testGenerateAlt() {
         $this->assertResult(
             array('src' => '/path/to/foobar.png'),
             array('src' => '/path/to/foobar.png', 'alt' => 'foobar.png')
         );
-        
+    }
+    
+    function testAddDefaultSrc() {
+        $this->config->set('Core', 'RemoveInvalidImg', false);
         $this->assertResult(
             array('alt' => 'intrigue'),
-            array('alt' => 'intrigue', 'src' => ''),
-            array(
-                'Core.RemoveInvalidImg' => false
-            )
+            array('alt' => 'intrigue', 'src' => '')
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/AttrTransform/ImgSpaceTest.php b/tests/HTMLPurifier/AttrTransform/ImgSpaceTest.php
index 8fc9178f..a40fc32c 100644
--- a/tests/HTMLPurifier/AttrTransform/ImgSpaceTest.php
+++ b/tests/HTMLPurifier/AttrTransform/ImgSpaceTest.php
@@ -9,33 +9,35 @@ class HTMLPurifier_AttrTransform_ImgSpaceTest extends HTMLPurifier_AttrTransform
     
     function setUp() {
         parent::setUp();
+        $this->obj = new HTMLPurifier_AttrTransform_ImgSpace('vspace');
     }
     
-    function testVertical() {
-        
-        $this->obj = new HTMLPurifier_AttrTransform_ImgSpace('vspace');
-        
+    function testEmptyInput() {
         $this->assertResult( array() );
-        
+    }
+    
+    function testVerticalBasicUsage() {
         $this->assertResult(
             array('vspace' => '1'),
             array('style' => 'margin-top:1px;margin-bottom:1px;')
         );
-        
-        // no validation done here, we expect CSS validator to catch it
+    }
+    
+    function testLenientHandlingOfInvalidInput() {
         $this->assertResult(
             array('vspace' => '10%'),
             array('style' => 'margin-top:10%px;margin-bottom:10%px;')
         );
-        
+    }
+    
+    function testPrependNewCSS() {
         $this->assertResult(
             array('vspace' => '23', 'style' => 'font-weight:bold;'),
             array('style' => 'margin-top:23px;margin-bottom:23px;font-weight:bold;')
         );
-        
     }
     
-    function testHorizontal() {
+    function testHorizontalBasicUsage() {
         $this->obj = new HTMLPurifier_AttrTransform_ImgSpace('hspace');
         $this->assertResult(
             array('hspace' => '1'),
@@ -43,7 +45,7 @@ class HTMLPurifier_AttrTransform_ImgSpaceTest extends HTMLPurifier_AttrTransform
         );
     }
     
-    function testInvalid() {
+    function testInvalidConstructionParameter() {
         $this->expectError('ispace is not valid space attribute');
         $this->obj = new HTMLPurifier_AttrTransform_ImgSpace('ispace');
         $this->assertResult(
diff --git a/tests/HTMLPurifier/AttrTransform/LangTest.php b/tests/HTMLPurifier/AttrTransform/LangTest.php
index 42232ca3..89ae84be 100644
--- a/tests/HTMLPurifier/AttrTransform/LangTest.php
+++ b/tests/HTMLPurifier/AttrTransform/LangTest.php
@@ -13,35 +13,36 @@ class HTMLPurifier_AttrTransform_LangTest
         $this->obj = new HTMLPurifier_AttrTransform_Lang();
     }
     
-    function test() {
-        
-        // leave non-lang'ed elements alone
-        $this->assertResult(array(), true);
-        
-        // copy lang to xml:lang
+    function testEmptyInput() {
+        $this->assertResult(array());
+    }
+    
+    function testCopyLangToXMLLang() {
         $this->assertResult(
             array('lang' => 'en'),
             array('lang' => 'en', 'xml:lang' => 'en')
         );
-        
-        // preserve attributes
+    }
+    
+    function testPreserveAttributes() {
         $this->assertResult(
             array('src' => 'vert.png', 'lang' => 'fr'),
             array('src' => 'vert.png', 'lang' => 'fr', 'xml:lang' => 'fr')
         );
-        
-        // copy xml:lang to lang
+    }
+    
+    function testCopyXMLLangToLang() {
         $this->assertResult(
             array('xml:lang' => 'en'),
             array('xml:lang' => 'en', 'lang' => 'en')
         );
-        
-        // both set, override lang with xml:lang
+    }
+    
+    function testXMLLangOverridesLang() {
         $this->assertResult(
             array('lang' => 'fr', 'xml:lang' => 'de'),
             array('lang' => 'de', 'xml:lang' => 'de')
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/AttrTransform/LengthTest.php b/tests/HTMLPurifier/AttrTransform/LengthTest.php
index 62068e4b..8d2265f3 100644
--- a/tests/HTMLPurifier/AttrTransform/LengthTest.php
+++ b/tests/HTMLPurifier/AttrTransform/LengthTest.php
@@ -11,21 +11,32 @@ class HTMLPurifier_AttrTransform_LengthTest extends HTMLPurifier_AttrTransformHa
         $this->obj = new HTMLPurifier_AttrTransform_Length('width');
     }
     
-    function test() {
+    function testEmptyInput() {
         $this->assertResult( array() );
+    }
+    
+    function testTransformPixel() {
         $this->assertResult(
             array('width' => '10'),
             array('style' => 'width:10px;')
         );
+    }
+    
+    function testTransformPercentage() {
         $this->assertResult(
             array('width' => '10%'),
             array('style' => 'width:10%;')
         );
+    }
+    
+    function testPrependNewCSS() {
         $this->assertResult(
             array('width' => '10%', 'style' => 'font-weight:bold'),
             array('style' => 'width:10%;font-weight:bold')
         );
-        // this behavior might change
+    }
+    
+    function testLenientTreatmentOfInvalidInput() {
         $this->assertResult(
             array('width' => 'asdf'),
             array('style' => 'width:asdf;')
diff --git a/tests/HTMLPurifier/AttrTransform/NameTest.php b/tests/HTMLPurifier/AttrTransform/NameTest.php
index 30e9e58b..fef690d1 100644
--- a/tests/HTMLPurifier/AttrTransform/NameTest.php
+++ b/tests/HTMLPurifier/AttrTransform/NameTest.php
@@ -11,12 +11,18 @@ class HTMLPurifier_AttrTransform_NameTest extends HTMLPurifier_AttrTransformHarn
         $this->obj = new HTMLPurifier_AttrTransform_Name();
     }
     
-    function test() {
+    function testEmpty() {
         $this->assertResult( array() );
+    }
+    
+    function testTransformNameToID() {
         $this->assertResult(
             array('name' => 'free'),
             array('id' => 'free')
         );
+    }
+    
+    function testExistingIDOverridesName() {
         $this->assertResult(
             array('name' => 'tryit', 'id' => 'tobad'),
             array('id' => 'tobad')
diff --git a/tests/HTMLPurifier/AttrTransformHarness.php b/tests/HTMLPurifier/AttrTransformHarness.php
index e6ae1a93..0aa13a53 100644
--- a/tests/HTMLPurifier/AttrTransformHarness.php
+++ b/tests/HTMLPurifier/AttrTransformHarness.php
@@ -6,6 +6,7 @@ class HTMLPurifier_AttrTransformHarness extends HTMLPurifier_ComplexHarness
 {
     
     function setUp() {
+        parent::setUp();
         $this->func = 'transform';
     }
     
diff --git a/tests/HTMLPurifier/AttrValidator_ErrorsTest.php b/tests/HTMLPurifier/AttrValidator_ErrorsTest.php
index 08f59c99..8371962b 100644
--- a/tests/HTMLPurifier/AttrValidator_ErrorsTest.php
+++ b/tests/HTMLPurifier/AttrValidator_ErrorsTest.php
@@ -35,7 +35,7 @@ class HTMLPurifier_AttrValidator_ErrorsTest extends HTMLPurifier_ErrorsHarness
         $this->invoke($token);
     }
     
-    // to lazy to check for global post and global pre
+    // too lazy to check for global post and global pre
     
     function testAttributeRemoved() {
         $this->expectErrorCollection(E_ERROR, 'AttrValidator: Attribute removed');
diff --git a/tests/HTMLPurifier/ChildDef/ChameleonTest.php b/tests/HTMLPurifier/ChildDef/ChameleonTest.php
index 676bbe48..a54d5fb2 100644
--- a/tests/HTMLPurifier/ChildDef/ChameleonTest.php
+++ b/tests/HTMLPurifier/ChildDef/ChameleonTest.php
@@ -6,28 +6,36 @@ require_once 'HTMLPurifier/ChildDef/Chameleon.php';
 class HTMLPurifier_ChildDef_ChameleonTest extends HTMLPurifier_ChildDefHarness
 {
     
-    function test() {
-        
+    var $isInline;
+    
+    function setUp() {
+        parent::setUp();
         $this->obj = new HTMLPurifier_ChildDef_Chameleon(
             'b | i',      // allowed only when in inline context
             'b | i | div' // allowed only when in block context
         );
-        
+        $this->context->register('IsInline', $this->isInline);
+    }
+    
+    function testInlineAlwaysAllowed() {
+        $this->isInline = true;
         $this->assertResult(
-            '<b>Allowed.</b>', true,
-            array(), array('IsInline' => true)
+            '<b>Allowed.</b>'
         );
-        
+    }
+    
+    function testBlockNotAllowedInInline() {
+        $this->isInline = true;
         $this->assertResult(
-            '<div>Not allowed.</div>', '',
-            array(), array('IsInline' => true)
+            '<div>Not allowed.</div>', ''
         );
-        
+    }
+    
+    function testBlockAllowedInNonInline() {
+        $this->isInline = false;
         $this->assertResult(
-            '<div>Allowed.</div>', true,
-            array(), array('IsInline' => false)
+            '<div>Allowed.</div>'
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/ChildDef/OptionalTest.php b/tests/HTMLPurifier/ChildDef/OptionalTest.php
index bdb5ac05..154353df 100644
--- a/tests/HTMLPurifier/ChildDef/OptionalTest.php
+++ b/tests/HTMLPurifier/ChildDef/OptionalTest.php
@@ -6,13 +6,17 @@ require_once 'HTMLPurifier/ChildDef/Optional.php';
 class HTMLPurifier_ChildDef_OptionalTest extends HTMLPurifier_ChildDefHarness
 {
     
-    function test() {
-        
+    function setUp() {
+        parent::setUp();
         $this->obj = new HTMLPurifier_ChildDef_Optional('b | i');
-        
+    }
+    
+    function testBasicUsage() {
         $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
+    }
+    
+    function testRemoveForbiddenText() {
         $this->assertResult('Not allowed text', '');
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/ChildDef/RequiredTest.php b/tests/HTMLPurifier/ChildDef/RequiredTest.php
index e708abea..f2b55bae 100644
--- a/tests/HTMLPurifier/ChildDef/RequiredTest.php
+++ b/tests/HTMLPurifier/ChildDef/RequiredTest.php
@@ -6,8 +6,7 @@ require_once 'HTMLPurifier/ChildDef/Required.php';
 class HTMLPurifier_ChildDef_RequiredTest extends HTMLPurifier_ChildDefHarness
 {
     
-    function testParsing() {
-        
+    function testPrepareString() {
         $def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
         $this->assertIdentical($def->elements,
           array(
@@ -15,51 +14,61 @@ class HTMLPurifier_ChildDef_RequiredTest extends HTMLPurifier_ChildDefHarness
            ,'bang'   => true
            ,'gizmo'  => true
           ));
-        
+    }
+    
+    function testPrepareArray() {
         $def = new HTMLPurifier_ChildDef_Required(array('href', 'src'));
         $this->assertIdentical($def->elements,
           array(
             'href' => true
            ,'src'  => true
           ));
-        
     }
     
-    function testPCDATAForbidden() {
-        
+    function setUp() {
+        parent::setUp();
         $this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');
-        
+    }
+    
+    function testEmptyInput() {
         $this->assertResult('', false);
+    }
+    
+    function testRemoveIllegalTagsAndElements() {
         $this->assertResult(
           '<dt>Term</dt>Text in an illegal location'.
              '<dd>Definition</dd><b>Illegal tag</b>',
           '<dt>Term</dt><dd>Definition</dd>');
         $this->assertResult('How do you do!', false);
-        
+    }
+    
+    function testIgnoreWhitespace() {
         // whitespace shouldn't trigger it
         $this->assertResult("\n<dd>Definition</dd>       ");
-        
+    }
+    
+    function testPreserveWhitespaceAfterRemoval() {
         $this->assertResult(
           '<dd>Definition</dd>       <b></b>       ',
           '<dd>Definition</dd>              '
         );
+    }
+    
+    function testDeleteNodeIfOnlyWhitespace() {
         $this->assertResult("\t      ", false);
-        
     }
     
     function testPCDATAAllowed() {
-        
         $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
-        
-        $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
-        
-        // with child escaping on
+        $this->assertResult('Out <b>Bold text</b><img />', 'Out <b>Bold text</b>');
+    }
+    
+    function testPCDATAAllowedWithEscaping() {
+        $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
+        $this->config->set('Core', 'EscapeInvalidChildren', true);
         $this->assertResult(
-            '<b>Bold text</b><img />',
-            '<b>Bold text</b>&lt;img /&gt;',
-            array(
-              'Core.EscapeInvalidChildren' => true
-            )
+            'Out <b>Bold text</b><img />',
+            'Out <b>Bold text</b>&lt;img /&gt;'
         );
         
     }
diff --git a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
index dd00bfa8..256d3a34 100644
--- a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
+++ b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
@@ -7,48 +7,77 @@ class   HTMLPurifier_ChildDef_StrictBlockquoteTest
 extends HTMLPurifier_ChildDefHarness
 {
     
-    function test() {
-        
+    function setUp() {
+        parent::setUp();
         $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p');
-        
-        // assuming default wrap is p
-        
+    }
+    
+    function testEmptyInput() {
         $this->assertResult('');
+    }
+    
+    function testPreserveValidP() {
         $this->assertResult('<p>Valid</p>');
+    }
+    
+    function testPreserveValidDiv() {
         $this->assertResult('<div>Still valid</div>');
+    }
+    
+    function testWrapTextWithP() {
         $this->assertResult('Needs wrap', '<p>Needs wrap</p>');
+    }
+    
+    function testNoWrapForWhitespaceOrValidElements() {
         $this->assertResult('<p>Do not wrap</p>    <p>Whitespace</p>');
+    }
+    
+    function testWrapTextNextToValidElements() {
         $this->assertResult(
                'Wrap'. '<p>Do not wrap</p>',
             '<p>Wrap</p><p>Do not wrap</p>'
         );
+    }
+    
+    function testWrapInlineElements() {
         $this->assertResult(
             '<p>Do not</p>'.'<b>Wrap</b>',
             '<p>Do not</p><p><b>Wrap</b></p>'
         );
+    }
+    
+    function testWrapAndRemoveInvalidTags() {
         $this->assertResult(
             '<li>Not allowed</li>Paragraph.<p>Hmm.</p>',
             '<p>Not allowedParagraph.</p><p>Hmm.</p>'
         );
+    }
+    
+    function testWrapComplicatedSring() {
         $this->assertResult(
             $var = 'He said<br />perhaps<br />we should <b>nuke</b> them.',
             "<p>$var</p>"
         );
+    }
+    
+    function testWrapAndRemoveInvalidTagsComplex() {
         $this->assertResult(
             '<foo>Bar</foo><bas /><b>People</b>Conniving.'. '<p>Fools!</p>',
               '<p>Bar'.          '<b>People</b>Conniving.</p><p>Fools!</p>'
         );
-        
-        $this->assertResult('Needs wrap', '<div>Needs wrap</div>',
-            array('HTML.BlockWrapper' => 'div'));
+    }
+    
+    function testAlternateWrapper() {
+        $this->config->set('HTML', 'BlockWrapper', 'div');
+        $this->assertResult('Needs wrap', '<div>Needs wrap</div>');
         
     }
     
     function testError() {
+        $this->expectError('Cannot use non-block element as block wrapper');
         $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p');
-        $this->assertResult('Needs wrap', '<p>Needs wrap</p>',
-            array('HTML.BlockWrapper' => 'dav'));
-        $this->swallowErrors();
+        $this->config->set('HTML', 'BlockWrapper', 'dav');
+        $this->assertResult('Needs wrap', '<p>Needs wrap</p>');
     }
     
 }
diff --git a/tests/HTMLPurifier/ChildDef/TableTest.php b/tests/HTMLPurifier/ChildDef/TableTest.php
index 466ca6ee..62247d23 100644
--- a/tests/HTMLPurifier/ChildDef/TableTest.php
+++ b/tests/HTMLPurifier/ChildDef/TableTest.php
@@ -3,46 +3,58 @@
 require_once 'HTMLPurifier/ChildDefHarness.php';
 require_once 'HTMLPurifier/ChildDef/Table.php';
 
+// we're using empty tags to compact the tests: under real circumstances
+// there would be contents in them
+
 class HTMLPurifier_ChildDef_TableTest extends HTMLPurifier_ChildDefHarness
 {
     
-    function test() {
-        
+    function setUp() {
+        parent::setUp();
         $this->obj = new HTMLPurifier_ChildDef_Table();
-        
+    }
+    
+    function testEmptyInput() {
         $this->assertResult('', false);
-        
-        // we're using empty tags to compact the tests: under real circumstances
-        // there would be contents in them
-        
+    }
+    
+    function testSingleRow() {
         $this->assertResult('<tr />');
+    }
+    
+    function testComplexContents() {
         $this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
             '<tr><td>asdf</td></tr></tbody>');
         $this->assertResult('<col /><col /><col /><tr />');
-        
-        // mixed up order
+    }
+    
+    function testReorderContents() {
         $this->assertResult(
           '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
           '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');
-        
-        // duplicates of singles
-        // - first caption serves
-        // - trailing tfoots/theads get turned into tbodys
+    }
+    
+    function testDuplicateProcessing() {
         $this->assertResult(
           '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
           '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
         );
-        
-        // errant text dropped (until bubbling is implemented)
+    }
+    
+    function testRemoveText() {
         $this->assertResult('foo', false);
-        
-        // whitespace sticks to the previous element, last whitespace is
-        // stationary
-        $this->assertResult("\n   <tr />\n  <tr />\n ", true, array('Output.Newline' => "\n"));
+    }
+    
+    function testStickyWhitespaceOnTr() {
+        $this->config->set('Output', 'Newline', "\n");
+        $this->assertResult("\n   <tr />\n  <tr />\n ");
+    }
+    
+    function testStickyWhitespaceOnTSection() {
+        $this->config->set('Output', 'Newline', "\n");
         $this->assertResult(
           "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
-          "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t",
-          array('Output.Newline' => "\n")
+          "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
         );
         
     }
diff --git a/tests/HTMLPurifier/ChildDefHarness.php b/tests/HTMLPurifier/ChildDefHarness.php
index b0acb0bf..0650c2d0 100644
--- a/tests/HTMLPurifier/ChildDefHarness.php
+++ b/tests/HTMLPurifier/ChildDefHarness.php
@@ -7,6 +7,7 @@ class HTMLPurifier_ChildDefHarness extends HTMLPurifier_ComplexHarness
 {
     
     function setUp() {
+        parent::setUp();
         $this->obj       = null;
         $this->func      = 'validateChildren';
         $this->to_tokens = true;
diff --git a/tests/HTMLPurifier/ComplexHarness.php b/tests/HTMLPurifier/ComplexHarness.php
index 8ea7378d..19a4a480 100644
--- a/tests/HTMLPurifier/ComplexHarness.php
+++ b/tests/HTMLPurifier/ComplexHarness.php
@@ -67,41 +67,20 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
      * @param $context_array Context array in form of Key => Value or an actual
      *                       context object.
      */
-    function assertResult($input, $expect = true,
-        $config_array = array(), $context_array = array()
-    ) {
-        
-        // setup config 
-        if ($this->config) {
-            $config = HTMLPurifier_Config::create($this->config);
-            $config->autoFinalize = false;
-            $config->loadArray($config_array);
-        } else {
-            $config = HTMLPurifier_Config::create($config_array);
-        }
-        
-        // setup context object. Note that we are operating on a copy of it!
-        // When necessary, extend the test harness to allow post-tests
-        // on the context object
-        if (empty($this->context)) {
-            $context = new HTMLPurifier_Context();
-            $context->loadArray($context_array);
-        } else {
-            $context =& $this->context;
-        }
+    function assertResult($input, $expect = true) {
         
         if ($this->to_tokens && is_string($input)) {
             // $func may cause $input to change, so "clone" another copy
             // to sacrifice
-            $input   = $this->lexer->tokenizeHTML($s = $input, $config, $context);
-            $input_c = $this->lexer->tokenizeHTML($s, $config, $context);
+            $input   = $this->tokenize($temp = $input);
+            $input_c = $this->tokenize($temp);
         } else {
             $input_c = $input;
         }
         
         // call the function
         $func = $this->func;
-        $result = $this->obj->$func($input_c, $config, $context);
+        $result = $this->obj->$func($input_c, $this->config, $this->context);
         
         // test a bool result
         if (is_bool($result)) {
@@ -112,11 +91,9 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
         }
         
         if ($this->to_html) {
-            $result = $this->generator->
-              generateFromTokens($result, $config, $context);
+            $result = $this->generate($result);
             if (is_array($expect)) {
-                $expect = $this->generator->
-                  generateFromTokens($expect, $config, $context);
+                $expect = $this->generate($expect);
             }
         }
         
@@ -124,6 +101,20 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
         
     }
     
+    /**
+     * Tokenizes an HTML string with the lexer, passing $this->config and $this->context
+     */
+    function tokenize($html) {
+        return $this->lexer->tokenizeHTML($html, $this->config, $this->context);
+    }
+    
+    /**
+     * Generates textual HTML from tokens, passing $this->config and $this->context
+     */
+    function generate($tokens) {
+        return $this->generator->generateFromTokens($tokens, $this->config, $this->context);
+    }
+    
 }
 
 
diff --git a/tests/HTMLPurifier/EntityLookupTest.php b/tests/HTMLPurifier/EntityLookupTest.php
index f50ee611..5b031503 100644
--- a/tests/HTMLPurifier/EntityLookupTest.php
+++ b/tests/HTMLPurifier/EntityLookupTest.php
@@ -17,7 +17,7 @@ class HTMLPurifier_EntityLookupTest extends HTMLPurifier_Harness
         // special char
         $this->assertIdentical('"', $lookup->table['quot']);
         $this->assertIdentical('“', $lookup->table['ldquo']);
-        $this->assertIdentical('<', $lookup->table['lt']); //expressed strangely
+        $this->assertIdentical('<', $lookup->table['lt']); // expressed strangely in source file
         
         // symbol char
         $this->assertIdentical('θ', $lookup->table['theta']);
diff --git a/tests/HTMLPurifier/HTMLModule/ObjectTest.php b/tests/HTMLPurifier/HTMLModule/ObjectTest.php
new file mode 100644
index 00000000..44e1ffa5
--- /dev/null
+++ b/tests/HTMLPurifier/HTMLModule/ObjectTest.php
@@ -0,0 +1,39 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModuleHarness.php';
+
+class HTMLPurifier_HTMLModule_ObjectTest extends HTMLPurifier_HTMLModuleHarness
+{
+    
+    function setUp() {
+        parent::setUp();
+        $this->config->set('HTML', 'Trusted', true);
+    }
+    
+    function testDefaultRemoval() {
+        $this->config->set('HTML', 'Trusted', false);
+        $this->assertResult(
+            '<object></object>', ''
+        );
+    }
+    
+    function testMinimal() {
+        $this->assertResult('<object></object>');
+    }
+    
+    function testStandardUseCase() {
+        $this->assertResult(
+'<object type="video/x-ms-wmv" data="http://domain.com/video.wmv" width="320" height="256">
+<param name="src" value="http://domain.com/video.wmv" />
+<param name="autostart" value="false" />
+<param name="controller" value="true" />
+<param name="pluginurl" value="http://www.microsoft.com/Windows/MediaPlayer/" />
+<a href="http://www.microsoft.com/Windows/MediaPlayer/">Windows Media player required</a>
+</object>'
+        );
+    }
+    
+    // more test-cases?
+    
+}
+
diff --git a/tests/HTMLPurifier/HTMLModule/ScriptingTest.php b/tests/HTMLPurifier/HTMLModule/ScriptingTest.php
index 2bb4a0e8..7f4151fd 100644
--- a/tests/HTMLPurifier/HTMLModule/ScriptingTest.php
+++ b/tests/HTMLPurifier/HTMLModule/ScriptingTest.php
@@ -5,47 +5,51 @@ require_once 'HTMLPurifier/HTMLModuleHarness.php';
 class HTMLPurifier_HTMLModule_ScriptingTest extends HTMLPurifier_HTMLModuleHarness
 {
     
-    function test() {
-        
-        // default (remove everything)
+    function setUp() {
+        parent::setUp();
+        $this->config->set('HTML', 'Trusted', true);
+        $this->config->set('Core', 'CommentScriptContents', false);
+    }
+    
+    function testDefaultRemoval() {
+        $this->config->set('HTML', 'Trusted', false);
         $this->assertResult(
             '<script type="text/javascript">foo();</script>', ''
         );
-        
-        // enabled
+    }
+    
+    function testPreserve() {
         $this->assertResult(
-            '<script type="text/javascript">foo();</script>', true,
-            array('HTML.Trusted' => true)
+            '<script type="text/javascript">foo();</script>'
         );
-        
-        // CDATA
+    }
+    
+    function testCDATAEnclosure() {
         $this->assertResult(
-'//<![CDATA[
+'<script type="text/javascript">//<![CDATA[
 alert("<This is compatible with XHTML>");
-//]]> ', true,
-            array('HTML.Trusted' => true)
+//]]></script>'
         );
-        
-        // max
+    }
+    
+    function testAllAttributes() {
         $this->assertResult(
             '<script
                 defer="defer"
                 src="test.js"
                 type="text/javascript"
-            >PCDATA</script>', true,
-            array('HTML.Trusted' => true, 'Core.CommentScriptContents' => false)
+            >PCDATA</script>'
         );
-        
-        // unsupported
+    }
+    
+    function testUnsupportedAttributes() {
         $this->assertResult(
             '<script
                 type="text/javascript"
                 charset="utf-8"
             >PCDATA</script>',
-            '<script type="text/javascript">PCDATA</script>',
-            array('HTML.Trusted' => true, 'Core.CommentScriptContents' => false)
+            '<script type="text/javascript">PCDATA</script>'
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
index 0f0b5e5e..23743dff 100644
--- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php
+++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
@@ -8,29 +8,35 @@ class HTMLPurifier_Injector_AutoParagraphTest extends HTMLPurifier_InjectorHarne
     
     function setup() {
         parent::setup();
-        $this->config = array('AutoFormat.AutoParagraph' => true);
+        $this->config->set('AutoFormat', 'AutoParagraph', true);
     }
     
-    function test() {
+    function testSingleParagraph() {
         $this->assertResult(
             'Foobar',
             '<p>Foobar</p>'
         );
-        
+    }
+    
+    function testSingleMultiLineParagraph() {
         $this->assertResult(
 'Par 1
 Par 1 still',
 '<p>Par 1
 Par 1 still</p>'
         );
-        
+    }
+    
+    function testTwoParagraphs() {
         $this->assertResult(
 'Par1
 
 Par2',
             '<p>Par1</p><p>Par2</p>'
         );
-        
+    }
+    
+    function testTwoParagraphsWithLotsOfSpace() {
         $this->assertResult(
 'Par1
 
@@ -39,15 +45,18 @@ Par2',
 Par2',
             '<p>Par1</p><p>Par2</p>'
         );
-        
+    }
+    
+    function testTwoParagraphsWithInlineElements() {
         $this->assertResult(
 '<b>Par1</b>
 
 <i>Par2</i>',
             '<p><b>Par1</b></p><p><i>Par2</i></p>'
         );
-        
-        
+    }
+    
+    function testSingleParagraphThatLooksLikeTwo() {
         $this->assertResult(
 '<b>Par1
 
@@ -56,29 +65,40 @@ Par2</b>',
 
 Par2</b></p>'
         );
-        
+    }
+    
+    function testAddParagraphAdjacentToParagraph() {
         $this->assertResult(
             'Par1<p>Par2</p>',
             '<p>Par1</p><p>Par2</p>'
         );
-        
+    }
+    
+    function testParagraphUnclosedInlineElement() {
         $this->assertResult(
             '<b>Par1',
             '<p><b>Par1</b></p>'
         );
-        
+    }
+    
+    function testPreservePreTags() {
         $this->assertResult(
 '<pre>Par1
 
 Par1</pre>'
         );
-        
+    }
+    
+    function testIgnoreTrailingWhitespace() {
         $this->assertResult(
 'Par1
 
   ',
 '<p>Par1</p>'
         );
+    }
+    
+    function testDoNotParagraphBlockElements() {
         $this->assertResult(
 'Par1
 
@@ -87,19 +107,25 @@ Par1</pre>'
 Par3',
 '<p>Par1</p><div>Par2</div><p>Par3</p>'
         );
-        
+    }
+    
+    function testParagraphTextAndInlineNodes() {
         $this->assertResult(
 'Par<b>1</b>',
             '<p>Par<b>1</b></p>'
         );
-        
+    }
+    
+    function testIgnoreLeadingWhitespace() {
         $this->assertResult(
 '
 
 Par',
             '<p>Par</p>'
         );
-        
+    }
+    
+    function testIgnoreSurroundingWhitespace() {
         $this->assertResult(
 '
 
@@ -108,69 +134,90 @@ Par
 ',
             '<p>Par</p>'
         );
-        
+    }
+    
+    function testParagraphInsideBlockNode() {
         $this->assertResult(
 '<div>Par1
 
 Par2</div>',
             '<div><p>Par1</p><p>Par2</p></div>'
         );
-        
+    }
+    
+    function testParagraphInlineNodeInsideBlockNode() {
         $this->assertResult(
 '<div><b>Par1</b>
 
 Par2</div>',
             '<div><p><b>Par1</b></p><p>Par2</p></div>'
         );
-        
+    }
+    
+    function testNoParagraphWhenOnlyOneInsideBlockNode() {
         $this->assertResult('<div>Par1</div>');
-        
+    }
+    
+    function testParagraphTwoInlineNodesInsideBlockNode() {
         $this->assertResult(
 '<div><b>Par1</b>
 
 <i>Par2</i></div>',
             '<div><p><b>Par1</b></p><p><i>Par2</i></p></div>'
         );
-        
+    }
+    
+    function testPreserveInlineNodesInPreTag() {
         $this->assertResult(
 '<pre><b>Par1</b>
 
-<i>Par2</i></pre>',
-            true
+<i>Par2</i></pre>'
         );
-        
+    }
+    
+    function testSplitUpInternalsOfPTagInBlockNode() {
         $this->assertResult(
 '<div><p>Foo
 
 Bar</p></div>',
             '<div><p>Foo</p><p>Bar</p></div>'
         );
-        
+    }
+    
+    function testSplitUpInlineNodesInPTagInBlockNode() {
         $this->assertResult(
 '<div><p><b>Foo</b>
 
 <i>Bar</i></p></div>',
             '<div><p><b>Foo</b></p><p><i>Bar</i></p></div>'
         );
-        
+    }
+    
+    function testNoParagraphSingleInlineNodeInBlockNode() {
         $this->assertResult(
 '<div><b>Foo</b></div>',
             '<div><b>Foo</b></div>'
         );
-        
+    }
+    
+    function testParagraphInBlockquote() {
         $this->assertResult(
 '<blockquote>Par1
 
 Par2</blockquote>',
             '<blockquote><p>Par1</p><p>Par2</p></blockquote>'
         );
-        
+    }
+    
+    function testNoParagraphBetweenListItem() {
         $this->assertResult(
 '<ul><li>Foo</li>
 
-<li>Bar</li></ul>', true
+<li>Bar</li></ul>'
         );
-        
+    }
+    
+    function testParagraphSingleElementWithSurroundingSpace() {
         $this->assertResult(
 '<div>
 
@@ -179,7 +226,9 @@ Bar
 </div>', 
         '<div><p>Bar</p></div>'
         );
-        
+    }
+    
+    function testIgnoreExtraSpaceWithLeadingInlineNode() {
         $this->assertResult(
 '<b>Par1</b>a
 
@@ -188,99 +237,124 @@ Bar
 Par2', 
         '<p><b>Par1</b>a</p><p>Par2</p>'
         );
-        
+    }
+    
+    function testAbsorbExtraEndingPTag() {
         $this->assertResult(
 'Par1
 
 Par2</p>', 
         '<p>Par1</p><p>Par2</p>'
         );
-        
+    }
+    
+    function testAbsorbExtraEndingDivTag() {
         $this->assertResult(
 'Par1
 
 Par2</div>', 
         '<p>Par1</p><p>Par2</p>'
         );
-        
+    }
+    
+    function testDoNotParagraphSingleSurroundingSpaceInBlockNode() {
         $this->assertResult(
 '<div>
 Par1
-</div>', true
+</div>'
         );
-        
+    }
+    
+    function testBlockNodeTextDelimeterInBlockNode() {
         $this->assertResult(
 '<div>Par1
 
 <div>Par2</div></div>',
 '<div><p>Par1</p><div>Par2</div></div>'
         );
-        
+    }
+    
+    function testBlockNodeTextDelimeterWithoutDoublespaceInBlockNode() {
         $this->assertResult(
 '<div>Par1
 <div>Par2</div></div>',
 '<div><p>Par1
 </p><div>Par2</div></div>'
         );
-        
+    }
+    
+    function testBlockNodeTextDelimeterWithoutDoublespace() {
         $this->assertResult(
 'Par1
 <div>Par2</div>',
 '<p>Par1
 </p><div>Par2</div>'
         );
-        
+    }
+    
+    function testTwoParagraphsOfTextAndInlineNode() {
         $this->assertResult(
 'Par1
 
 <b>Par2</b>',
 '<p>Par1</p><p><b>Par2</b></p>'
         );
-        
+    }
+    
+    function testLeadingInlineNodeParagraph() {
         $this->assertResult(
 '<img /> Foo',
 '<p><img /> Foo</p>'
         );
-        
+    }
+    
+    function testTrailingInlineNodeParagraph() {
         $this->assertResult(
 '<li>Foo <a>bar</a></li>'
         );
-        
+    }
+    
+    function testTwoInlineNodeParagraph() {
         $this->assertResult(
 '<li><b>baz</b><a>bar</a></li>'
         );
-        
+    }
+    
+    function testNoParagraphTrailingBlockNodeInBlockNode() {
         $this->assertResult(
 '<div><div>asdf</div><b>asdf</b></div>'
         );
-        
+    }
+    
+    function testParagraphTrailingBlockNodeWithDoublespaceInBlockNode() {
         $this->assertResult(
 '<div><div>asdf</div>
 
 <b>asdf</b></div>',
 '<div><div>asdf</div><p><b>asdf</b></p></div>'
         );
-        
+    }
+    
+    function testParagraphTwoInlineNodesAndWhitespaceNode() {
         $this->assertResult(
 '<b>One</b> <i>Two</i>',
 '<p><b>One</b> <i>Two</i></p>'
         );
-        
     }
     
-    function testInlineRootNode() {
+    function testNoParagraphWithInlineRootNode() {
+        $this->config->set('HTML', 'Parent', 'span');
         $this->assertResult(
 'Par
 
-Par2',
-            true,
-            array('AutoFormat.AutoParagraph' => true, 'HTML.Parent' => 'span')
+Par2'
         );
     }
     
-    function testNeeded() {
+    function testErrorNeeded() {
+        $this->config->set('HTML', 'Allowed', 'b');
         $this->expectError('Cannot enable AutoParagraph injector because p is not allowed');
-        $this->assertResult('<b>foobar</b>', true, array('AutoFormat.AutoParagraph' => true, 'HTML.Allowed' => 'b'));
+        $this->assertResult('<b>foobar</b>');
     }
     
 }
diff --git a/tests/HTMLPurifier/Injector/LinkifyTest.php b/tests/HTMLPurifier/Injector/LinkifyTest.php
index 66a06956..777cac9a 100644
--- a/tests/HTMLPurifier/Injector/LinkifyTest.php
+++ b/tests/HTMLPurifier/Injector/LinkifyTest.php
@@ -8,35 +8,40 @@ class HTMLPurifier_Injector_LinkifyTest extends HTMLPurifier_InjectorHarness
     
     function setup() {
         parent::setup();
-        $this->config = array('AutoFormat.Linkify' => true);
+        $this->config->set('AutoFormat', 'Linkify', true);
     }
     
-    function testLinkify() {
-        
+    function testLinkifyURLInRootNode() {
         $this->assertResult(
             'http://example.com',
             '<a href="http://example.com">http://example.com</a>'
         );
-        
+    }
+    
+    function testLinkifyURLInInlineNode() {
         $this->assertResult(
             '<b>http://example.com</b>',
             '<b><a href="http://example.com">http://example.com</a></b>'
         );
-        
+    }
+    
+    function testBasicUsageCase() {
         $this->assertResult(
             'This URL http://example.com is what you need',
             'This URL <a href="http://example.com">http://example.com</a> is what you need'
         );
-        
+    }
+    
+    function testIgnoreURLInATag() {
         $this->assertResult(
             '<a>http://example.com/</a>'
         );
-        
     }
     
     function testNeeded() {
+        $this->config->set('HTML', 'Allowed', 'b');
         $this->expectError('Cannot enable Linkify injector because a is not allowed');
-        $this->assertResult('http://example.com/', true, array('AutoFormat.Linkify' => true, 'HTML.Allowed' => 'b'));
+        $this->assertResult('http://example.com/');
     }
     
 }
diff --git a/tests/HTMLPurifier/Injector/PurifierLinkifyTest.php b/tests/HTMLPurifier/Injector/PurifierLinkifyTest.php
index e820d677..309733f3 100644
--- a/tests/HTMLPurifier/Injector/PurifierLinkifyTest.php
+++ b/tests/HTMLPurifier/Injector/PurifierLinkifyTest.php
@@ -8,39 +8,53 @@ class HTMLPurifier_Injector_PurifierLinkifyTest extends HTMLPurifier_InjectorHar
     
     function setup() {
         parent::setup();
-        $this->config = array(
-            'AutoFormat.PurifierLinkify' => true,
-            'AutoFormatParam.PurifierLinkifyDocURL' => '#%s'
-        );
+        $this->config->set('AutoFormat', 'PurifierLinkify', true);
+        $this->config->set('AutoFormatParam', 'PurifierLinkifyDocURL', '#%s');
     }
     
-    function testLinkify() {
-        
+    function testNoTriggerCharacer() {
         $this->assertResult('Foobar');
+    }
+    
+    function testTriggerCharacterInIrrelevantContext() {
         $this->assertResult('20% off!');
+    }
+    
+    function testPreserveNamespace() {
         $this->assertResult('%Core namespace (not recognized)');
+    }
+    
+    function testLinkifyBasic() {
         $this->assertResult(
           '%Namespace.Directive',
           '<a href="#Namespace.Directive">%Namespace.Directive</a>'
         );
+    }
+    
+    function testLinkifyWithAdjacentTextNodes() {
         $this->assertResult(
           'This %Namespace.Directive thing',
           'This <a href="#Namespace.Directive">%Namespace.Directive</a> thing'
         );
+    }
+    
+    function testLinkifyInBlock() {
         $this->assertResult(
           '<div>This %Namespace.Directive thing</div>',
           '<div>This <a href="#Namespace.Directive">%Namespace.Directive</a> thing</div>'
         );
+    }
+    
+    function testPreserveInATag() {
         $this->assertResult(
           '<a>%Namespace.Directive</a>'
         );
-        
-        
     }
     
     function testNeeded() {
+        $this->config->set('HTML', 'Allowed', 'b');
         $this->expectError('Cannot enable PurifierLinkify injector because a is not allowed');
-        $this->assertResult('%Namespace.Directive', true, array('AutoFormat.PurifierLinkify' => true, 'HTML.Allowed' => 'b'));
+        $this->assertResult('%Namespace.Directive');
     }
     
 }
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index 75c05b78..85e4c671 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -5,70 +5,98 @@ require_once 'HTMLPurifier/Lexer/DirectLex.php';
 class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
 {
     
-    var $Lexer;
-    var $DirectLex, $PEARSax3, $DOMLex;
-    var $_entity_lookup;
     var $_has_pear = false;
-    var $_has_dom  = false;
     
-    function setUp() {
-        $this->Lexer     = new HTMLPurifier_Lexer();
-        
-        $this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
-        
-        if ( $GLOBALS['HTMLPurifierTest']['PEAR'] && 
-             ((error_reporting() & E_STRICT) != E_STRICT)
+    function HTMLPurifier_LexerTest() {
+        parent::HTMLPurifier_Harness();
+        // E_STRICT = 2048, int used for PHP4 compat: this check disables
+        // PEAR if PHP 5 strict mode is on, since the class is not strict safe
+        if (
+            $GLOBALS['HTMLPurifierTest']['PEAR'] &&
+            ((error_reporting() & 2048) != 2048) // ought to be a better way
         ) {
-            $this->_has_pear = true;
             require_once 'HTMLPurifier/Lexer/PEARSax3.php';
-            $this->PEARSax3  = new HTMLPurifier_Lexer_PEARSax3();
+            $this->_has_pear = true;
         }
-        
-        $this->_has_dom = version_compare(PHP_VERSION, '5', '>=');
-        if ($this->_has_dom) {
-            require_once 'HTMLPurifier/Lexer/DOMLex.php';
-            $this->DOMLex    = new HTMLPurifier_Lexer_DOMLex();
+        if ($GLOBALS['HTMLPurifierTest']['PH5P']) {
+            require_once 'HTMLPurifier/Lexer/PH5P.php';
         }
-        
-        $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
-        
     }
     
+    // HTMLPurifier_Lexer::create() --------------------------------------------
+    
     function test_create() {
-        $config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true));
-        $lexer = HTMLPurifier_Lexer::create($config);
+        $this->config->set('Core', 'MaintainLineNumbers', true);
+        $lexer = HTMLPurifier_Lexer::create($this->config);
         $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
     }
     
+    // HTMLPurifier_Lexer->parseData() -----------------------------------------
+    
+    function assertParseData($input, $expect = true) { // helper: checks parseData() output for $input
+        if ($expect === true) $expect = $input; // true is shorthand for "output must equal input"
+        $lexer = new HTMLPurifier_Lexer(); // fresh lexer per assertion, no shared state between tests
+        $this->assertIdentical($expect, $lexer->parseData($input));
+    }
+    
+    function test_parseData_plainText() {
+        $this->assertParseData('asdf');
+    }
+    
+    function test_parseData_ampersandEntity() {
+        $this->assertParseData('&amp;', '&');
+    }
+    
+    function test_parseData_quotEntity() {
+        $this->assertParseData('&quot;', '"');
+    }
+    
+    function test_parseData_aposNumericEntity() {
+        $this->assertParseData('&#039;', "'");
+    }
+    
+    function test_parseData_aposCompactNumericEntity() {
+        $this->assertParseData('&#39;', "'");
+    }
+    
+    function test_parseData_adjacentAmpersandEntities() {
+        $this->assertParseData('&amp;&amp;&amp;', '&&&');
+    }
+    
+    function test_parseData_trailingUnescapedAmpersand() {
+        $this->assertParseData('&amp;&', '&&'); // [INVALID] input: the bare trailing ampersand is kept as-is
+    }
+    
+    function test_parseData_internalUnescapedAmpersand() {
+        $this->assertParseData('Procter & Gamble'); // [INVALID] input: bare ampersand, expected back unchanged
+    }
+    
+    function test_parseData_improperEntityFaultToleranceTest() {
+        $this->assertParseData('&#x2D;'); // not a special entity, so not converted; fault-tolerance check only
+    }
+    
+    // HTMLPurifier_Lexer->extractBody() ---------------------------------------
+    
     function assertExtractBody($text, $extract = true) {
-        $result = $this->Lexer->extractBody($text);
+        $lexer = new HTMLPurifier_Lexer();
+        $result = $lexer->extractBody($text);
         if ($extract === true) $extract = $text;
         $this->assertIdentical($extract, $result);
     }
     
-    function test_parseData() {
-        $HP =& $this->Lexer;
-        
-        $this->assertIdentical('asdf', $HP->parseData('asdf'));
-        $this->assertIdentical('&', $HP->parseData('&amp;'));
-        $this->assertIdentical('"', $HP->parseData('&quot;'));
-        $this->assertIdentical("'", $HP->parseData('&#039;'));
-        $this->assertIdentical("'", $HP->parseData('&#39;'));
-        $this->assertIdentical('&&&', $HP->parseData('&amp;&amp;&amp;'));
-        $this->assertIdentical('&&', $HP->parseData('&amp;&')); // [INVALID]
-        $this->assertIdentical('Procter & Gamble',
-                $HP->parseData('Procter & Gamble')); // [INVALID]
-        
-        // This is not special, thus not converted. Test of fault tolerance,
-        // realistically speaking, this should never happen
-        $this->assertIdentical('&#x2D;', $HP->parseData('&#x2D;'));
+    function test_extractBody_noBodyTags() {
+        $this->assertExtractBody('<b>Bold</b>');
     }
     
-    
-    function test_extractBody() {
-        $this->assertExtractBody('<b>Bold</b>');
+    function test_extractBody_lowercaseBodyTags() {
         $this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');
+    }
+    
+    function test_extractBody_uppercaseBodyTags() {
         $this->assertExtractBody('<HTML><BODY><B>Bold</B></BODY></HTML>', '<B>Bold</B>');
+    }
+    
+    function test_extractBody_realisticUseCase() {
         $this->assertExtractBody(
 '<?xml version="1.0"
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
@@ -96,303 +124,404 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
          </div>
       </form>
    ');
-        $this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>', '<b>Bold</b>');
-        $this->assertExtractBody('<body>asdf'); // not closed, don't accept
-        
     }
     
-    function test_tokenizeHTML() {
-        
-        $input = array();
-        $expect = array();
-        $sax_expect = array();
-        $config = array();
-        
-        $input[0] = '';
-        $expect[0] = array();
-        
-        $input[1] = 'This is regular text.';
-        $expect[1] = array(
-            new HTMLPurifier_Token_Text('This is regular text.')
-            );
-        
-        $input[2] = 'This is <b>bold</b> text';
-        $expect[2] = array(
-            new HTMLPurifier_Token_Text('This is ')
-           ,new HTMLPurifier_Token_Start('b', array())
-           ,new HTMLPurifier_Token_Text('bold')
-           ,new HTMLPurifier_Token_End('b')
-           ,new HTMLPurifier_Token_Text(' text')
-            );
-        
-        $input[3] = '<DIV>Totally rad dude. <b>asdf</b></div>';
-        $expect[3] = array(
-            new HTMLPurifier_Token_Start('DIV', array())
-           ,new HTMLPurifier_Token_Text('Totally rad dude. ')
-           ,new HTMLPurifier_Token_Start('b', array())
-           ,new HTMLPurifier_Token_Text('asdf')
-           ,new HTMLPurifier_Token_End('b')
-           ,new HTMLPurifier_Token_End('div')
-            );
-        
-        // [XML-INVALID]
-        $input[4] = '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>';
-        $expect[4] = array(
-            new HTMLPurifier_Token_Start('asdf')
-           ,new HTMLPurifier_Token_End('asdf')
-           ,new HTMLPurifier_Token_Start('d')
-           ,new HTMLPurifier_Token_End('d')
-           ,new HTMLPurifier_Token_Start('poOloka')
-           ,new HTMLPurifier_Token_Start('poolasdf')
-           ,new HTMLPurifier_Token_Start('ds')
-           ,new HTMLPurifier_Token_End('asdf')
-           ,new HTMLPurifier_Token_End('ASDF')
-            );
-        // DOM is different because it condenses empty tags into REAL empty ones
-        // as well as makes it well-formed
-        $dom_expect[4] = array(
-            new HTMLPurifier_Token_Empty('asdf')
-           ,new HTMLPurifier_Token_Empty('d')
-           ,new HTMLPurifier_Token_Start('pooloka')
-           ,new HTMLPurifier_Token_Start('poolasdf')
-           ,new HTMLPurifier_Token_Empty('ds')
-           ,new HTMLPurifier_Token_End('poolasdf')
-           ,new HTMLPurifier_Token_End('pooloka')
-            );
-        
-        $input[5] = '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>';
-        $expect[5] = array(
-            new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!'))
-           ,new HTMLPurifier_Token_Text('Link to ')
-           ,new HTMLPurifier_Token_Start('b',array('id'=>'asdf'))
-           ,new HTMLPurifier_Token_Text('foobar')
-           ,new HTMLPurifier_Token_End('b')
-           ,new HTMLPurifier_Token_End('a')
-            );
-        
-        $input[6] = '<br />';
-        $expect[6] = array(
-            new HTMLPurifier_Token_Empty('br')
-            );
-        
-        // [SGML-INVALID] [RECOVERABLE]
-        $input[7] = '<!-- Comment --> <!-- not so well formed --->';
-        $expect[7] = array(
-            new HTMLPurifier_Token_Comment(' Comment ')
-           ,new HTMLPurifier_Token_Text(' ')
-           ,new HTMLPurifier_Token_Comment(' not so well formed -')
-            );
-        $sax_expect[7] = false; // we need to figure out proper comment output
-        
-        // [SGML-INVALID]
-        $input[8] = '<a href=""';
-        $expect[8] = array(
-            new HTMLPurifier_Token_Text('<a href=""')
-            );
-        // SAX parses it into a tag
-        $sax_expect[8] = array(
-            new HTMLPurifier_Token_Start('a', array('href'=>''))
-            ); 
-        // DOM parses it into an empty tag
-        $dom_expect[8] = array(
-            new HTMLPurifier_Token_Empty('a', array('href'=>''))
-            ); 
-        
-        $input[9] = '&lt;b&gt;';
-        $expect[9] = array(
-            new HTMLPurifier_Token_Text('<b>')
-            );
-        $sax_expect[9] = array(
-            new HTMLPurifier_Token_Text('<')
-           ,new HTMLPurifier_Token_Text('b')
-           ,new HTMLPurifier_Token_Text('>')
-            );
-        // note that SAX can clump text nodes together. We won't be
-        // too picky though
-        
-        // [SGML-INVALID]
-        $input[10] = '<a "=>';
-        // We barf on this, aim for no attributes
-        $expect[10] = array(
-            new HTMLPurifier_Token_Start('a', array('"' => ''))
-            );
-        // DOM correctly has no attributes, but also closes the tag
-        $dom_expect[10] = array(
-            new HTMLPurifier_Token_Empty('a')
-            );
-        // SAX barfs on this
-        $sax_expect[10] = array(
-            new HTMLPurifier_Token_Start('a', array('"' => ''))
-            );
-        
-        // [INVALID] [RECOVERABLE]
-        $input[11] = '"';
-        $expect[11] = array( new HTMLPurifier_Token_Text('"') );
-        
-        // compare with this valid one:
-        $input[12] = '&quot;';
-        $expect[12] = array( new HTMLPurifier_Token_Text('"') );
-        $sax_expect[12] = false; // choked!
-        
-        // CDATA sections!
-        $input[13] = '<![CDATA[You <b>can&#39;t</b> get me!]]>';
-        $expect[13] = array( new HTMLPurifier_Token_Text(
-            'You <b>can&#39;t</b> get me!' // raw
-            ) );
-        $sax_expect[13] = array( // SAX has a seperate call for each entity
-                new HTMLPurifier_Token_Text('You '),
-                new HTMLPurifier_Token_Text('<'),
-                new HTMLPurifier_Token_Text('b'),
-                new HTMLPurifier_Token_Text('>'),
-                new HTMLPurifier_Token_Text('can'),
-                new HTMLPurifier_Token_Text('&'),
-                new HTMLPurifier_Token_Text('#39;t'),
-                new HTMLPurifier_Token_Text('<'),
-                new HTMLPurifier_Token_Text('/b'),
-                new HTMLPurifier_Token_Text('>'),
-                new HTMLPurifier_Token_Text(' get me!')
-            );
-        
-        $char_theta = $this->_entity_lookup->table['theta'];
-        $char_rarr  = $this->_entity_lookup->table['rarr'];
-        
-        // test entity replacement
-        $input[14] = '&theta;';
-        $expect[14] = array( new HTMLPurifier_Token_Text($char_theta) );
-        
-        // test that entities aren't replaced in CDATA sections
-        $input[15] = '&theta; <![CDATA[&rarr;]]>';
-        $expect[15] = array( new HTMLPurifier_Token_Text($char_theta . ' &rarr;') );
-        $sax_expect[15] = array(
-                new HTMLPurifier_Token_Text($char_theta . ' '),
-                new HTMLPurifier_Token_Text('&'),
-                new HTMLPurifier_Token_Text('rarr;')
-            );
-        
-        // test entity resolution in attributes
-        $input[16] = '<a href="index.php?title=foo&amp;id=bar">Link</a>';
-        $expect[16] = array(
-                new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar'))
-               ,new HTMLPurifier_Token_Text('Link')
-               ,new HTMLPurifier_Token_End('a')
-            );
-        
-        // test that UTF-8 is preserved
-        $char_hearts = $this->_entity_lookup->table['hearts'];
-        $input[17] = $char_hearts;
-        $expect[17] = array( new HTMLPurifier_Token_Text($char_hearts) );
-        
-        // test weird characters in attributes
-        $input[18] = '<br test="x &lt; 6" />';
-        $expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
-        
-        // test emoticon protection
-        $input[19] = '<b>Whoa! <3 That\'s not good >.></b>';
-        $expect[19] = array(
-            new HTMLPurifier_Token_Start('b'),
-            new HTMLPurifier_Token_Text('Whoa! '),
-            new HTMLPurifier_Token_Text('<3 That\'s not good >'),
-            new HTMLPurifier_Token_Text('.>'),
-            new HTMLPurifier_Token_End('b'),
-        );
-        $dom_expect[19] = array(
-            new HTMLPurifier_Token_Start('b'),
-            new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
-            new HTMLPurifier_Token_End('b'),
-        );
-        $sax_expect[19] = false; // SAX drops the < character
-        $config[19] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
-        
-        // test comment parsing with funky characters inside
-        $input[20] = '<!-- This >< comment --><br />';
-        $expect[20] = array(
-            new HTMLPurifier_Token_Comment(' This >< comment '),
-            new HTMLPurifier_Token_Empty('br')
-        );
-        $sax_expect[20] = false;
-        $config[20] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
-        
-        // test comment parsing of missing end
-        $input[21] = '<!-- This >< comment';
-        $expect[21] = array(
-            new HTMLPurifier_Token_Comment(' This >< comment')
-        );
-        $sax_expect[21] = false;
-        $dom_expect[21] = false;
-        $config[21] = HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' => true));
-        
-        // test CDATA tags
-        $input[22] = '<script>alert("<foo>");</script>';
-        $expect[22] = array(
-            new HTMLPurifier_Token_Start('script')
-           ,new HTMLPurifier_Token_Text('alert("<foo>");')
-           ,new HTMLPurifier_Token_End('script')
-        );
-        $config[22] = HTMLPurifier_Config::create(array('HTML.Trusted' => true));
-        $sax_expect[22] = false;
-        
-        // test escaping
-        $input[23] = '<!-- This comment < &lt; & -->';
-        $expect[23] = array(
-            new HTMLPurifier_Token_Comment(' This comment < &lt; & ') );
-        $sax_expect[23] = false; $config[23] =
-        HTMLPurifier_Config::create(array('Core.AggressivelyFixLt' =>
-        true));
-        
-        // more DirectLex edge-cases 
-        $input[24] = '<a href="><>">';
-        $expect[24] = array(
-            new HTMLPurifier_Token_Start('a', array('href' => '')),
-            new HTMLPurifier_Token_Text('<">')
-        );
-        $sax_expect[24] = false;
-        $dom_expect[24] = array(
-            new HTMLPurifier_Token_Empty('a', array('href' => '><>'))
-        );
-        
-        $default_config = HTMLPurifier_Config::createDefault();
-        $default_context = new HTMLPurifier_Context();
-        foreach($input as $i => $discard) {
-            if (!isset($config[$i])) $config[$i] = $default_config;
-            
-            $result = $this->DirectLex->tokenizeHTML($input[$i], $config[$i], $default_context);
-            $this->assertIdentical($expect[$i], $result, 'DirectLexTest '.$i.': %s');
-            paintIf($result, $expect[$i] != $result);
-            
-            if ($this->_has_pear) {
-                // assert unless I say otherwise
-                $sax_result = $this->PEARSax3->tokenizeHTML($input[$i], $config[$i], $default_context);
-                if (!isset($sax_expect[$i])) {
-                    // by default, assert with normal result
-                    $this->assertIdentical($expect[$i], $sax_result, 'PEARSax3Test '.$i.': %s');
-                    paintIf($sax_result, $expect[$i] != $sax_result);
-                } elseif ($sax_expect[$i] === false) {
-                    // assertions were turned off, optionally dump
-                    // paintIf($sax_expect, $i == NUMBER);
-                } else {
-                    // match with a custom SAX result array
-                    $this->assertIdentical($sax_expect[$i], $sax_result, 'PEARSax3Test (custom) '.$i.': %s');
-                    paintIf($sax_result, $sax_expect[$i] != $sax_result);
-                }
-            }
-            
-            if ($this->_has_dom) {
-                $dom_result = $this->DOMLex->tokenizeHTML($input[$i], $config[$i], $default_context);
-                // same structure as SAX
-                if (!isset($dom_expect[$i])) {
-                    $this->assertIdentical($expect[$i], $dom_result, 'DOMLexTest '.$i.': %s');
-                    paintIf($dom_result, $expect[$i] != $dom_result);
-                } elseif ($dom_expect[$i] === false) {
-                    // paintIf($dom_result, $i == NUMBER);
-                } else {
-                    $this->assertIdentical($dom_expect[$i], $dom_result, 'DOMLexTest (custom) '.$i.': %s');
-                    paintIf($dom_result, $dom_expect[$i] != $dom_result);
-                }
-            }
-            
-        }
-        
+    function test_extractBody_bodyWithAttributes() {
+        $this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>', '<b>Bold</b>');
     }
     
+    function test_extractBody_preserveUnclosedBody() {
+        $this->assertExtractBody('<body>asdf'); // not closed, don't accept
+    }
+    
+    // HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------
+    
+    /**
+     * Tokenizes $input with each available lexer and checks the tokens.
+     * $alt_expect maps a lexer name to its own expected tokens; a value of
+     * false skips the assertion for that lexer.
+     */
+    function assertTokenization($input, $expect, $alt_expect = array()) {
+        $lexers = array();
+        $lexers['DirectLex']  = new HTMLPurifier_Lexer_DirectLex();
+        if ($this->_has_pear) $lexers['PEARSax3']   = new HTMLPurifier_Lexer_PEARSax3();
+        if (version_compare(PHP_VERSION, "5", ">=") && class_exists('DOMDocument')) {
+            $lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
+            // PH5P.php is only included when the PH5P test flag is set
+            if (class_exists('HTMLPurifier_Lexer_PH5P')) $lexers['PH5P'] = new HTMLPurifier_Lexer_PH5P();
+        }
+        foreach ($lexers as $name => $lexer) {
+            $result = $lexer->tokenizeHTML($input, $this->config, $this->context);
+            $t_expect = isset($alt_expect[$name]) ? $alt_expect[$name] : $expect;
+            // false is only a skip marker when it comes from $alt_expect
+            if (isset($alt_expect[$name]) && $t_expect === false) continue;
+            $this->assertIdentical($result, $t_expect, "$name: %s");
+            // dump the actual tokens to help debug a mismatch
+            if ($t_expect != $result) printTokens($result);
+        }
+    }
+    
+    function test_tokenizeHTML_emptyInput() {
+        $this->assertTokenization('', array());
+    }
+    
+    function test_tokenizeHTML_plainText() {
+        $this->assertTokenization(
+            'This is regular text.',
+            array(
+                new HTMLPurifier_Token_Text('This is regular text.')
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_textAndTags() {
+        $this->assertTokenization(
+            'This is <b>bold</b> text',
+            array(
+                new HTMLPurifier_Token_Text('This is '),
+                new HTMLPurifier_Token_Start('b', array()),
+                new HTMLPurifier_Token_Text('bold'),
+                new HTMLPurifier_Token_End('b'),
+                new HTMLPurifier_Token_Text(' text'),
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_normalizeCase() {
+        $this->assertTokenization(
+            '<DIV>Totally rad dude. <b>asdf</b></div>',
+            array(
+                new HTMLPurifier_Token_Start('DIV', array()),
+                new HTMLPurifier_Token_Text('Totally rad dude. '),
+                new HTMLPurifier_Token_Start('b', array()),
+                new HTMLPurifier_Token_Text('asdf'),
+                new HTMLPurifier_Token_End('b'),
+                new HTMLPurifier_Token_End('div'),
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_notWellFormed() {
+        $this->assertTokenization(
+            '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>',
+            array(
+                new HTMLPurifier_Token_Start('asdf'),
+                new HTMLPurifier_Token_End('asdf'),
+                new HTMLPurifier_Token_Start('d'),
+                new HTMLPurifier_Token_End('d'),
+                new HTMLPurifier_Token_Start('poOloka'),
+                new HTMLPurifier_Token_Start('poolasdf'),
+                new HTMLPurifier_Token_Start('ds'),
+                new HTMLPurifier_Token_End('asdf'),
+                new HTMLPurifier_Token_End('ASDF'),
+            ),
+            array(
+                'DOMLex' => $alt = array(
+                    new HTMLPurifier_Token_Empty('asdf'),
+                    new HTMLPurifier_Token_Empty('d'),
+                    new HTMLPurifier_Token_Start('pooloka'),
+                    new HTMLPurifier_Token_Start('poolasdf'),
+                    new HTMLPurifier_Token_Empty('ds'),
+                    new HTMLPurifier_Token_End('poolasdf'),
+                    new HTMLPurifier_Token_End('pooloka'),
+                ),
+                'PH5P' => $alt,
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_whitespaceInTag() {
+        $this->assertTokenization(
+            '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>',
+            array(
+                new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')),
+                new HTMLPurifier_Token_Text('Link to '),
+                new HTMLPurifier_Token_Start('b',array('id'=>'asdf')),
+                new HTMLPurifier_Token_Text('foobar'),
+                new HTMLPurifier_Token_End('b'),
+                new HTMLPurifier_Token_End('a'),
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_emptyTag() {
+        $this->assertTokenization(
+            '<br />',
+            array( new HTMLPurifier_Token_Empty('br') )
+        );
+    }
+    
+    function test_tokenizeHTML_comment() {
+        $this->assertTokenization(
+            '<!-- Comment -->',
+            array( new HTMLPurifier_Token_Comment(' Comment ') ),
+            array(
+                'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- Comment --') ),
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_malformedComment() {
+        $this->assertTokenization(
+            '<!-- not so well formed --->',
+            array( new HTMLPurifier_Token_Comment(' not so well formed -') ),
+            array(
+                'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- not so well formed ---') ),
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_unterminatedTag() {
+        $this->assertTokenization(
+            '<a href=""',
+            array( new HTMLPurifier_Token_Text('<a href=""') ),
+            array(
+                // I like our behavior better, but it's non-standard
+                'DOMLex'   => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),
+                'PEARSax3' => array( new HTMLPurifier_Token_Start('a', array('href'=>'')) ),
+                'PH5P' => false, // total barfing, grabs scaffolding too
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_specialEntities() {
+        $this->assertTokenization(
+            '&lt;b&gt;',
+            array(
+                new HTMLPurifier_Token_Text('<b>')
+            ),
+            array(
+                // some parsers will separate entities out
+                'PEARSax3' => $split = array(
+                    new HTMLPurifier_Token_Text('<'),
+                    new HTMLPurifier_Token_Text('b'),
+                    new HTMLPurifier_Token_Text('>'),
+                ),
+                'PH5P' => $split,
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_earlyQuote() {
+        $this->assertTokenization(
+            '<a "=>',
+            array( new HTMLPurifier_Token_Empty('a') ),
+            array(
+                // we barf on this input
+                'DirectLex' => $tokens = array(
+                    new HTMLPurifier_Token_Start('a', array('"' => ''))
+                ),
+                'PEARSax3' => $tokens,
+                'PH5P' => array(
+                    new HTMLPurifier_Token_Empty('a', array('"' => ''))
+                ),
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_unescapedQuote() {
+        $this->assertTokenization(
+            '"',
+            array( new HTMLPurifier_Token_Text('"') )
+        );
+    }
+    
+    function test_tokenizeHTML_escapedQuote() {
+        $this->assertTokenization(
+            '&quot;',
+            array( new HTMLPurifier_Token_Text('"') ),
+            array(
+                'PEARSax3' => false, // PEAR barfs on this
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_cdata() {
+        $this->assertTokenization(
+            '<![CDATA[You <b>can&#39;t</b> get me!]]>',
+            array( new HTMLPurifier_Token_Text('You <b>can&#39;t</b> get me!') ),
+            array(
+                // PEAR splits up all of the CDATA
+                'PEARSax3' => $split = array(
+                    new HTMLPurifier_Token_Text('You '),
+                    new HTMLPurifier_Token_Text('<'),
+                    new HTMLPurifier_Token_Text('b'),
+                    new HTMLPurifier_Token_Text('>'),
+                    new HTMLPurifier_Token_Text('can'),
+                    new HTMLPurifier_Token_Text('&'),
+                    new HTMLPurifier_Token_Text('#39;t'),
+                    new HTMLPurifier_Token_Text('<'),
+                    new HTMLPurifier_Token_Text('/b'),
+                    new HTMLPurifier_Token_Text('>'),
+                    new HTMLPurifier_Token_Text(' get me!'),
+                ),
+                'PH5P' => $split,
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_characterEntity() {
+        $this->assertTokenization(
+            '&theta;',
+            array( new HTMLPurifier_Token_Text("\xCE\xB8") )
+        );
+    }
+    
+    function test_tokenizeHTML_characterEntityInCDATA() {
+        $this->assertTokenization(
+            '<![CDATA[&rarr;]]>',
+            array( new HTMLPurifier_Token_Text("&rarr;") ),
+            array(
+                'PEARSax3' => $split = array(
+                    new HTMLPurifier_Token_Text('&'),
+                    new HTMLPurifier_Token_Text('rarr;'),
+                ),
+                'PH5P' => $split,
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_entityInAttribute() {
+        $this->assertTokenization(
+            '<a href="index.php?title=foo&amp;id=bar">Link</a>',
+            array(
+                new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')),
+                new HTMLPurifier_Token_Text('Link'),
+                new HTMLPurifier_Token_End('a'),
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_preserveUTF8() {
+        $this->assertTokenization(
+            "\xCE\xB8",
+            array( new HTMLPurifier_Token_Text("\xCE\xB8") )
+        );
+    }
+    
+    function test_tokenizeHTML_specialEntityInAttribute() {
+        $this->assertTokenization(
+            '<br test="x &lt; 6" />',
+            array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) )
+        );
+    }
+    
+    function test_tokenizeHTML_emoticonProtection() {
+        $this->config->set('Core', 'AggressivelyFixLt', true);
+        $this->assertTokenization(
+            '<b>Whoa! <3 That\'s not good >.></b>',
+            array(
+                new HTMLPurifier_Token_Start('b'),
+                new HTMLPurifier_Token_Text('Whoa! '),
+                new HTMLPurifier_Token_Text('<3 That\'s not good >'),
+                new HTMLPurifier_Token_Text('.>'),
+                new HTMLPurifier_Token_End('b')
+            ),
+            array(
+                // text is absorbed together
+                'DOMLex' => array(
+                    new HTMLPurifier_Token_Start('b'),
+                    new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
+                    new HTMLPurifier_Token_End('b'),
+                ),
+                'PEARSax3' => false, // totally mangled
+                'PH5P' => array( // interesting grouping
+                    new HTMLPurifier_Token_Start('b'),
+                    new HTMLPurifier_Token_Text('Whoa! '),
+                    new HTMLPurifier_Token_Text('<'),
+                    new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
+                    new HTMLPurifier_Token_End('b'),
+                ),
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_commentWithFunkyChars() {
+        $this->assertTokenization(
+            '<!-- This >< comment --><br />',
+            array(
+                new HTMLPurifier_Token_Comment(' This >< comment '),
+                new HTMLPurifier_Token_Empty('br'),
+            ),
+            array(
+                'PEARSax3' => false,
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_unterminatedComment() {
+        $this->assertTokenization(
+            '<!-- This >< comment',
+            array( new HTMLPurifier_Token_Comment(' This >< comment') ),
+            array(
+                'DOMLex'   => false,
+                'PEARSax3' => false,
+                'PH5P'     => false,
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_scriptCDATAContents() {
+        $this->config->set('HTML', 'Trusted', true);
+        $this->assertTokenization(
+            'Foo: <script>alert("<foo>");</script>',
+            array(
+                new HTMLPurifier_Token_Text('Foo: '),
+                new HTMLPurifier_Token_Start('script'),
+                new HTMLPurifier_Token_Text('alert("<foo>");'),
+                new HTMLPurifier_Token_End('script'),
+            ),
+            array(
+                'PEARSax3' => false,
+                // PH5P, for some reason, bubbles the script to <head>
+                'PH5P' => false,
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_entitiesInComment() {
+        $this->config->set('Core', 'AggressivelyFixLt', true);
+        $this->assertTokenization(
+            '<!-- This comment < &lt; & -->',
+            array( new HTMLPurifier_Token_Comment(' This comment < &lt; & ') ),
+            array(
+                'PEARSax3' => false
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_attributeWithSpecialCharacters() {
+        $this->assertTokenization(
+            '<a href="><>">',
+            array( new HTMLPurifier_Token_Empty('a', array('href' => '><>')) ),
+            array(
+                'DirectLex' => array(
+                    new HTMLPurifier_Token_Start('a', array('href' => '')),
+                    new HTMLPurifier_Token_Text('<">'),
+                ),
+                'PEARSax3' => false,
+            )
+        );
+    }
+    
+    function test_tokenizeHTML_emptyTagWithSlashInAttribute() {
+        $this->assertTokenization(
+            '<param name="src" value="http://example.com/video.wmv" />',
+            array( new HTMLPurifier_Token_Empty('param', array('name' => 'src', 'value' => 'http://example.com/video.wmv')) )
+        );
+    }
+    
+    /*
+    
+    function test_tokenizeHTML_() {
+        $this->assertTokenization(
+            ,
+            array(
+                
+            )
+        );
+    }
+    */
+    
 }
 
diff --git a/tests/HTMLPurifier/SimpleTest/Reporter.php b/tests/HTMLPurifier/SimpleTest/Reporter.php
index c3b7a5f4..5f01d804 100644
--- a/tests/HTMLPurifier/SimpleTest/Reporter.php
+++ b/tests/HTMLPurifier/SimpleTest/Reporter.php
@@ -16,6 +16,7 @@ class HTMLPurifier_SimpleTest_Reporter extends HTMLReporter
             ?>><?php echo $file ?></option>
         <?php } ?>
     </select>
+    <input type="checkbox" name="standalone" title="Standalone version?" <?php if(isset($_GET['standalone'])) {echo 'checked="checked" ';} ?>/>
     <input type="submit" value="Go">
 </form>
 <?php
diff --git a/tests/HTMLPurifier/Strategy/CoreTest.php b/tests/HTMLPurifier/Strategy/CoreTest.php
index 89b75ad1..8b4e745f 100644
--- a/tests/HTMLPurifier/Strategy/CoreTest.php
+++ b/tests/HTMLPurifier/Strategy/CoreTest.php
@@ -11,26 +11,36 @@ class HTMLPurifier_Strategy_CoreTest extends HTMLPurifier_StrategyHarness
         $this->obj = new HTMLPurifier_Strategy_Core();
     }
     
-    function test() {
-        
+    function testBlankInput() {
         $this->assertResult('');
+    }
+    
+    function testMakeWellFormed() {
         $this->assertResult(
             '<b>Make well formed.',
             '<b>Make well formed.</b>'
         );
+    }
+    
+    function testFixNesting() {
         $this->assertResult(
             '<b><div>Fix nesting.</div></b>',
             '<b></b><div>Fix nesting.</div>'
         );
+    }
+    
+    function testRemoveForeignElements() {
         $this->assertResult(
             '<asdf>Foreign element removal.</asdf>',
             'Foreign element removal.'
         );
+    }
+    
+    function testFirstThree() {
         $this->assertResult(
             '<foo><b><div>All three.</div></b>',
             '<b></b><div>All three.</div>'
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php
index ac651684..e67a3e44 100644
--- a/tests/HTMLPurifier/Strategy/FixNestingTest.php
+++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php
@@ -11,79 +11,81 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
         $this->obj = new HTMLPurifier_Strategy_FixNesting();
     }
     
-    function testBlockAndInlineIntegration() {
-        
-        // legal inline
+    function testPreserveInlineInRoot() {
         $this->assertResult('<b>Bold text</b>');
-        
-        // legal inline and block (default parent element is FLOW)
+    }
+    
+    function testPreserveInlineAndBlockInRoot() {
         $this->assertResult('<a href="about:blank">Blank</a><div>Block</div>');
-        
-        // illegal block in inline
+    }
+    
+    function testRemoveBlockInInline() {
         $this->assertResult(
             '<b><div>Illegal div.</div></b>',
             '<b>Illegal div.</b>'
         );
-        
-        // same test with different configuration (fragile)
-        $this->assertResult(
-            '<b><div>Illegal div.</div></b>',
-            '<b>&lt;div&gt;Illegal div.&lt;/div&gt;</b>',
-            array('Core.EscapeInvalidChildren' => true)
-        );
-        
     }
     
-    function testNodeRemovalIntegration() {
-        
-        // test of empty set that's required, resulting in removal of node
+    // With Core.EscapeInvalidChildren on, the illegal <div> tags are escaped to text, not dropped.
+    function testEscapeBlockInInline() {
+        $this->config->set('Core', 'EscapeInvalidChildren', true);
+        $html     = '<b><div>Illegal div.</div></b>';
+        $expected = '<b>&lt;div&gt;Illegal div.&lt;/div&gt;</b>';
+        $this->assertResult($html, $expected);
+    }
+    
+    function testRemoveNodeWithMissingRequiredElements() {
         $this->assertResult('<ul></ul>', '');
-        
-        // test illegal text which gets removed
+    }
+    
+    function testRemoveIllegalPCDATA() {
         $this->assertResult(
             '<ul>Illegal text<li>Legal item</li></ul>',
             '<ul><li>Legal item</li></ul>'
         );
-        
     }
     
-    function testTableIntegration() {
-        // test custom table definition
-        $this->assertResult(
-            '<table><tr><td>Cell 1</td></tr></table>'
-        );
+    function testCustomTableDefinition() { // one-row, one-cell table; no expected value passed, presumably meaning "unchanged" -- TODO confirm in harness
+        $this->assertResult('<table><tr><td>Cell 1</td></tr></table>');
+    }
+    
+    function testRemoveEmptyTable() {
         $this->assertResult('<table></table>', '');
     }
     
-    function testChameleonIntegration() {
-        
-        // block in inline ins not allowed
+    function testChameleonRemoveBlockInNodeInInline() {
         $this->assertResult(
           '<span><ins><div>Not allowed!</div></ins></span>',
           '<span><ins>Not allowed!</ins></span>'
         );
-        
-        // test block element that has inline content
+    }
+    
+    function testChameleonRemoveBlockInBlockNodeWithInlineContent() {
         $this->assertResult(
           '<h1><ins><div>Not allowed!</div></ins></h1>',
           '<h1><ins>Not allowed!</ins></h1>'
         );
-        
-        // stacked ins/del
+    }
+    
+    function testNestedChameleonRemoveBlockInNodeWithInlineContent() {
         $this->assertResult(
           '<h1><ins><del><div>Not allowed!</div></del></ins></h1>',
           '<h1><ins><del>Not allowed!</del></ins></h1>'
         );
+    }
+    
+    function testNestedChameleonPreserveBlockInBlock() {
         $this->assertResult(
           '<div><ins><del><div>Allowed!</div></del></ins></div>'
         );
-        
+    }
+    
+    function testChameleonEscapeInvalidBlockInInline() {
+        $this->config->set('Core', 'EscapeInvalidChildren', true);
         $this->assertResult( // alt config
           '<span><ins><div>Not allowed!</div></ins></span>',
-          '<span><ins>&lt;div&gt;Not allowed!&lt;/div&gt;</ins></span>',
-          array('Core.EscapeInvalidChildren' => true)
+          '<span><ins>&lt;div&gt;Not allowed!&lt;/div&gt;</ins></span>'
         );
-        
     }
     
     function testExclusionsIntegration() {
@@ -93,41 +95,37 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
           '<a><span></span></a>'
         );
     }
-   
-    function testCustomParentIntegration() {
-        // test inline parent
-        $this->assertResult(
-            '<b>Bold</b>', true, array('HTML.Parent' => 'span')
-        );
-        $this->assertResult(
-            '<div>Reject</div>', 'Reject', array('HTML.Parent' => 'span')
-        );
-   }
-   
-   function testError() {
-        // test fallback to div
-        $this->expectError('Cannot use unrecognized element as parent.');
-        $this->assertResult(
-            '<div>Accept</div>', true, array('HTML.Parent' => 'obviously-impossible')
-        );
-        $this->swallowErrors();
-        
+    
+    function testPreserveInlineNodeInInlineRootNode() {
+        $this->config->set('HTML', 'Parent', 'span');
+        $this->assertResult('<b>Bold</b>');
     }
     
-    function testDoubleCheckIntegration() {
-        // breaks without the redundant checking code
+    function testRemoveBlockNodeInInlineRootNode() {
+        $this->config->set('HTML', 'Parent', 'span'); // make the root an inline element
+        $this->assertResult('<div>Reject</div>', 'Reject'); // the <div> tags go, the text stays
+    }
+   
+    function testInvalidParentError() {
+        // an unrecognized HTML.Parent must raise an error; the test then expects the fallback to div, so the <div> input is asserted as-is
+        $this->config->set('HTML', 'Parent', 'obviously-impossible');
+        $this->expectError('Cannot use unrecognized element as parent'); // queued before the call expected to raise it
+        $this->assertResult('<div>Accept</div>');
+    }
+    
+    function testCascadingRemovalOfNodesMissingRequiredChildren() {
         $this->assertResult('<table><tr></tr></table>', '');
-        
-        // special case, prevents scrolling one back to find parent
+    }
+    
+    function testCascadingRemovalSpecialCaseCannotScrollOneBack() {
         $this->assertResult('<table><tr></tr><tr></tr></table>', '');
-        
-        // cascading rollbacks
-        $this->assertResult(
-          '<table><tbody><tr></tr><tr></tr></tbody><tr></tr><tr></tr></table>',
-          ''
-        );
-        
-        // rollbacks twice
+    }
+    
+    function testLotsOfCascadingRemovalOfNodes() { // every <tr> here is empty, and the expected output is the empty string
+        $this->assertResult('<table><tbody><tr></tr><tr></tr></tbody><tr></tr><tr></tr></table>', '');
+    }
+    
+    function testAdjacentRemovalOfNodeMissingRequiredChildren() {
         $this->assertResult('<table></table><table></table>', '');
     }
     
diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php
index 5a1cca99..a227d6a7 100644
--- a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php
+++ b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php
@@ -9,113 +9,77 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
     function setUp() {
         parent::setUp();
         $this->obj = new HTMLPurifier_Strategy_MakeWellFormed();
-        $this->config = array();
     }
     
-    function testNormalIntegration() {
+    function testEmptyInput() {
         $this->assertResult('');
+    }
+    
+    function testWellFormedInput() {
         $this->assertResult('This is <b>bold text</b>.');
     }
     
-    function testUnclosedTagIntegration() {
+    function testUnclosedTagTerminatedByDocumentEnd() {
         $this->assertResult(
             '<b>Unclosed tag, gasp!',
             '<b>Unclosed tag, gasp!</b>'
         );
-        
+    }
+    
+    function testUnclosedTagTerminatedByParentNodeEnd() {
         $this->assertResult(
             '<b><i>Bold and italic?</b>',
             '<b><i>Bold and italic?</i></b>'
         );
-        
+    }
+    
+    function testRemoveStrayClosingTag() {
         $this->assertResult(
             'Unused end tags... recycle!</b>',
             'Unused end tags... recycle!'
         );
     }
     
-    function testEmptyTagDetectionIntegration() {
+    function testConvertStartToEmpty() {
         $this->assertResult(
             '<br style="clear:both;">',
             '<br style="clear:both;" />'
         );
-        
+    }
+    
+    function testConvertEmptyToStart() {
         $this->assertResult(
             '<div style="clear:both;" />',
             '<div style="clear:both;"></div>'
         );
     }
     
-    function testAutoClose() {
-        // paragraph
-        
+    function testAutoCloseParagraph() {
         $this->assertResult(
             '<p>Paragraph 1<p>Paragraph 2',
             '<p>Paragraph 1</p><p>Paragraph 2</p>'
         );
-        
+    }
+    
+    function testAutoCloseParagraphInsideDiv() {
         $this->assertResult(
             '<div><p>Paragraphs<p>In<p>A<p>Div</div>',
             '<div><p>Paragraphs</p><p>In</p><p>A</p><p>Div</p></div>'
         );
-        
-        // list
-        
+    }
+    
+    function testAutoCloseListItem() {
         $this->assertResult(
             '<ol><li>Item 1<li>Item 2</ol>',
             '<ol><li>Item 1</li><li>Item 2</li></ol>'
         );
-        
-        // colgroup
-        
+    }
+    
+    function testAutoCloseColgroup() {
         $this->assertResult(
             '<table><colgroup><col /><tr></tr></table>',
             '<table><colgroup><col /></colgroup><tr></tr></table>'
         );
-        
-    }
-    
-    function testMultipleInjectors() {
-        
-        $this->config = array('AutoFormat.AutoParagraph' => true, 'AutoFormat.Linkify' => true);
-        
-        $this->assertResult(
-            'Foobar',
-            '<p>Foobar</p>'
-        );
-        
-        $this->assertResult(
-            'http://example.com',
-            '<p><a href="http://example.com">http://example.com</a></p>'
-        );
-        
-        $this->assertResult(
-            '<b>http://example.com</b>',
-            '<p><b><a href="http://example.com">http://example.com</a></b></p>'
-        );
-        
-        $this->assertResult(
-            '<b>http://example.com',
-            '<p><b><a href="http://example.com">http://example.com</a></b></p>'
-        );
-        
-        $this->assertResult(
-'http://example.com
-
-http://dev.example.com',
-            '<p><a href="http://example.com">http://example.com</a></p><p><a href="http://dev.example.com">http://dev.example.com</a></p>'
-        );
-        
-        $this->assertResult(
-            'http://example.com <div>http://example.com</div>',
-            '<p><a href="http://example.com">http://example.com</a> </p><div><a href="http://example.com">http://example.com</a></div>'
-        );
-        
-        $this->assertResult(
-            'This URL http://example.com is what you need',
-            '<p>This URL <a href="http://example.com">http://example.com</a> is what you need</p>'
-        );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
new file mode 100644
index 00000000..e8e6c797
--- /dev/null
+++ b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
@@ -0,0 +1,65 @@
+<?php
+
+require_once 'HTMLPurifier/StrategyHarness.php';
+require_once 'HTMLPurifier/Strategy/MakeWellFormed.php';
+
+class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_StrategyHarness
+{
+    // Fixture: MakeWellFormed with AutoFormat.AutoParagraph and AutoFormat.Linkify both enabled.
+    function setUp() {
+        parent::setUp();
+        $this->config->set('AutoFormat', 'AutoParagraph', true);
+        $this->config->set('AutoFormat', 'Linkify', true);
+        $this->obj = new HTMLPurifier_Strategy_MakeWellFormed();
+    }
+    
+    // Plain text without a URL is only wrapped in <p>.
+    function testOnlyAutoParagraph() {
+        $html     = 'Foobar';
+        $expected = '<p>Foobar</p>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // A bare URL becomes a link, and the link is wrapped in <p>.
+    function testParagraphWrappingOnlyLink() {
+        $html     = 'http://example.com';
+        $expected = '<p><a href="http://example.com">http://example.com</a></p>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // A URL inside <b> is linkified and the whole <b> is wrapped in <p>.
+    function testParagraphWrappingNodeContainingLink() {
+        $html     = '<b>http://example.com</b>';
+        $expected = '<p><b><a href="http://example.com">http://example.com</a></b></p>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // An unclosed <b> around a URL is closed, linkified and wrapped in <p>.
+    function testParagraphWrappingPoorlyFormedNodeContainingLink() {
+        $html     = '<b>http://example.com';
+        $expected = '<p><b><a href="http://example.com">http://example.com</a></b></p>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // Two URLs separated by a blank line end up as two paragraphs with one link each.
+    function testTwoParagraphsContainingOnlyOneLink() {
+        $html     = "http://example.com\n\nhttp://dev.example.com";
+        $expected = '<p><a href="http://example.com">http://example.com</a></p><p><a href="http://dev.example.com">http://dev.example.com</a></p>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // Text before a <div> gets its own <p>; the URL inside the <div> is linkified without a <p>.
+    function testParagraphNextToDivWithLinks() {
+        $html     = 'http://example.com <div>http://example.com</div>';
+        $expected = '<p><a href="http://example.com">http://example.com</a> </p><div><a href="http://example.com">http://example.com</a></div>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // A URL in the middle of a sentence is linkified in place.
+    function testRealisticLinkInSentence() {
+        $html     = 'This URL http://example.com is what you need';
+        $expected = '<p>This URL <a href="http://example.com">http://example.com</a> is what you need</p>';
+        $this->assertResult($html, $expected);
+    }
+    
+}
diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
index 87a4b38c..19a37b24 100644
--- a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
+++ b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
@@ -3,8 +3,7 @@
 require_once 'HTMLPurifier/StrategyHarness.php';
 require_once 'HTMLPurifier/Strategy/RemoveForeignElements.php';
 
-class HTMLPurifier_Strategy_RemoveForeignElementsTest
-  extends HTMLPurifier_StrategyHarness
+class HTMLPurifier_Strategy_RemoveForeignElementsTest extends HTMLPurifier_StrategyHarness
 {
     
     function setUp() {
@@ -12,96 +11,75 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
         $this->obj = new HTMLPurifier_Strategy_RemoveForeignElements();
     }
     
-    function test() {
-        
-        $this->config = array('HTML.Doctype' => 'XHTML 1.0 Strict');
-        
+    function testBlankInput() {
         $this->assertResult('');
-        
+    }
+    
+    function testPreserveRecognizedElements() {
         $this->assertResult('This is <b>bold text</b>.');
-        
+    }
+    
+    function testRemoveForeignElements() {
         $this->assertResult(
             '<asdf>Bling</asdf><d href="bang">Bong</d><foobar />',
             'BlingBong'
         );
-        
+    }
+    
+    function testRemoveScriptAndContents() {
         $this->assertResult(
             '<script>alert();</script>',
             ''
         );
-        
+    }
+    
+    function testRemoveStyleAndContents() {
         $this->assertResult(
             '<style>.foo {blink;}</style>',
             ''
         );
-        
+    }
+    
+    function testRemoveOnlyScriptTagsLegacy() {
+        $this->config->set('Core', 'RemoveScriptContents', false);
         $this->assertResult(
             '<script>alert();</script>',
-            'alert();',
-            array('Core.RemoveScriptContents' => false)
+            'alert();'
         );
-        
+    }
+    
+    function testRemoveOnlyScriptTags() {
+        $this->config->set('Core', 'HiddenElements', array());
         $this->assertResult(
             '<script>alert();</script>',
-            'alert();',
-            array('Core.HiddenElements' => array())
+            'alert();'
         );
-        
-        $this->assertResult(
-            '<menu><li>Item 1</li></menu>',
-            '<ul><li>Item 1</li></ul>'
-        );
-        
-        // test center transform
-        $this->assertResult(
-            '<center>Look I am Centered!</center>',
-            '<div style="text-align:center;">Look I am Centered!</div>'
-        );
-        
-        // test font transform
-        $this->assertResult(
-            '<font color="red" face="Arial" size="6">Big Warning!</font>',
-            '<span style="color:red;font-family:Arial;font-size:xx-large;">Big'.
-              ' Warning!</span>'
-        );
-        
-        // test removal of invalid img tag
-        $this->assertResult(
-            '<img />',
-            ''
-        );
-        
-        // test preservation of valid img tag
+    }
+    
+    function testRemoveInvalidImg() { // an <img> with no attributes must produce empty output
+        $this->assertResult('<img />', '');
+    }
+    
+    function testPreserveValidImg() {
         $this->assertResult('<img src="foobar.gif" alt="foobar.gif" />');
-        
-        // test preservation of invalid img tag when removal is disabled
-        $this->assertResult(
-            '<img />',
-            true,
-            array(
-                'Core.RemoveInvalidImg' => false
-            )
-        );
-        
-        // test transform to unallowed element
-        $this->assertResult(
-            '<font color="red" face="Arial" size="6">Big Warning!</font>',
-            'Big Warning!',
-            array('HTML.Allowed' => 'div')
-        );
-        
-        // text-ify commented script contents ( the trailing comment gets
-        // removed during generation )
+    }
+    
+    function testPreserveInvalidImgWhenRemovalIsDisabled() {
+        $this->config->set('Core', 'RemoveInvalidImg', false); // turn off Core.RemoveInvalidImg for this test
+        $this->assertResult('<img />'); // no expected value passed; presumably means "output equals input" -- TODO confirm in harness
+    }
+    
+    function testTextifyCommentedScriptContents() {
+        $this->config->set('HTML', 'Trusted', true);
+        $this->config->set('Output', 'CommentScriptContents', false); // simplify output
         $this->assertResult(
 '<script type="text/javascript"><!--
 alert(<b>bold</b>);
 // --></script>',
 '<script type="text/javascript">
 alert(&lt;b&gt;bold&lt;/b&gt;);
-// </script>',
-            array('HTML.Trusted' => true, 'Output.CommentScriptContents' => false)
+// </script>'
         );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElements_TidyTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElements_TidyTest.php
new file mode 100644
index 00000000..8071d2ab
--- /dev/null
+++ b/tests/HTMLPurifier/Strategy/RemoveForeignElements_TidyTest.php
@@ -0,0 +1,46 @@
+<?php
+
+require_once 'HTMLPurifier/StrategyHarness.php';
+require_once 'HTMLPurifier/Strategy/RemoveForeignElements.php';
+
+class HTMLPurifier_Strategy_RemoveForeignElements_TidyTest extends HTMLPurifier_StrategyHarness
+{
+    
+    // Fixture: RemoveForeignElements with HTML.TidyLevel set to 'heavy'.
+    function setUp() {
+        parent::setUp();
+        $this->config->set('HTML', 'TidyLevel', 'heavy');
+        $this->obj = new HTMLPurifier_Strategy_RemoveForeignElements();
+    }
+    
+    // <center> is rewritten as a <div> carrying text-align:center.
+    function testCenterTransform() {
+        $html     = '<center>Look I am Centered!</center>';
+        $expected = '<div style="text-align:center;">Look I am Centered!</div>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // <font> color/face/size are rewritten as a <span> with the matching CSS declarations.
+    function testFontTransform() {
+        $html     = '<font color="red" face="Arial" size="6">Big Warning!</font>';
+        $expected = '<span style="color:red;font-family:Arial;font-size:xx-large;">Big'.
+                    ' Warning!</span>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // With HTML.Allowed limited to 'div', neither <font> nor a <span> appears; only the text is left.
+    function testTransformToForbiddenElement() {
+        $this->config->set('HTML', 'Allowed', 'div');
+        $html     = '<font color="red" face="Arial" size="6">Big Warning!</font>';
+        $expected = 'Big Warning!';
+        $this->assertResult($html, $expected);
+    }
+    
+    // <menu> is rewritten as <ul>; its list item is unchanged.
+    function testMenuTransform() {
+        $html     = '<menu><li>Item 1</li></menu>';
+        $expected = '<ul><li>Item 1</li></ul>';
+        $this->assertResult($html, $expected);
+    }
+    
+}
diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php
index fc7de460..25359425 100644
--- a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php
+++ b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php
@@ -1,6 +1,5 @@
 <?php
 
-require_once('HTMLPurifier/Config.php');
 require_once('HTMLPurifier/StrategyHarness.php');
 require_once('HTMLPurifier/Strategy/ValidateAttributes.php');
 
@@ -11,126 +10,99 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
     function setUp() {
         parent::setUp();
         $this->obj = new HTMLPurifier_Strategy_ValidateAttributes();
-        $this->config = array('HTML.Doctype' => 'XHTML 1.0 Strict');
     }
     
-    function testEmpty() {
+    function testEmptyInput() {
         $this->assertResult('');
     }
     
-    function testIDs() {
+    function testRemoveIDByDefault() {
         $this->assertResult(
             '<div id="valid">Kill the ID.</div>',
             '<div>Kill the ID.</div>'
         );
-            
-        $this->assertResult('<div id="valid">Preserve the ID.</div>', true,
-            array('HTML.EnableAttrID' => true));
-        
-        $this->assertResult(
-            '<div id="0invalid">Kill the ID.</div>',
-            '<div>Kill the ID.</div>',
-            array('HTML.EnableAttrID' => true)
-        );
-        
-        // test id accumulator
-        $this->assertResult(
-            '<div id="valid">Valid</div><div id="valid">Invalid</div>',
-            '<div id="valid">Valid</div><div>Invalid</div>',
-            array('HTML.EnableAttrID' => true)
-        );
-        
+    }
+    
+    function testRemoveInvalidDir() {
         $this->assertResult(
             '<span dir="up-to-down">Bad dir.</span>',
             '<span>Bad dir.</span>'
         );
-        
-        // test attribute key case sensitivity
-        $this->assertResult(
-            '<div ID="valid">Convert ID to lowercase.</div>',
-            '<div id="valid">Convert ID to lowercase.</div>',
-            array('HTML.EnableAttrID' => true)
-        );
-        
-        // test simple attribute substitution
-        $this->assertResult(
-            '<div id=" valid ">Trim whitespace.</div>',
-            '<div id="valid">Trim whitespace.</div>',
-            array('HTML.EnableAttrID' => true)
-        );
-        
-        // test configuration id blacklist
-        $this->assertResult(
-            '<div id="invalid">Invalid</div>',
-            '<div>Invalid</div>',
-            array(
-                'Attr.IDBlacklist' => array('invalid'),
-                'HTML.EnableAttrID' => true
-            )
-        );
-        
-        // name rewritten as id
-        $this->assertResult(
-            '<a name="foobar" />',
-            '<a id="foobar" />',
-            array('HTML.EnableAttrID' => true)
-        );
     }
     
-    function testClasses() {
+    function testPreserveValidClass() {
         $this->assertResult('<div class="valid">Valid</div>');
-        
+    }
+    
+    function testSelectivelyRemoveInvalidClasses() {
         $this->assertResult(
             '<div class="valid 0invalid">Keep valid.</div>',
             '<div class="valid">Keep valid.</div>'
         );
     }
     
-    function testTitle() {
+    function testPreserveTitle() {
         $this->assertResult(
             '<acronym title="PHP: Hypertext Preprocessor">PHP</acronym>'
         );
     }
     
-    function testLang() {
+    function testAddXMLLang() {
         $this->assertResult(
             '<span lang="fr">La soupe.</span>',
             '<span lang="fr" xml:lang="fr">La soupe.</span>'
         );
-        
-        // test only xml:lang for XHTML 1.1
+    }
+    
+    function testOnlyXMLLangInXHTML11() {
+        $this->config->set('HTML', 'Doctype', 'XHTML 1.1');
         $this->assertResult(
             '<b lang="en">asdf</b>',
-            '<b xml:lang="en">asdf</b>', array('HTML.Doctype' => 'XHTML 1.1')
+            '<b xml:lang="en">asdf</b>'
         );
     }
     
-    function testAlign() {
-        
-        $this->assertResult(
-            '<h1 align="center">Centered Headline</h1>',
-            '<h1 style="text-align:center;">Centered Headline</h1>'
-        );
-        $this->assertResult(
-            '<h1 align="right">Right-aligned Headline</h1>',
-            '<h1 style="text-align:right;">Right-aligned Headline</h1>'
-        );
-        $this->assertResult(
-            '<h1 align="left">Left-aligned Headline</h1>',
-            '<h1 style="text-align:left;">Left-aligned Headline</h1>'
-        );
-        $this->assertResult(
-            '<p align="justify">Justified Paragraph</p>',
-            '<p style="text-align:justify;">Justified Paragraph</p>'
-        );
-        $this->assertResult(
-            '<h1 align="invalid">Invalid Headline</h1>',
-            '<h1>Invalid Headline</h1>'
-        );
-        
+    function testBasicURI() {
+        $this->assertResult('<a href="http://www.google.com/">Google</a>');
     }
     
-    function testTable() {
+    // A javascript: href is stripped, leaving a bare <a>.
+    function testInvalidURI() {
+        $html     = '<a href="javascript:badstuff();">Google</a>';
+        $expected = '<a>Google</a>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // A <bdo> without dir gets dir="ltr" added.
+    function testBdoAddMissingDir() {
+        $html     = '<bdo>Go left.</bdo>';
+        $expected = '<bdo dir="ltr">Go left.</bdo>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // An invalid dir on <bdo> is replaced with "ltr" rather than removed.
+    function testBdoReplaceInvalidDirWithDefault() {
+        $html     = '<bdo dir="blahblah">Invalid value!</bdo>';
+        $expected = '<bdo dir="ltr">Invalid value!</bdo>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // With Attr.DefaultTextDir set to 'rtl', the dir added to <bdo> is "rtl".
+    function testBdoAlternateDefaultDir() {
+        $this->config->set('Attr', 'DefaultTextDir', 'rtl');
+        $html     = '<bdo>Go right.</bdo>';
+        $expected = '<bdo dir="rtl">Go right.</bdo>';
+        $this->assertResult($html, $expected);
+    }
+    
+    // On <span> an invalid dir is simply removed; no default value is substituted.
+    function testRemoveDirWhenNotRequired() {
+        $html     = '<span dir="blahblah">Invalid value!</span>';
+        $expected = '<span>Invalid value!</span>';
+        $this->assertResult($html, $expected);
+    }
+    
+    function testTableAttributes() {
         $this->assertResult(
 '<table frame="above" rules="rows" summary="A test table" border="2" cellpadding="5%" cellspacing="3" width="100%">
     <col align="right" width="4*" />
@@ -148,293 +120,64 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
     </tr>
 </table>'
         );
-        
-        // test col.span is non-zero
+    }
+    
+    function testColSpanIsNonZero() {
         $this->assertResult(
             '<col span="0" />',
             '<col />'
         );
-        // lengths
-        $this->assertResult(
-            '<td width="5%" height="10" /><th width="10" height="5%" /><hr width="10" height="10" />',
-            '<td style="width:5%;height:10px;" /><th style="width:10px;height:5%;" /><hr style="width:10px;" />'
-        );
-        // td boolean transformation
-        $this->assertResult(
-            '<td nowrap />',
-            '<td style="white-space:nowrap;" />'
-        );
-        
-        // caption align transformation
-        $this->assertResult(
-            '<caption align="left" />',
-            '<caption style="text-align:left;" />'
-        );
-        $this->assertResult(
-            '<caption align="right" />',
-            '<caption style="text-align:right;" />'
-        );
-        $this->assertResult(
-            '<caption align="top" />',
-            '<caption style="caption-side:top;" />'
-        );
-        $this->assertResult(
-            '<caption align="bottom" />',
-            '<caption style="caption-side:bottom;" />'
-        );
-        $this->assertResult(
-            '<caption align="nonsense" />',
-            '<caption />'
-        );
-        
-        // align transformation
-        $this->assertResult(
-            '<table align="left" />',
-            '<table style="float:left;" />'
-        );
-        $this->assertResult(
-            '<table align="center" />',
-            '<table style="margin-left:auto;margin-right:auto;" />'
-        );
-        $this->assertResult(
-            '<table align="right" />',
-            '<table style="float:right;" />'
-        );
-        $this->assertResult(
-            '<table align="top" />',
-            '<table />'
-        );
     }
     
-    function testURI() {
-        $this->assertResult('<a href="http://www.google.com/">Google</a>');
-        
-        // test invalid URI
-        $this->assertResult(
-            '<a href="javascript:badstuff();">Google</a>',
-            '<a>Google</a>'
-        );
-    }
-    
-    function testImg() {
+    function testImgAddDefaults() { // a bare <img> is expected to receive an empty src and a placeholder alt
+        $this->config->set('Core', 'RemoveInvalidImg', false); // set per test via the config object instead of the former third assertResult() argument
         $this->assertResult(
             '<img />',
-            '<img src="" alt="Invalid image" />',
-            array('Core.RemoveInvalidImg' => false)
+            '<img src="" alt="Invalid image" />'
         );
-        
+    }
+    
+    function testImgGenerateAlt() { // a missing alt is expected to be filled in with the src value
         $this->assertResult(
             '<img src="foobar.jpg" />',
             '<img src="foobar.jpg" alt="foobar.jpg" />'
         );
-        
+    }
+    
+    function testImgAddDefaultSrc() { // an <img> with alt but no src is expected to gain an empty src, keeping its alt
+        $this->config->set('Core', 'RemoveInvalidImg', false); // set per test via the config object instead of the former third assertResult() argument
         $this->assertResult(
             '<img alt="pretty picture" />',
-            '<img alt="pretty picture" src="" />',
-            array('Core.RemoveInvalidImg' => false)
+            '<img alt="pretty picture" src="" />'
         );
-        // mailto in image is not allowed
+    }
+    
+    function testImgRemoveNonRetrievableProtocol() {
+        $this->config->set('Core', 'RemoveInvalidImg', false);
         $this->assertResult(
             '<img src="mailto:foo@example.com" />',
-            '<img alt="mailto:foo@example.com" src="" />',
-            array('Core.RemoveInvalidImg' => false)
-        );
-        // align transformation
-        $this->assertResult(
-            '<img src="foobar.jpg" alt="foobar" align="left" />',
-            '<img src="foobar.jpg" alt="foobar" style="float:left;" />'
-        );
-        $this->assertResult(
-            '<img src="foobar.jpg" alt="foobar" align="right" />',
-            '<img src="foobar.jpg" alt="foobar" style="float:right;" />'
-        );
-        $this->assertResult(
-            '<img src="foobar.jpg" alt="foobar" align="bottom" />',
-            '<img src="foobar.jpg" alt="foobar" style="vertical-align:baseline;" />'
-        );
-        $this->assertResult(
-            '<img src="foobar.jpg" alt="foobar" align="middle" />',
-            '<img src="foobar.jpg" alt="foobar" style="vertical-align:middle;" />'
-        );
-        $this->assertResult(
-            '<img src="foobar.jpg" alt="foobar" align="top" />',
-            '<img src="foobar.jpg" alt="foobar" style="vertical-align:top;" />'
-        );
-        $this->assertResult(
-            '<img src="foobar.jpg" alt="foobar" align="outerspace" />',
-            '<img src="foobar.jpg" alt="foobar" />'
-        );
-        
-    }
-    
-    function testBdo() {
-        // test required attributes for bdo
-        $this->assertResult(
-            '<bdo>Go left.</bdo>',
-            '<bdo dir="ltr">Go left.</bdo>'
-        );
-        
-        $this->assertResult(
-            '<bdo dir="blahblah">Invalid value!</bdo>',
-            '<bdo dir="ltr">Invalid value!</bdo>'
+            '<img alt="mailto:foo@example.com" src="" />'
         );
     }
     
-    function testDir() {
-        // see testBdo, behavior is subtly different
-        $this->assertResult(
-            '<span dir="blahblah">Invalid value!</span>',
-            '<span>Invalid value!</span>'
-        );
+    function testPreserveRel() { // rel="nofollow" should survive once listed in Attr.AllowedRel
+        $this->config->set('Attr', 'AllowedRel', 'nofollow');
+        $this->assertResult('<a href="foo" rel="nofollow" />'); // single-argument form: presumably "expect input unchanged" -- confirm in the harness
     }
-        
-    function testLinks() {
-        // link types
-        $this->assertResult(
-            '<a href="foo" rel="nofollow" />',
-            true,
-            array('Attr.AllowedRel' => 'nofollow')
-        );
-        // link targets
-        $this->assertResult(
-            '<a href="foo" target="_top" />',
-            true,
-            array('Attr.AllowedFrameTargets' => '_top',
-                'HTML.Doctype' => 'XHTML 1.0 Transitional')
-        );
+    
+    function testPreserveTarget() { // target="_top" should survive under a Transitional doctype when listed in Attr.AllowedFrameTargets
+        $this->config->set('Attr', 'AllowedFrameTargets', '_top');
+        $this->config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
+        $this->assertResult('<a href="foo" target="_top" />'); // single-argument form: presumably "expect input unchanged" -- confirm in the harness
+    }
+    
+    function testRemoveTargetWhenNotSupported() {
+        $this->config->set('HTML', 'Doctype', 'XHTML 1.0 Strict');
+        $this->config->set('Attr', 'AllowedFrameTargets', '_top');
         $this->assertResult(
             '<a href="foo" target="_top" />',
             '<a href="foo" />'
         );
-        $this->assertResult(
-            '<a href="foo" target="_top" />',
-            '<a href="foo" />',
-            array('Attr.AllowedFrameTargets' => '_top', 'HTML.Strict' => true)
-        );
-    }
-    
-    function testBorder() {
-        // border
-        $this->assertResult(
-            '<img src="foo" alt="foo" hspace="1" vspace="3" />',
-            '<img src="foo" alt="foo" style="margin-top:3px;margin-bottom:3px;margin-left:1px;margin-right:1px;" />',
-            array('Attr.AllowedRel' => 'nofollow')
-        );
-    }
-    
-    function testHr() {
-        $this->assertResult(
-            '<hr size="3" />',
-            '<hr style="height:3px;" />'
-        );
-        $this->assertResult(
-            '<hr noshade />',
-            '<hr style="color:#808080;background-color:#808080;border:0;" />'
-        );
-        // align transformation
-        $this->assertResult(
-            '<hr align="left" />',
-            '<hr style="margin-left:0;margin-right:auto;text-align:left;" />'
-        );
-        $this->assertResult(
-            '<hr align="center" />',
-            '<hr style="margin-left:auto;margin-right:auto;text-align:center;" />'
-        );
-        $this->assertResult(
-            '<hr align="right" />',
-            '<hr style="margin-left:auto;margin-right:0;text-align:right;" />'
-        );
-        $this->assertResult(
-            '<hr align="bottom" />',
-            '<hr />'
-        );
-    }
-    
-    function testBr() {
-        // br clear transformation
-        $this->assertResult(
-            '<br clear="left" />',
-            '<br style="clear:left;" />'
-        );
-        $this->assertResult(
-            '<br clear="right" />',
-            '<br style="clear:right;" />'
-        );
-        $this->assertResult( // test both?
-            '<br clear="all" />',
-            '<br style="clear:both;" />'
-        );
-        $this->assertResult(
-            '<br clear="none" />',
-            '<br style="clear:none;" />'
-        );
-        $this->assertResult(
-            '<br clear="foo" />',
-            '<br />'
-        );
-    }
-    
-    function testListTypeTransform() {
-        // ul
-        $this->assertResult(
-            '<ul type="disc" />',
-            '<ul style="list-style-type:disc;" />'
-        );
-        $this->assertResult(
-            '<ul type="square" />',
-            '<ul style="list-style-type:square;" />'
-        );
-        $this->assertResult(
-            '<ul type="circle" />',
-            '<ul style="list-style-type:circle;" />'
-        );
-        $this->assertResult( // case insensitive
-            '<ul type="CIRCLE" />',
-            '<ul style="list-style-type:circle;" />'
-        );
-        $this->assertResult(
-            '<ul type="a" />',
-            '<ul />'
-        );
-        // ol
-        $this->assertResult(
-            '<ol type="1" />',
-            '<ol style="list-style-type:decimal;" />'
-        );
-        $this->assertResult(
-            '<ol type="i" />',
-            '<ol style="list-style-type:lower-roman;" />'
-        );
-        $this->assertResult(
-            '<ol type="I" />',
-            '<ol style="list-style-type:upper-roman;" />'
-        );
-        $this->assertResult(
-            '<ol type="a" />',
-            '<ol style="list-style-type:lower-alpha;" />'
-        );
-        $this->assertResult(
-            '<ol type="A" />',
-            '<ol style="list-style-type:upper-alpha;" />'
-        );
-        $this->assertResult(
-            '<ol type="disc" />',
-            '<ol />'
-        );
-        // li
-        $this->assertResult(
-            '<li type="circle" />',
-            '<li style="list-style-type:circle;" />'
-        );
-        $this->assertResult(
-            '<li type="A" />',
-            '<li style="list-style-type:upper-alpha;" />'
-        );
-        $this->assertResult( // case sensitive
-            '<li type="CIRCLE" />',
-            '<li />'
-        );
-        
     }
     
 }
diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributes_IDTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributes_IDTest.php
new file mode 100644
index 00000000..d73b0ebd
--- /dev/null
+++ b/tests/HTMLPurifier/Strategy/ValidateAttributes_IDTest.php
@@ -0,0 +1,65 @@
+<?php
+
+require_once('HTMLPurifier/StrategyHarness.php');
+require_once('HTMLPurifier/Strategy/ValidateAttributes.php');
+
+class HTMLPurifier_Strategy_ValidateAttributes_IDTest extends HTMLPurifier_StrategyHarness
+{
+    // id-attribute cases for the ValidateAttributes strategy; setUp() switches HTML.EnableAttrID on for every test.
+    function setUp() {
+        parent::setUp();
+        $this->obj = new HTMLPurifier_Strategy_ValidateAttributes(); // strategy instance under test
+        $this->config->set('HTML', 'EnableAttrID', true); // all cases below run with id attributes enabled
+    }
+    
+    // Single-argument assertResult(): presumably asserts the input comes back unchanged -- confirm in the harness.
+    function testPreserveIDWhenEnabled() {
+        $this->assertResult('<div id="valid">Preserve the ID.</div>');
+    }
+    
+    function testRemoveInvalidID() { // id="0invalid" is expected to be stripped
+        $this->assertResult(
+            '<div id="0invalid">Kill the ID.</div>',
+            '<div>Kill the ID.</div>'
+        );
+    }
+    
+    function testRemoveDuplicateID() { // the second element reusing id="valid" is expected to lose it; the first keeps it
+        $this->assertResult(
+            '<div id="valid">Valid</div><div id="valid">Invalid</div>',
+            '<div id="valid">Valid</div><div>Invalid</div>'
+        );
+    }
+    
+    function testAttributeKeyCaseInsensitivity() { // the attribute name "ID" is expected to come out as lowercase "id"
+        $this->assertResult(
+            '<div ID="valid">Convert ID to lowercase.</div>',
+            '<div id="valid">Convert ID to lowercase.</div>'
+        );
+    }
+    
+    function testTrimWhitespace() { // spaces around the id value are expected to be removed
+        $this->assertResult(
+            '<div id=" valid ">Trim whitespace.</div>',
+            '<div id="valid">Trim whitespace.</div>'
+        );
+    }
+    
+    function testIDBlacklist() { // an id listed in Attr.IDBlacklist is expected to be stripped
+        $this->config->set('Attr', 'IDBlacklist', array('invalid'));
+        $this->assertResult(
+            '<div id="invalid">Invalid</div>',
+            '<div>Invalid</div>'
+        );
+    }
+    
+    function testNameConvertedToID() { // <a name="..."> is expected to be rewritten as <a id="...">
+        $this->config->set('HTML', 'TidyLevel', 'heavy'); // this case alone raises the Tidy level
+        $this->assertResult(
+            '<a name="foobar" />',
+            '<a id="foobar" />'
+        );
+    }
+    
+}
+
diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributes_TidyTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributes_TidyTest.php
new file mode 100644
index 00000000..1f0e21ba
--- /dev/null
+++ b/tests/HTMLPurifier/Strategy/ValidateAttributes_TidyTest.php
@@ -0,0 +1,353 @@
+<?php
+
+require_once('HTMLPurifier/StrategyHarness.php');
+require_once('HTMLPurifier/Strategy/ValidateAttributes.php');
+
+class HTMLPurifier_Strategy_ValidateAttributes_TidyTest extends HTMLPurifier_StrategyHarness
+{
+    // Presentational-attribute cases for ValidateAttributes (attribute in, inline style out); setUp() sets HTML.TidyLevel to heavy.
+    function setUp() {
+        parent::setUp();
+        $this->obj = new HTMLPurifier_Strategy_ValidateAttributes(); // strategy instance under test
+        $this->config->set('HTML', 'TidyLevel', 'heavy'); // every case below runs at the heavy Tidy level
+    }
+    
+    function testConvertCenterAlign() { // block-level align values are expected to become text-align declarations
+        $this->assertResult(
+            '<h1 align="center">Centered Headline</h1>',
+            '<h1 style="text-align:center;">Centered Headline</h1>'
+        );
+    }
+    
+    function testConvertRightAlign() {
+        $this->assertResult(
+            '<h1 align="right">Right-aligned Headline</h1>',
+            '<h1 style="text-align:right;">Right-aligned Headline</h1>'
+        );
+    }
+    
+    function testConvertLeftAlign() {
+        $this->assertResult(
+            '<h1 align="left">Left-aligned Headline</h1>',
+            '<h1 style="text-align:left;">Left-aligned Headline</h1>'
+        );
+    }
+    
+    function testConvertJustifyAlign() {
+        $this->assertResult(
+            '<p align="justify">Justified Paragraph</p>',
+            '<p style="text-align:justify;">Justified Paragraph</p>'
+        );
+    }
+    
+    function testRemoveInvalidAlign() { // an unrecognised align value is expected to be dropped with no style added
+        $this->assertResult(
+            '<h1 align="invalid">Invalid Headline</h1>',
+            '<h1>Invalid Headline</h1>'
+        );
+    }
+    
+    function testConvertTableLengths() { // bare numbers gain "px", percentages are kept; the height on <hr> is expected to be dropped
+        $this->assertResult(
+            '<td width="5%" height="10" /><th width="10" height="5%" /><hr width="10" height="10" />',
+            '<td style="width:5%;height:10px;" /><th style="width:10px;height:5%;" /><hr style="width:10px;" />'
+        );
+    }
+    
+    function testTdConvertNowrap() { // the valueless nowrap attribute is expected to become white-space:nowrap
+        $this->assertResult(
+            '<td nowrap />',
+            '<td style="white-space:nowrap;" />'
+        );
+    }
+    
+    function testCaptionConvertAlignLeft() { // on <caption>, left/right are expected to map to text-align
+        $this->assertResult(
+            '<caption align="left" />',
+            '<caption style="text-align:left;" />'
+        );
+    }
+    
+    function testCaptionConvertAlignRight() {
+        $this->assertResult(
+            '<caption align="right" />',
+            '<caption style="text-align:right;" />'
+        );
+    }
+    
+    function testCaptionConvertAlignTop() { // on <caption>, top/bottom are expected to map to caption-side instead of text-align
+        $this->assertResult(
+            '<caption align="top" />',
+            '<caption style="caption-side:top;" />'
+        );
+    }
+    
+    function testCaptionConvertAlignBottom() {
+        $this->assertResult(
+            '<caption align="bottom" />',
+            '<caption style="caption-side:bottom;" />'
+        );
+    }
+    
+    function testCaptionRemoveInvalidAlign() {
+        $this->assertResult(
+            '<caption align="nonsense" />',
+            '<caption />'
+        );
+    }
+    
+    function testTableConvertAlignLeft() { // on <table>, left/right are expected to map to float
+        $this->assertResult(
+            '<table align="left" />',
+            '<table style="float:left;" />'
+        );
+    }
+    
+    function testTableConvertAlignCenter() { // centring a table is expected to use auto side margins rather than float
+        $this->assertResult(
+            '<table align="center" />',
+            '<table style="margin-left:auto;margin-right:auto;" />'
+        );
+    }
+    
+    function testTableConvertAlignRight() {
+        $this->assertResult(
+            '<table align="right" />',
+            '<table style="float:right;" />'
+        );
+    }
+    
+    function testTableRemoveInvalidAlign() { // "top" is not accepted for table align and is expected to be dropped
+        $this->assertResult(
+            '<table align="top" />',
+            '<table />'
+        );
+    }
+    
+    function testImgConvertAlignLeft() { // on <img>, left/right are expected to map to float
+        $this->assertResult(
+            '<img src="foobar.jpg" alt="foobar" align="left" />',
+            '<img src="foobar.jpg" alt="foobar" style="float:left;" />'
+        );
+    }
+    
+    function testImgConvertAlignRight() {
+        $this->assertResult(
+            '<img src="foobar.jpg" alt="foobar" align="right" />',
+            '<img src="foobar.jpg" alt="foobar" style="float:right;" />'
+        );
+    }
+    
+    function testImgConvertAlignBottom() { // note the mapping: align="bottom" is expected to yield vertical-align:baseline
+        $this->assertResult(
+            '<img src="foobar.jpg" alt="foobar" align="bottom" />',
+            '<img src="foobar.jpg" alt="foobar" style="vertical-align:baseline;" />'
+        );
+    }
+    
+    function testImgConvertAlignMiddle() {
+        $this->assertResult(
+            '<img src="foobar.jpg" alt="foobar" align="middle" />',
+            '<img src="foobar.jpg" alt="foobar" style="vertical-align:middle;" />'
+        );
+    }
+    
+    function testImgConvertAlignTop() {
+        $this->assertResult(
+            '<img src="foobar.jpg" alt="foobar" align="top" />',
+            '<img src="foobar.jpg" alt="foobar" style="vertical-align:top;" />'
+        );
+    }
+    
+    function testImgRemoveInvalidAlign() {
+        $this->assertResult(
+            '<img src="foobar.jpg" alt="foobar" align="outerspace" />',
+            '<img src="foobar.jpg" alt="foobar" />'
+        );
+    }
+    
+    function testBorderConvertHVSpace() { // despite the name, this covers hspace/vspace: vspace -> top/bottom margins, hspace -> left/right margins
+        $this->assertResult(
+            '<img src="foo" alt="foo" hspace="1" vspace="3" />',
+            '<img src="foo" alt="foo" style="margin-top:3px;margin-bottom:3px;margin-left:1px;margin-right:1px;" />'
+        );
+    }
+    
+    function testHrConvertSize() { // hr size is expected to become a pixel height
+        $this->assertResult(
+            '<hr size="3" />',
+            '<hr style="height:3px;" />'
+        );
+    }
+    
+    function testHrConvertNoshade() { // the valueless noshade attribute is expected to become a flat grey rule with no border
+        $this->assertResult(
+            '<hr noshade />',
+            '<hr style="color:#808080;background-color:#808080;border:0;" />'
+        );
+    }
+    
+    function testHrConvertAlignLeft() { // hr alignment is expected to set both side margins and text-align
+        $this->assertResult(
+            '<hr align="left" />',
+            '<hr style="margin-left:0;margin-right:auto;text-align:left;" />'
+        );
+    }
+    
+    function testHrConvertAlignCenter() {
+        $this->assertResult(
+            '<hr align="center" />',
+            '<hr style="margin-left:auto;margin-right:auto;text-align:center;" />'
+        );
+    }
+    
+    function testHrConvertAlignRight() {
+        $this->assertResult(
+            '<hr align="right" />',
+            '<hr style="margin-left:auto;margin-right:0;text-align:right;" />'
+        );
+    }
+    
+    function testHrRemoveInvalidAlign() {
+        $this->assertResult(
+            '<hr align="bottom" />',
+            '<hr />'
+        );
+    }
+    
+    function testBrConvertClearLeft() { // br clear values are expected to become CSS clear declarations
+        $this->assertResult(
+            '<br clear="left" />',
+            '<br style="clear:left;" />'
+        );
+    }
+    
+    function testBrConvertClearRight() {
+        $this->assertResult(
+            '<br clear="right" />',
+            '<br style="clear:right;" />'
+        );
+    }
+    
+    function testBrConvertClearAll() { // note the rename: clear="all" is expected to yield clear:both
+        $this->assertResult(
+            '<br clear="all" />',
+            '<br style="clear:both;" />'
+        );
+    }
+    
+    function testBrConvertClearNone() {
+        $this->assertResult(
+            '<br clear="none" />',
+            '<br style="clear:none;" />'
+        );
+    }
+    
+    function testBrRemoveInvalidClear() {
+        $this->assertResult(
+            '<br clear="foo" />',
+            '<br />'
+        );
+    }
+    
+    function testUlConvertTypeDisc() { // ul type values are expected to become list-style-type declarations
+        $this->assertResult(
+            '<ul type="disc" />',
+            '<ul style="list-style-type:disc;" />'
+        );
+    }
+    
+    function testUlConvertTypeSquare() {
+        $this->assertResult(
+            '<ul type="square" />',
+            '<ul style="list-style-type:square;" />'
+        );
+    }
+    
+    function testUlConvertTypeCircle() {
+        $this->assertResult(
+            '<ul type="circle" />',
+            '<ul style="list-style-type:circle;" />'
+        );
+    }
+    
+    function testUlConvertTypeCaseInsensitive() { // on <ul> an upper-case value is expected to be accepted and lower-cased
+        $this->assertResult(
+            '<ul type="CIRCLE" />',
+            '<ul style="list-style-type:circle;" />'
+        );
+    }
+    
+    function testUlRemoveInvalidType() { // "a" is expected to be rejected on <ul>
+        $this->assertResult(
+            '<ul type="a" />',
+            '<ul />'
+        );
+    }
+    
+    function testOlConvertType1() { // ol type codes are expected to map to named list-style-type values
+        $this->assertResult(
+            '<ol type="1" />',
+            '<ol style="list-style-type:decimal;" />'
+        );
+    }
+    
+    function testOlConvertTypeLowerI() {
+        $this->assertResult(
+            '<ol type="i" />',
+            '<ol style="list-style-type:lower-roman;" />'
+        );
+    }
+    
+    function testOlConvertTypeUpperI() { // on <ol> letter case is significant: "I" is upper-roman, "i" is lower-roman
+        $this->assertResult(
+            '<ol type="I" />',
+            '<ol style="list-style-type:upper-roman;" />'
+        );
+    }
+    
+    function testOlConvertTypeLowerA() {
+        $this->assertResult(
+            '<ol type="a" />',
+            '<ol style="list-style-type:lower-alpha;" />'
+        );
+    }
+    
+    function testOlConvertTypeUpperA() {
+        $this->assertResult(
+            '<ol type="A" />',
+            '<ol style="list-style-type:upper-alpha;" />'
+        );
+    }
+    
+    function testOlRemoveInvalidType() { // "disc" is expected to be rejected on <ol>
+        $this->assertResult(
+            '<ol type="disc" />',
+            '<ol />'
+        );
+    }
+    
+    function testLiConvertTypeCircle() { // <li> is expected to accept both bullet names and ordered-list codes
+        $this->assertResult(
+            '<li type="circle" />',
+            '<li style="list-style-type:circle;" />'
+        );
+    }
+    
+    function testLiConvertTypeA() {
+        $this->assertResult(
+            '<li type="A" />',
+            '<li style="list-style-type:upper-alpha;" />'
+        );
+    }
+    
+    function testLiConvertTypeCaseSensitive() { // on <li> an upper-case bullet name is expected to be rejected, not lower-cased
+        $this->assertResult(
+            '<li type="CIRCLE" />',
+            '<li />'
+        );
+    }
+    
+    
+}
+
diff --git a/tests/index.php b/tests/index.php
index 69be2981..a98e20e6 100755
--- a/tests/index.php
+++ b/tests/index.php
@@ -5,7 +5,7 @@
 
 error_reporting(E_ALL | E_STRICT);
 define('HTMLPurifierTest', 1);
-define('HTMLPURIFIER_SCHEMA_STRICT', true);
+define('HTMLPURIFIER_SCHEMA_STRICT', true); // validate schemas
 
 // wishlist: automated calling of this file from multiple PHP versions so we
 // don't have to constantly switch around
@@ -13,10 +13,11 @@ define('HTMLPURIFIER_SCHEMA_STRICT', true);
 // default settings (protect against register_globals)
 $GLOBALS['HTMLPurifierTest'] = array();
 $GLOBALS['HTMLPurifierTest']['PEAR'] = false; // do PEAR tests
+$GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") && class_exists('DOMDocument'); // PH5P flag: true only on PHP 5+ with the DOMDocument class available
 $simpletest_location = 'simpletest/'; // reasonable guess
 
 // load SimpleTest
-@include '../test-settings.php'; // don't mind if it isn't there
+if (file_exists('../test-settings.php')) include '../test-settings.php'; // optional file: skipped when it does not exist
 require_once $simpletest_location . 'unit_tester.php';
 require_once $simpletest_location . 'reporter.php';
 require_once $simpletest_location . 'mock_objects.php';
@@ -79,7 +80,6 @@ if ($test_file = $GLOBALS['HTMLPurifierTest']['File']) {
 } else {
     
     $test = new GroupTest('All Tests');
-
     foreach ($test_files as $test_file) {
         require_once $test_file;
         $test->addTestClass(path2class($test_file));
diff --git a/tests/test_files.php b/tests/test_files.php
index f9fa71c1..adb0df4c 100644
--- a/tests/test_files.php
+++ b/tests/test_files.php
@@ -79,6 +79,7 @@ $test_files[] = 'HTMLPurifier/GeneratorTest.php';
 $test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModuleTest.php';
+$test_files[] = 'HTMLPurifier/HTMLModule/ObjectTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModule/RubyTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModule/ScriptingTest.php';
 $test_files[] = 'HTMLPurifier/HTMLModule/TidyTest.php';
@@ -98,9 +99,13 @@ $test_files[] = 'HTMLPurifier/Strategy/FixNestingTest.php';
 $test_files[] = 'HTMLPurifier/Strategy/FixNesting_ErrorsTest.php';
 $test_files[] = 'HTMLPurifier/Strategy/MakeWellFormedTest.php';
 $test_files[] = 'HTMLPurifier/Strategy/MakeWellFormed_ErrorsTest.php';
+$test_files[] = 'HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php';
 $test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElementsTest.php';
 $test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php';
+$test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElements_TidyTest.php';
 $test_files[] = 'HTMLPurifier/Strategy/ValidateAttributesTest.php';
+$test_files[] = 'HTMLPurifier/Strategy/ValidateAttributes_IDTest.php';
+$test_files[] = 'HTMLPurifier/Strategy/ValidateAttributes_TidyTest.php';
 $test_files[] = 'HTMLPurifier/TagTransformTest.php';
 $test_files[] = 'HTMLPurifier/TokenTest.php';
 $test_files[] = 'HTMLPurifier/URIDefinitionTest.php';