Release 1.5.0, merged in r688-867.

- LanguageFactory::instance() declared static - HTMLModuleManagerTest pass by reference bug fixed, merge back into trunk scheduled git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@869 48356398-32a2-884e-a903-53898d9a118a
2025-01-08 07:01:53 +00:00 · 2007-03-24 01:04:06 +00:00 · 2007-03-24 01:04:06 +00:00 · dd2fd06591
commit dd2fd06591
parent cec7a1c087
130 changed files with 4324 additions and 1385 deletions
--- a/2
+++ b/2
@ -4,7 +4,7 @@
 # Project related configuration options
 #---------------------------------------------------------------------------
 PROJECT_NAME           = HTML Purifier
-PROJECT_NUMBER         = 1.4.1
+PROJECT_NUMBER         = 1.5.0
 OUTPUT_DIRECTORY       = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
 CREATE_SUBDIRS         = NO
 OUTPUT_LANGUAGE        = English
--- a/30
+++ b/30
@ -9,6 +9,36 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
    . Internal change
 ==========================

+1.5.0, released 2007-03-23
+! Added a rudimentary I18N and L10N system modeled off MediaWiki. It
+  doesn't actually do anything yet, but keep your eyes peeled.
+! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier
+! Newly structured HTMLDefinition modeled off of XHTML 1.1 modules.
+  I am loath to release beta quality APIs, but this is exactly that;
+  don't use the internal interfaces if you're not willing to do migration
+  later on.
+- Allow 'x' subtag in language codes
+- Fixed buggy chameleon-support for ins and del
+. Added support for IDREF attributes (i.e. for)
+. Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens
+. Removed context variable ParentType, replaced with IsInline, which
+  is false when you're not inline and an integer of the parent that
+  caused you to become inline when you are (so possibly zero)
+. Removed ElementDef->type in favor of ElementDef->descendants_are_inline
+  and HTMLDefinition->content_sets
+. StrictBlockquote now reports what elements it's supposed to allow,
+  rather than what it does allow
+. Removed HTMLDefinition->info_flow_elements in favor of
+  HTMLDefinition->content_sets['Flow']
+. Removed redundant "exclusionary" definitions from DTD roster
+. StrictBlockquote now requires a construction parameter as if it
+  were a Required ChildDef, this is the "real" set of allowed elements
+. AttrDef partitioned into HTML, CSS and URI segments
+. Modify Youtube filter regexp to be multiline
+. Require both PHP5 and DOM extension in order to use DOMLex, fixes
+  some edge cases where a DOMDocument class exists in a PHP4 environment
+  due to DOM XML extension.
+
 1.4.1, released 2007-01-21
 ! docs/enduser-youtube.html updated according to new functionality
 - YouTube IDs can have underscores and dashes
--- a/8
+++ b/8
@ -7,7 +7,7 @@ TODO List
    ? At-risk
 ==========================

-1.5 release
+1.6 release
 # Implement all non-essential attribute transforms, configurable
 # URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
 # Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
@ -15,8 +15,9 @@ TODO List
    - Requires I18N facilities to be created first (COMPLEX)
 ? Configuration profiles: sets of directives that get set with one func call
 - XSS-attempt detection
+ - Implement IDREF support

-1.6 release
+1.7 release
 # Add pre-packaged "levels" of cleaning (custom behavior already done)
 - More fine-grained control over escaping behavior
    - Silently drop content inbetween SCRIPT tags (can be generalized to allow
@ -29,7 +30,7 @@ TODO List
   tag or attribute that is not supported
 - Parse TinyMCE whitelist into our %HTML.Allow* whitelists

-1.7 release
+1.8 release
 # Additional support for poorly written HTML
    - Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
    - Friendly strict handling of <address> (block -> <br>)
@ -76,7 +77,6 @@ Ongoing
    - more! (look for ones that use WYSIWYGs)

 Unknown release (on a scratch-an-itch basis)
- - Upgrade SimpleTest testing code to newest versions
 - Have 'lang' attribute be checked against official lists
 ? Semi-lossy dumb alternate character encoding transformations, achieved by
   encoding all characters that have string entity equivalents
--- a/benchmarks/Lexer.php
+++ b/benchmarks/Lexer.php
@ -7,6 +7,7 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');

 require_once 'HTMLPurifier/ConfigSchema.php';
 require_once 'HTMLPurifier/Config.php';
+require_once 'HTMLPurifier/Context.php';

 $LEXERS = array();
 $RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
@ -93,11 +94,14 @@ function print_lexers() {
 function do_benchmark($name, $document) {
    global $LEXERS, $RUNS;
    
+    $config = HTMLPurifier_Config::createDefault();
+    $context = new HTMLPurifier_Context();
+    
    $timer = new RowTimer($name);
    $timer->start();
    
    foreach($LEXERS as $key => $lexer) {
-        for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
+        for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document, $config, $context);
        $timer->setMarker($key);
    }
    
--- a/benchmarks/ProfileDirectLex.php
+++ b/benchmarks/ProfileDirectLex.php
@ -5,12 +5,15 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
 require_once 'HTMLPurifier/ConfigSchema.php';
 require_once 'HTMLPurifier/Config.php';
 require_once 'HTMLPurifier/Lexer/DirectLex.php';
+require_once 'HTMLPurifier/Context.php';

 $input = file_get_contents('samples/Lexer/4.html');
 $lexer = new HTMLPurifier_Lexer_DirectLex();
+$config = HTMLPurifier_Config::createDefault();
+$context = new HTMLPurifier_Context();

 for ($i = 0; $i < 10; $i++) {
-    $tokens = $lexer->tokenizeHTML($input);
+    $tokens = $lexer->tokenizeHTML($input, $config, $context);
 }

 ?>
--- a/configdoc/generate.php
+++ b/configdoc/generate.php
@ -188,7 +188,7 @@ $xsl_processor->importStylesheet($xsl_dom_stylesheet);
 $html_output = $xsl_processor->transformToXML($dom_document);

 // some slight fudges to preserve backwards compatibility
-$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br>
+$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br/>
 $html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns

 if (class_exists('Tidy')) {
--- a/docs/dev-advanced-api.html
+++ b/docs/dev-advanced-api.html
@ -0,0 +1,188 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+<meta name="description" content="Functional specification for HTML Purifier's advanced API for defining custom filtering behavior." />
+<link rel="stylesheet" type="text/css" href="style.css" />
+
+<title>Advanced API - HTML Purifier</title>
+
+</head><body>
+
+<h1>Advanced API</h1>
+
+<div id="filing">Filed under Development</div>
+<div id="index">Return to the <a href="index.html">index</a>.</div>
+<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
+
+<p>It makes no sense to adopt a <q>one-size-fits-all</q> approach to
+filtersets: therefore, users must be able to define their own sets of
+<q>allowed</q> elements, as well as switch between doctypes of HTML.</p>
+
+<p>Our goals are to let the user:</p>
+
+<dl>
+    <dt>Select</dt>
+    <dd><ul>
+        <li>Doctype</li>
+        <li>Filtersets: Rich / Plain / Full ...</li>
+        <li>Mode: Lenient / Correctional</li>
+        <li>Collections (?): Safe / Unsafe</li>
+        <li>Modules / Tags / Attributes</li>
+    </ul></dd>
+    <dt>Customize</dt>
+    <dd><ul>
+        <li>Tags / Attributes / Attribute Types</li>
+        <li>Filtersets</li>
+        <li>Root Node</li>
+    </ul></dd>
+    <dt>Create</dt>
+    <dd><ul>
+        <li>Modules / Tags / Attributes / Attribute Types</li>
+        <li>Filtersets</li>
+        <li>Doctype</li>
+    </ul></dd>
+</dl>
+
+<h2>Select</h2>
+
+<h3>Selecting a Doctype</h3>
+
+<p>By default, users will use a doctype-based, permissive but secure
+whitelist.  They must define a <strong>doctype</strong>, and this serves
+as the first method of determining a filterset.</p>
+
+<p class="technical">This identifier is based
+on the name the W3C has given to the document type and <em>not</em>
+the DTD identifier.</p>
+
+<p>This parameter is set via the configuration object:</p>
+
+<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
+
+<h3>Selecting a Filterset</h3>
+
+<p>However, selecting this doctype doesn't mean much, because if we
+adhered exactly to the definition we would be letting XSS and other
+nasties through. HTML Purifier must, in its filterset, allow a subset
+of the doctype, which we shall call a <strong>filterset</strong>.</p>
+
+<p>By default, HTML Purifier will use the <strong>Rich</strong>
+filterset, which allows as many elements as possible with untrusted
+sources. Other possible filtersets could be:</p>
+
+<dl>
+    <dt>Full</dt>
+    <dd>Allows the full span of elements in the doctype, good if you want
+        HTML Purifier to work as a Tidy substitute but not to strip
+        anything out.</dd>
+    <dt>Plain</dt>
+    <dd>Provides a minimum set of tags for semantic markup of things
+        like blog comments.</dd>
+</dl>
+
+<p>Extension-authors would be able to define custom filtersets for
+other users to use.</p>
+
+<p>A possible call to select a filterset would be:</p>
+
+<pre>$config->set('HTML', 'Filterset', 'Rich');</pre>
+
+<h3>Selecting Mode</h3>
+
+<p>Within filtersets, there are various <strong>modes</strong> of operation.
+These indicate variant behaviors that, while not strictly changing the
+allowed set of elements and attributes, will definitely affect the output.
+Currently, we have two modes, which may be used together:</p>
+
+<dl>
+    <dt>Lenient</dt>
+    <dd>Deprecated elements and attributes will be transformed into
+        standards-compliant alternatives when explicitly disallowed. For
+        example, in the XHTML 1.0 Strict doctype, a <code>center</code>
+        tag would be turned into a <code>div</code> with the CSS property
+        <code>text-align:center;</code>, but in XHTML 1.0 Transitional
+        the tag would be preserved. This mode is on by default.</dd>
+    <dt>Correctional</dt>
+    <dd>Deprecated elements and attributes will be transformed into
+        standards-compliant alternatives whenever possible. Referring
+        back to the previous example, the <code>center</code> tag would
+        be transformed in both cases. However, tags without a
+        reasonable standards-compliant alternative will be preserved
+        in their original form. This mode is on by default. It may have
+        various levels of operation.</dd>
+</dl>
+
+<p>A possible call to select modes would be:</p>
+
+<pre>$config->set('HTML', 'Mode', array('correctional', 'lenient'));</pre>
+
+<p>If modes have extra parameters, a hash might work well:</p>
+
+<pre>$config->set('HTML', 'Mode', array(
+    'correctional' => 9, // strongest level
+    'lenient' => true // this one's just boolean
+));</pre>
+
+<p>Modes may possibly be wrapped up with the filterset declaration:</p>
+
+<pre>$config->set('HTML', 'Filterset', 'Rich: correctional, lenient');</pre>
+
+<p>Further investigation in this field is necessary.</p>
+
+<h3>Selecting Modules / Tags / Attributes</h3>
+
+<p>If this cookie cutter approach doesn't appeal to a user, they may
+decide to roll their own filterset by selecting modules, tags and
+attributes to allow.</p>
+
+<p class="technical">This would make use of the same facilities
+as a filterset author would use, except that it would go under an
+<q>anonymous</q> filterset that would be auto-selected if any of the
+relevant module/tag/attribute selection configuration directives were
+non-null.</p>
+
+<p>On the highest level, a user will usually be most interested in
+directly specifying which elements and attributes are desired. For
+example:</p>
+
+<pre>$config->set('HTML', 'AllowedElements', 'a,b,em,p,blockquote,code,i');</pre>
+
+<p>Attribute declarations could be merged into this declaration as such:</p>
+
+<pre>$config->set('HTML', 'Allowed', 'a[href,title],b,em,p[class],blockquote[cite],code,i');</pre>
+
+<p>...or be kept separate:</p>
+
+<pre>$config->set('HTML', 'AllowedAttributes', 'a.href,a.title,p.class,blockquote.cite');</pre>
+
+<p class="technical">Considering that, internally speaking, as mandated by
+the XHTML 1.1 Modularization specification, we have organized our
+elements around modules, considerable gymnastics will be needed to
+get this sort of functionality working.</p>
+
+<p>A user may also specify a module to load a class of elements and attributes
+into their filterset:</p>
+
+<pre>$config->set('HTML', 'Allowed', 'Hypertext,Core');</pre>
+
+<p class="fixme">The granularity of these modules is too coarse for
+the average user (for example, the core module loads everything from
+the essential <code>p</code> tag to the not-so-safe <code>h1</code>
+tag). How do we make this still a viable solution?</p>
+
+<h3>Unified selector</h3>
+
+<p>Because selecting each and every one of these configuration options
+is a chore, we may wish to offer a specialized configuration method
+for selecting a filterset. Possibility:</p>
+
+<pre>function selectFilter($doctype, $filterset, $mode)</pre>
+
+<p>...which is simply a light wrapper over the individual configuration
+calls. A custom config file format or text format could also be adopted.</p>
+
+<div id="version">$Id$</div>
+
+</body></html>
--- a/docs/enduser-overview.txt
+++ b/docs/enduser-overview.txt
@ -36,7 +36,7 @@ forgiving lexer.  You may also be interested in the unit tests located in the
 tests/ folder, which provide a living document on how exactly the filter deals
 with malformed input.

-In summary:
+In summary (see corresponding classes for more details):

 1. Parse document into an array of tag and text tokens (Lexer)
 2. Remove all elements not on whitelist and transform certain other elements
--- a/docs/enduser-security.txt
+++ b/docs/enduser-security.txt
@ -6,45 +6,17 @@ through negligence of people. This class will do its job: no more, no less,
 and it's up to you to provide it the proper information and proper context
 to be effective. Things to remember:

-1. Character Encoding: UTF-8.
-    This segment will soon be obsoleted by enduser-utf8.html
-Currently, the parser runs under the assumption that it is dealing
-with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
-character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
-your character encoding, make sure you configure HTML Purifier or switch
-to UTF-8. Now. Also, make sure any input is properly converted to UTF-8, or
-the parser will mangle it badly (though it won't be a security risk if you're
-outputting it as UTF-8 though).  Character encoding is, in general, a knotty
-issue, but do yourself a favor and learn about it:
-<http://www.joelonsoftware.com/articles/Unicode.html>
+1. Character Encoding: see enduser-utf8.html for more info.

-2. Doctype: XHTML 1.0 Transitional
-This is what the parser is outputting. For the most
-part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
-that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
-has waaaay too many quirks for a little parser to handle.  We did not select
-strict in order to prevent ourselves from being too draconic on users, but
-this may be configurable in the future.  Do you want standards compliance?
-The doctype is a good place to start.
+2. Doctype: document pending feature completion
+Not strictly necessary, actually. More in-depth discussion once we figure
+out how to get strict loose mode working.

-3. IDs
-    This segment is obsoleted by enduser-id.html
-They need to be unique, but without some knowledge of the
-rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
-needs to be set: we may want to consider disallowing IDs by default to
-save lazy programmers.
+3. IDs: see enduser-id.html for more info

-4. [PROJECTED] Links
-We're not going to try for spam protection (although
-some hooks for such a module might be nice) but we may offer the ability to
-only accept relative URLs. Pick the one that's right for you.
+4. Links: document pending feature completion
+Rudimentary blacklisting, we should also allow only relative URIs. We
+need a doc to explain the stuff.

-5. CSS
-While we can prevent the most flagrant cases from affecting your
-layout (such as absolutely positioned elements), no amount of code is going
-to protect your pages from being attacked by garish colors and plain old
-bad taste.  A neat feature would be the ability to define acceptable colors
-in a document, but that's not likely to be implemented for a while.  In the
-meantime, be sure to make sure that floated elements (permitted, since they
-can be quite useful) can't mess up your layout. Once again, we may want to
-disable this by default to protect lazy developers.
+5. CSS: document pending
+Explain which CSS styles we blocked and why.
--- a/docs/enduser-utf8.html
+++ b/docs/enduser-utf8.html
@ -10,7 +10,7 @@
    .minor td {font-style:italic;}
 </style>

-<title>UTF-8 - HTML Purifier</title>
+<title>UTF-8: The Secret of Character Encoding - HTML Purifier</title>

 <!-- Note to users: this document, though professing to be UTF-8, attempts
 to use only ASCII characters, because most webservers are configured
@ -19,21 +19,27 @@ own advice for sake of portability.  -->

 </head><body>

-<h1>UTF-8</h1>
+<h1>UTF-8: The Secret of Character Encoding</h1>

 <div id="filing">Filed under End-User</div>
 <div id="index">Return to the <a href="index.html">index</a>.</div>
 <div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>

-<p>Character encoding and character sets, in truth, are not that
-difficult to understand. But if you don't understand them, you are going
-to be caught by surprise by some of HTML Purifier's behavior, namely
-the fact that it operates UTF-8 or the limitations of the character
-encoding transformations it does. This document will walk you through
+<p>Character encoding and character sets are not that
+difficult to understand, but so many people blithely stumble
+through the worlds of programming without knowing what to actually
+do about it, or say &quot;Ah, it's a job for those <em>internationalization</em>
+experts.&quot; No, it is not! This document will walk you through
 determining the encoding of your system and how you should handle
 this information. It will stay away from excessive discussion on
-the internals of character encoding, but offer the information in
-asides that can easily be skipped.</p>
+the internals of character encoding.</p>
+
+<p>This document is not designed to be read in its entirety. It
+slowly introduces concepts that build on each other, so you need not get to
+the bottom to have learned something new. However, I strongly
+recommend you read all the way to <strong>Why UTF-8?</strong>, because at least
+at that point you'd have made a conscious decision not to migrate;
+migrating can be a rewarding (but difficult) task.</p>

 <blockquote class="aside">
 <div class="label">Asides</div>
@ -43,6 +49,50 @@ asides that can easily be skipped.</p>
    with a greater understanding of the underlying issues.</p>
 </blockquote>

+<h2>Table of Contents</h2>
+
+<ol id="toc">
+    <li><a href="#findcharset">Finding the real encoding</a></li>
+    <li><a href="#findmetacharset">Finding the embedded encoding</a></li>
+    <li><a href="#fixcharset">Fixing the encoding</a><ol>
+        <li><a href="#fixcharset-none">No embedded encoding</a></li>
+        <li><a href="#fixcharset-diff">Embedded encoding disagrees</a></li>
+        <li><a href="#fixcharset-server">Changing the server encoding</a><ol>
+            <li><a href="#fixcharset-server-php">PHP header() function</a></li>
+            <li><a href="#fixcharset-server-phpini">PHP ini directive</a></li>
+            <li><a href="#fixcharset-server-nophp">Non-PHP</a></li>
+            <li><a href="#fixcharset-server-htaccess">.htaccess</a></li>
+            <li><a href="#fixcharset-server-ext">File extensions</a></li>
+        </ol></li>
+        <li><a href="#fixcharset-xml">XML</a></li>
+        <li><a href="#fixcharset-internals">Inside the process</a></li>
+    </ol></li>
+    <li><a href="#whyutf8">Why UTF-8?</a><ol>
+        <li><a href="#whyutf8-i18n">Internationalization</a></li>
+        <li><a href="#whyutf8-user">User-friendly</a></li>
+        <li><a href="#whyutf8-forms">Forms</a><ol>
+            <li><a href="#whyutf8-forms-urlencoded">application/x-www-form-urlencoded</a></li>
+            <li><a href="#whyutf8-forms-multipart">multipart/form-data</a></li>
+        </ol></li>
+        <li><a href="#whyutf8-support">Well supported</a></li>
+        <li><a href="#whyutf8-htmlpurifier">HTML Purifier</a></li>
+    </ol></li>
+    <li><a href="#migrate">Migrate to UTF-8</a><ol>
+        <li><a href="#migrate-db">Configuring your database</a><ol>
+            <li><a href="#migrate-db-legit">Legit method</a></li>
+            <li><a href="#migrate-db-binary">Binary</a></li>
+        </ol></li>
+        <li><a href="#migrate-editor">Text editor</a></li>
+        <li><a href="#migrate-bom">Byte Order Mark (headers already sent!)</a></li>
+        <li><a href="#migrate-fonts">Fonts</a><ol>
+            <li><a href="#migrate-fonts-obscure">Obscure scripts</a></li>
+            <li><a href="#migrate-fonts-occasional">Occasional use</a></li>
+        </ol></li>
+        <li><a href="#migrate-variablewidth">Dealing with variable width in functions</a></li>
+    </ol></li>
+    <li><a href="#externallinks">Further Reading</a></li>
+</ol>
+
 <h2 id="findcharset">Finding the real encoding</h2>

 <p>In the beginning, there was ASCII, and things were simple. But they
@ -275,7 +325,7 @@ your own php.ini file, ask your support for details. Use:</p>

 <h4 id="fixcharset-server-nophp">Non-PHP</h4>

-<p>You may, for whatever reason, may need to set the character encoding
+<p>You may, for whatever reason, need to set the character encoding
 on non-PHP files, usually plain ol' HTML files. Doing this
 is more of a hit-or-miss process: depending on the software being
 used as a webserver and the configuration of that software, certain
@ -386,8 +436,8 @@ processing instructions. They look like:</p>

 <p>For XHTML, this processing instruction theoretically
 overrides the <code>META</code> tag. In reality, this happens only when the
-XHTML is actually served as legit XML and not HTML, which is almost
-always never due to Internet Explorer's lack of support for 
+XHTML is actually served as legit XML and not HTML, which almost never
+happens due to Internet Explorer's lack of support for 
 <code>application/xhtml+xml</code> (even though doing so is often
 argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>

@ -398,10 +448,10 @@ for XML files is UTF-8, which often butts heads with more common
 ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>

 <p>In short, if you use XHTML and have gone through the
-trouble of adding the XML header, be sure to make sure it jives
+trouble of adding the XML header, make sure it jibes
 with your <code>META</code> tags and HTTP headers.</p>

-<h3>Inside the process</h3>
+<h3 id="fixcharset-internals">Inside the process</h3>

 <p>This section is not required reading,
 but may answer some of your questions on what's going on in all
@ -572,7 +622,7 @@ Each method has deficiencies, especially the former.</p>
 the page, you still have the trouble of what to do with characters
 that are outside of the character encoding's range. The behavior, once
 again, varies: Firefox 2.0 entity-izes them while Internet Explorer
-7.0 mangles them beyond intelligibility. For serious I18N purposes,
+7.0 mangles them beyond intelligibility. For serious internationalization purposes,
 this is not an option.</p>

 <p>The other possibility is to set Accept-Encoding to UTF-8, which
@ -604,22 +654,374 @@ hounding you about broken pages.</p>

 <h3 id="whyutf8-htmlpurifier">HTML Purifier</h3>

-<p>And finally, we get to HTML Purifier.</p>
+<p>And finally, we get to HTML Purifier.  HTML Purifier is built to
+deal with UTF-8: any indications otherwise are the result of an
+encoder that converts text from your preferred encoding to UTF-8, and
+back again.  HTML Purifier never touches anything else, and leaves
+it up to the module iconv to do the dirty work.</p>
+
+<p>This approach, however, is not perfect. iconv is blithely unaware
+of HTML character entities. HTML Purifier, in order to
+protect against sophisticated escaping schemes, normalizes all character
+and numeric entities before processing the text. This leads to
+one important ramification:</p>
+
+<p><strong>Any character that is not supported by the target character
+set, regardless of whether or not it is in the form of a character
+entity or a raw character, will be silently ignored.</strong></p>
+
+<p>Example of this principle at work: say you have <code>&amp;theta;</code>
+in your HTML, but the output is in Latin-1 (which, understandably,
+does not understand Greek), the following process will occur (assuming you've
+set the encoding correctly using %Core.Encoding):</p>
+
+<ul>
+    <li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
+        (note that theta is preserved since it doesn't actually use
+        any non-ASCII characters): <code>&amp;theta;</code></li>
+    <li>The <code>EntityParser</code> will transform all named and numeric
+        character entities to their corresponding raw UTF-8 equivalents:
+        <code>&theta;</code></li>
+    <li>HTML Purifier processes the code: <code>&theta;</code></li>
+    <li>The <code>Encoder</code> now transforms the text back from UTF-8
+        to ISO 8859-1. Since Greek is not supported by ISO 8859-1, it
+        will be either ignored or replaced with a question mark:
+        <code>?</code></li>
+</ul>
+
+<p>This behaviour is quite unsatisfactory. It is a deal-breaker for
+international applications, and it can be mildly annoying for the provincial
+soul who occasionally needs a special character. Since 1.4.0, HTML
+Purifier has provided a slightly more palatable workaround using
+%Core.EscapeNonASCIICharacters. The process now looks like:</p>
+
+<ul>
+    <li>The <code>Encoder</code> transforms encoding to UTF-8: <code>&amp;theta;</code></li>
+    <li>The <code>EntityParser</code> transforms entities: <code>&theta;</code></li>
+    <li>HTML Purifier processes the code: <code>&theta;</code></li>
+    <li>The <code>Encoder</code> replaces all non-ASCII characters
+        with numeric entities: <code>&amp;#952;</code></li>
+    <li>For good measure, <code>Encoder</code> transforms encoding back to
+        original (which is strictly unnecessary for 99% of encodings
+        out there): <code>&amp;#952;</code> (remember, it's all ASCII!)</li>
+</ul>
+
+<p>...which means that this is only good for an occasional foray into
+the land of Unicode characters, and is totally unacceptable for Chinese
+or Japanese texts. The even bigger kicker is that, supposing the
+input encoding was actually ISO-8859-7, which <em>does</em> support
+theta, the character would get entity-ized anyway! (The Encoder does
+not discriminate).</p>
+
+<p>The current functionality is about where HTML Purifier will be for
+the rest of eternity. HTML Purifier could attempt to preserve the original
+form of the entities so that they could be substituted back in, only the
+DOM extension kills them off irreversibly. HTML Purifier could also attempt
+to be smart and only convert non-ASCII characters that weren't supported
+by the target encoding, but that would require reimplementing iconv
+with HTML awareness, something I will not do.</p>
+
+<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
+not being sarcastic here: some people couldn't care less about other languages)</p>

 <h2 id="migrate">Migrate to UTF-8</h2>

-<h3 id="migrate-editor">Text editor</h3>
+<p>So, you've decided to bite the bullet, and want to migrate to UTF-8.
+Note that this is not for the faint-hearted, and you should expect
+the process to take longer than you think it will take.</p>
+
+<p>The general idea is that you convert all existing text to UTF-8,
+and then you set all the headers and META tags we discussed earlier
+to UTF-8. There are many ways of going about this: you could
+write a conversion script that runs through the database and re-encodes
+everything as UTF-8 or you could do the conversion on the fly when someone
+reads the page. The details depend on your system, but I will cover
+some of the more subtle points of migration that may trip you up.</p>

 <h3 id="migrate-db">Configuring your database</h3>

-<h3 id="migrate-convert">Convert old text</h3>
+<p>Most modern databases, the most prominent open-source ones being MySQL
+4.1+ and PostgreSQL, support character encodings. If you're switching
+to UTF-8, logically speaking, you'd want to make sure your database
+knows about the change too. There are some caveats though:</p>
+
+<h4 id="migrate-db-legit">Legit method</h4>
+
+<p>Standardization in terms of SQL syntax for specifying character
+encodings is notoriously spotty. Refer to your respective database's
+documentation on how to do this properly.</p>
+
+<p>For <a href="http://dev.mysql.com/doc/refman/5.0/en/charset-conversion.html">MySQL</a>, <code>ALTER</code> will magically perform the
+character encoding conversion for you. However, you have
+to make sure that the text inside the column is what it says it is:
+if you had put Shift-JIS in an ISO 8859-1 column, MySQL will irreversibly mangle
+the text when you try to convert it to UTF-8. You'll have to convert
+it to a binary field, convert it to a Shift-JIS field (the real encoding),
+and then finally to UTF-8. Many a website had pages irreversibly mangled
+because they didn't realize that they'd been deluding themselves about
+the character encoding all along; don't become the next victim.</p>
+
+<p>For <a href="http://www.postgresql.org/docs/8.2/static/multibyte.html">PostgreSQL</a>, there appears to be no direct way to change the
+encoding of a database (as of 8.2). You will have to dump the data, and then reimport
+it into a new table. Make sure that your client encoding is set properly:
+this is how PostgreSQL knows to perform an encoding conversion.</p>
+
+<p>Many times, you will also be asked about the &quot;collation&quot; of
+the new column. Collation is how a DBMS sorts text, like ordering
+B, C and A into A, B and C (the problem gets surprisingly complicated
+when you get to languages like Thai and Japanese). If in doubt,
+going with the default setting is usually a safe bet.</p>
+
+<p>Once the conversion is all said and done, you still have to remember
+to set the client encoding (your encoding) properly on each database
+connection using <code>SET NAMES</code> (which is standard SQL and is
+usually supported).</p>
+
+<h4 id="migrate-db-binary">Binary</h4>
+
+<p>Due to the abovementioned compatibility issues, a more interoperable
+way of storing UTF-8 text is to stuff it in a binary datatype.
+<code>CHAR</code> becomes <code>BINARY</code>, <code>VARCHAR</code> becomes
+<code>VARBINARY</code> and <code>TEXT</code> becomes <code>BLOB</code>.
+Doing so can save you some huge headaches:</p>
+
+<ul>
+    <li>The syntax for binary data types is very portable,</li>
+    <li>MySQL 4.0 has <em>no</em> support for character encodings, so
+        if you want to support it you <em>have</em> to use binary,</li>
+    <li>MySQL, as of 5.1, has no support for four byte UTF-8 characters,
+        which represent characters beyond the basic multilingual
+        plane, and</li>
+    <li>You will never have to worry about your DBMS being too smart
+        and attempting to convert your text when you don't want it to.</li>
+</ul>
+
+<p>MediaWiki, a very prominent international application, uses binary fields
+for storing their data because of point three.</p>
+
+<p>There are drawbacks, of course:</p>
+
+<ul>
+    <li>Database tools like PHPMyAdmin won't be able to offer you inline
+        text editing, since it is declared as binary,</li>
+    <li>It's not semantically correct: it's really text not binary
+        (lying to the database),</li>
+    <li>Unless you use the not-very-portable wizardry mentioned above,
+        you have to change the encoding yourself (usually, you'd do
+        it on the fly), and</li>
+    <li>You will not have collation.</li>
+</ul>
+
+<p>Choose based on your circumstances.</p>
+
+<h3 id="migrate-editor">Text editor</h3>
+
+<p>For more flat-file oriented systems, you will often be tasked with
+converting reams of existing text and HTML files into UTF-8, as well as
+making sure that all new files uploaded are properly encoded. Once again,
+I can only point vaguely in the right direction for converting your
+existing files: make sure you backup, make sure you use
+<a href="http://php.net/ref.iconv">iconv</a>(), and
+make sure you know what the original character encoding of the files
+is (or are, depending on the tidiness of your system).</p>
+
+<p>However, I can proffer more specific advice on the subject of
+text editors. Many text editors have notoriously spotty Unicode support.
+To find out how your editor is doing, you can check out <a
+href="http://www.alanwood.net/unicode/utilities_editors.html">this list</a>
+or <a href="http://en.wikipedia.org/wiki/Comparison_of_text_editors#Encoding_support">Wikipedia's list.</a>
+I personally use Notepad++, which works like a charm when it comes to UTF-8.
+Usually, you will have to <strong>explicitly</strong> tell the editor through some dialogue
+(usually Save as or Format) what encoding you want it to use. An editor
+will often offer &quot;Unicode&quot; as a method of saving, which is
+ambiguous. Make sure you know whether or not they really mean UTF-8
+or UTF-16 (which is another flavor of Unicode).</p>
+
+<p>The two things to look out for are whether or not the editor
+supports <strong>font mixing</strong> (multiple
+fonts in one document) and whether or not it adds a <strong>BOM</strong>.
+Font mixing is important because fonts rarely have support for every
+language known to mankind: in order to be flexible, an editor must
+be able to take a little from here and a little from there, otherwise
+all your Chinese characters will come as nice boxes. We'll discuss
+BOM below.</p>

 <h3 id="migrate-bom">Byte Order Mark (headers already sent!)</h3>

+<p>The BOM, or <a href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte
+Order Mark</a>, is a magical, invisible character placed at
+the beginning of UTF-8 files to tell people what the encoding is and
+what the endianness of the text is. It is also unnecessary.</p>
+
+<p>Because it's invisible, it often
+catches people by surprise when it starts doing things it shouldn't
+be doing. For example, this PHP file:</p>
+
+<pre><strong>BOM</strong>&lt;?php
+header('Location: index.php');
+?&gt;</pre>
+
+<p>...will fail with the all too familiar <strong>Headers already sent</strong>
+PHP error. And because the BOM is invisible, this culprit will go unnoticed.
+My suggestion is to only use ASCII in PHP pages, but if you must, make
+sure the page is saved WITHOUT the BOM.</p>
+
+<blockquote class="aside">
+    <p>The headers the error is referring to are <strong>HTTP headers</strong>,
+       which are sent to the browser before any HTML to tell it various
+       information. The moment any regular text (and yes, a BOM counts as
+       ordinary text) is output, the headers must be sent, and you are
+       not allowed to send any more headers. Thus, the error.</p>
+</blockquote>
+
+<p>If you are reading in text files to insert into the middle of another
+page, it is strongly advised (but not strictly necessary) that you replace out the UTF-8 byte 
+sequence for BOM <code>&quot;\xEF\xBB\xBF&quot;</code> before inserting it in,
+via:</p>
+
+<pre>$text = str_replace(&quot;\xEF\xBB\xBF&quot;, '', $text);</pre>
+
+<h3 id="migrate-fonts">Fonts</h3>
+
+<p>Generally speaking, people who are having trouble with fonts fall
+into two categories:</p>
+
+<ul>
+<li>Those who want to
+use an extremely obscure language for which there is very little
+support even among native speakers of the language, and</li>
+<li>Those where the primary language of the text is
+well-supported but there are occasional characters
+that aren't supported.</li>
+</ul>
+
+<p>Yes, there's always a chance where an English user happens across
+a Sinhalese website and doesn't have the right font. But an English user
+who happens not to have the right fonts probably has no business reading Sinhalese
+anyway. So we'll deal with the other two edge cases.</p>
+
+<h4 id="migrate-fonts-obscure">Obscure scripts</h4>
+
+<p>If you run a Bengali website, you may get comments from users who
+would like to read your website but get heaps of question marks or
+other meaningless characters. Fixing this problem requires the
+installation of a font or language pack which is often highly
+dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_help">Here is an example</a>
+of such a help file for the Bengali language, I am sure there are
+others out there too. You just have to point users to the appropriate
+help file.</p>
+
+<h4 id="migrate-fonts-occasional">Occasional use</h4>
+
+<p>A prime example of when you'll see some very obscure Unicode
+characters embedded in what otherwise would be very bland ASCII are
+letters of the
+<a href="http://en.wikipedia.org/wiki/International_Phonetic_Alphabet">International
+Phonetic Alphabet (IPA)</a>, used to designate pronunciations in a very standard
+manner (you probably see them all the time in your dictionary). Your
+average font probably won't have support for all of the IPA characters
+like &#664; (bilabial click) or &#658; (voiced postalveolar fricative).
+So what's a poor browser to do? Font mix! Smart browsers like Mozilla Firefox
+and Internet Explorer 7 will borrow glyphs from other fonts in order
+to make sure that all the characters display properly.</p>
+
+<p>But what happens when the browser isn't smart and happens to be the
+most widely used browser in the entire world? Microsoft IE 6
+is not smart enough to borrow from other fonts when a character isn't
+present, so more often than not you'll be slapped with a nice big &#65533;.
+To get things to work, MSIE 6 needs a little nudge. You could configure it
+to use a different font to render the text, but you can achieve the same
+effect by selectively changing the font for blocks of special characters
+to known good Unicode fonts.</p>
+
+<p>Fortunately, the folks over at Wikipedia have already done all the
+heavy lifting for you. Get the CSS from the horse's mouth here:
+<a href="http://en.wikipedia.org/wiki/MediaWiki:Common.css">Common.css</a>,
+and search for &quot;.IPA&quot;. There are also a smattering of
+other classes you can use for other purposes, check out 
+<a href="http://meta.wikimedia.org/wiki/Help:Special_characters#Displaying_Special_Characters">this page</a>
+for more details. For you lazy ones, this should work:</p>
+
+<pre>.Unicode {
+        font-family: Code2000, &quot;TITUS Cyberbit Basic&quot;, &quot;Doulos SIL&quot;,
+            &quot;Chrysanthi Unicode&quot;, &quot;Bitstream Cyberbit&quot;,
+            &quot;Bitstream CyberBase&quot;, Thryomanes, Gentium, GentiumAlt,
+            &quot;Lucida Grande&quot;, &quot;Arial Unicode MS&quot;, &quot;Microsoft Sans Serif&quot;,
+            &quot;Lucida Sans Unicode&quot;;
+        font-family /**/:inherit; /* resets fonts for everyone but IE6 */
+}</pre>
+
+<p>The standard usage goes along the lines of <code>&lt;span class=&quot;Unicode&quot;&gt;Crazy
+Unicode stuff here&lt;/span&gt;</code>. Characters in the
+<a href="http://en.wikipedia.org/wiki/Windows_Glyph_List_4">Windows Glyph List</a>
+usually don't need to be fixed, but for anything else you probably
+want to play it safe. Unless, of course, you don't care about IE6
+users.</p>
+
 <h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>

+<p>When people claim that PHP6 will solve all our Unicode problems, they're
+misinformed. It will not fix any of the abovementioned troubles. It will,
+however, fix the problem we are about to discuss: processing UTF-8 text
+in PHP.</p>
+
+<p>PHP (as of PHP5) is blithely unaware of the existence of UTF-8 (with a few
+notable exceptions). Sometimes, this will cause problems, other times,
+this won't. So far, we've avoided discussing the architecture of
+UTF-8, so, we must first ask, what is UTF-8? Yes, it supports Unicode,
+and yes, it is variable width. Other traits:</p>
+
+<ul>
+    <li>Every character's byte sequence is unique and will never be found
+        inside the byte sequence of another character,</li>
+    <li>UTF-8 may use up to four bytes to encode a character,</li>
+    <li>UTF-8 text must be checked for well-formedness,</li>
+    <li>Pure ASCII is also valid UTF-8, and</li>
+    <li>Binary sorting will sort UTF-8 in the same order as Unicode.</li>
+</ul>
+
+<p>Each of these traits affects different domains of text processing
+in different ways. It is beyond the scope of this document to explain
+what precisely these implications are. PHPWact provides
+a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
+on what to expect from each function, although coverage is spotty in
+some areas. Their more general notes on
+<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
+are also worth looking at for information on UTF-8. Some rules of thumb
+when dealing with Unicode text:</p>
+
+<ul>
+    <li>Do not EVER use functions that:<ul>
+        <li>...convert case (strtolower, strtoupper, ucfirst, ucwords)</li>
+        <li>...claim to be case-insensitive (str_ireplace, stristr, strcasecmp)</li>
+    </ul></li>
+    <li>Think twice before using functions that:<ul>
+        <li>...count characters (strlen will return bytes, not characters;
+            str_split and wordwrap may corrupt)</li>
+        <li>...entity-ize things (UTF-8 doesn't need entities)</li>
+        <li>...do very complex string processing (*printf)</li>
+    </ul></li>
+</ul>
+
+<p>...and always think in bytes, not characters. If you use strpos()
+to find the position of a character, it will be in bytes, but this
+usually won't matter since substr() also operates with byte indices!</p>
+
+<p>You'll also need to make sure your UTF-8 is well-formed and will
+probably need replacements for some of these functions. I recommend
+using Harry Fuecks' <a href="http://phputf8.sourceforge.net/">PHP
+UTF-8</a> library, rather than use mbstring directly. HTML Purifier
+also defines a few useful UTF-8 compatible functions: check out
+<code>Encoder.php</code> in the <code>/library/HTMLPurifier/</code>
+directory.</p>
+
 <h2 id="externallinks">Further Reading</h2>

+<p>Well, that's it. Hopefully this document has served as a very
+practical springboard into knowledge of how UTF-8 works.  You may have
+decided that you don't want to migrate yet: that's fine, just know
+what will happen to your output and what bug reports you may receive.</p>
+
 <p>Many other developers have already discussed the subject of Unicode,
 UTF-8 and internationalization, and I would like to defer to them for
 a more in-depth look into character sets and encodings.</p>
--- a/docs/fixquotes.htc
+++ b/docs/fixquotes.htc
@ -0,0 +1,6 @@
+<public:attach event="oncontentready" onevent="init();" />
+<script>
+function init() {
+  element.innerHTML = '&#8220;'+element.innerHTML+'&#8221;';
+}
+</script>
--- a/docs/index.html
+++ b/docs/index.html
@ -31,7 +31,7 @@ information for casual developers using HTML Purifier.</p>
 <dt><a href="enduser-slow.html">Speeding up HTML Purifier</a></dt>
 <dd>Explains how to speed up HTML Purifier through caching or inbound filtering.</dd>

-<dt><a href="enduser-utf8.html">UTF-8</a></dt>
+<dt><a href="enduser-utf8.html">UTF-8: The Secret of Character Encoding</a></dt>
 <dd>Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.</dd>

 </dl>
@ -54,6 +54,10 @@ conventions.</p>
 <dt><a href="dev-optimization.html">Optimization</a></dt>
 <dd>Discusses possible methods of optimizing HTML Purifier.</dd>

+<dt><a href="dev-advanced-api.html">Advanced API</a></dt>
+<dd>Functional specification for HTML Purifier's advanced API for defining
+custom filtering behavior.</dd>
+
 </dl>

 <h2>Proposals</h2>
--- a/docs/proposal-config.txt
+++ b/docs/proposal-config.txt
@ -7,7 +7,7 @@ value is used for.  This means decentralized configuration declarations that
 are nevertheless error checking and a centralized configuration object.

 Directives are divided into namespaces, indicating the major portion of
-functionality they cover (although there may be overlaps.  Please consult
+functionality they cover (although there may be overlaps).  Please consult
 the documentation in ConfigDef for more information on these namespaces.

 Since configuration is dependant on context, internal classes require a
@ -36,4 +36,5 @@ the definition, you'd have to force reconstruction.

 In practice, the pulling directives from the config object are
 solely need-based, and the flex points are littered throughout the
-setup() function.  Some sort of refactoring is likely in order.
+setup() function.  Some sort of refactoring is likely in order. See
+ref-xhtml-1.1.txt for more info.
--- a/docs/proposal-language.txt
+++ b/docs/proposal-language.txt
@ -1,42 +1,6 @@
 We are going to model our I18N/L10N off of MediaWiki's system.  Their's is
 obviously quite complicated, so we're going to simplify it a bit for our needs.

-== Structure ==
-
-First, you have a Language object.  This object contains all the localisable
-message strings, as well as other important language-specific settings and
-custom behavior (uppercasing, lowercasing, printing dates, formatting
-numbers, etc.)
-
-The object is constructed from two sources: subclassed versions of itself
-(classes) and Message files (messages).
-
-== General use ==
-
-You load a language object by calling the Language::factory() function. 
-This function the class file for the object (taking in account fallback 
-languages by using the fallback langauge's object but overloading the 
-language key) and returns that object. Nothing else happens.
-
-When a message/etc is requested, a lazy load initializor is called.  Now the
-real work starts.  We're first going to take the scenario that the language
-is not cached.  The system loads the Messages file by:
-
-    require( $filename );
-    $cache = compact( self::$mLocalisationKeys );	
-
-...where self::$mLocalisationKeys is the name of variables that could be used
-in the localization file. This lets you use things like:
-
-    $fallback = false;
-    $rtl = false;
-
-...and easily siphon them into arrays.
-
-Then, we load the $fallback language (if not set, English) to fill in the gaps in
-the messages.  There is specialized behavior for certain keys, as they can be
-mergeable maps, lists or alias lists (not sure what the last one is).
-
 == Caching ==

 MediaWiki has lots of caching mechanisms built in, which make the code somewhat
--- a/docs/ref-loose-vs-strict.txt
+++ b/docs/ref-loose-vs-strict.txt
@ -32,6 +32,6 @@ A tag's attribute 'target' (for selecting frames) cut
    current behavior: no substitute, just delete when in strict, allow in loose
 Attribute 'name' deprecated in favor of 'id'
    current behavior: dropped silently
-    projected behavior: create proper AttrTransform (currently not allowed at all)
+    projected behavior: create proper AttrTransform
 [done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
    current behavior: disallow as usual
--- a/docs/ref-xhtml-1.1.txt
+++ b/docs/ref-xhtml-1.1.txt
@ -1,21 +1,187 @@

-Getting XHTML 1.1 Working
-
-It's quite simple, according to <http://www.w3.org/TR/xhtml11/changes.html>
+XHTML 1.1 and HTML Purifier

+Todo for XHTML 1.1 support <http://www.w3.org/TR/xhtml11/changes.html>
 1. Scratch lang entirely in favor of xml:lang
 2. Scratch name entirely in favor of id (partially-done)
 3. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>

-...but that's only an informative section. More things to do:
+HTML Purifier uses the modularization of XHTML
+<http://www.w3.org/TR/xhtml-modularization/> to organize the internals
+of HTMLDefinition into a more manageable and extensible fashion. Rather
+than have one super-object, HTMLDefinition is split into HTMLModules,
+each of which is responsible for defining elements, their attributes,
+and other properties (for more in-depth coverage, see
+/library/HTMLPurifier/HTMLModule.php's docblock comments).

-1. Scratch style attribute (it's deprecated)
-2. Be module-aware (this might entail intelligent grouping in the definition
-   and allowing users to specifically remove certain modules (see 5))
-3. Cross-reference minimal content models with existing DTDs and determine
-   changes (todo)
-4. Watch out for the Legacy Module
-<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_legacymodule>
-5. Let users specify their own custom modules
-6. Study Modularization document
-<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/>
+The modules that W3C defines and we support are:
+
+    * 5.1. Attribute Collections (technically not a module)
+    * 5.2. Core Modules
+          o 5.2.2. Text Module
+          o 5.2.3. Hypertext Module
+          o 5.2.4. List Module
+    * 5.4. Text Extension Modules
+          o 5.4.1. Presentation Module
+          o 5.4.2. Edit Module
+          o 5.4.3. Bi-directional Text Module
+    * 5.6. Table Modules
+          o 5.6.2. Tables Module
+    * 5.7. Image Module
+    * 5.18. Style Attribute Module
+
+Modules that we don't support but could support are:
+
+    * 5.6. Table Modules
+          o 5.6.1. Basic Tables Module [?]
+    * 5.8. Client-side Image Map Module [?]
+    * 5.9. Server-side Image Map Module [?]
+    * 5.12. Target Module [?]
+    * 5.21. Name Identification Module [deprecated]
+    * 5.22. Legacy Module [deprecated]
+
+These modules will not be implemented due to their dangerousness or
+inapplicability as an XHTML fragment:
+
+    * 5.2. Core Modules
+          o 5.2.1. Structure Module
+    * 5.3. Applet Module
+    * 5.5. Forms Modules
+          o 5.5.1. Basic Forms Module
+          o 5.5.2. Forms Module
+    * 5.10. Object Module
+    * 5.11. Frames Module
+    * 5.13. Iframe Module
+    * 5.14. Intrinsic Events Module
+    * 5.15. Metainformation Module
+    * 5.16. Scripting Module
+    * 5.17. Style Sheet Module
+    * 5.19. Link Module
+    * 5.20. Base Module
+
+We will not be using W3C's XML Schemas or DTDs directly due to the lack
+of robust tools for handling them (the main problem is that all the
+current parsers are usually PHP 5 only and solely-validating, not
+correcting).
+
+The abstraction of the HTMLDefinition creation process will also
+contribute to a need for a caching system. Cache invalidation would be
+difficult, but could be done by comparing the HTML and Attr config
+namespaces with a copy that was packaged along with the serialized
+HTMLDefinition object.
+
+== General Use-Case ==
+
+The outwards API of HTMLDefinition has been largely preserved, not
+only for backwards-compatibility but also by design. Instead,
+HTMLDefinition can be retrieved "raw", in which it loads a structure
+that closely resembles the modules of XHTML 1.1. This structure is very
+dynamic, making it easy to make cascading changes to global content
+sets or remove elements in bulk.
+
+However, once HTML Purifier needs the actual definition, it retrieves
+a finalized version of HTMLDefinition. The finalized definition involves
+processing the modules into a form that is optimized for multiple
+calls. This final version is immutable and, even if editable, would
+be extremely hard to change.
+
+So, some code taking advantage of the XHTML modularization may look
+like this:
+
+<?php
+    $config = HTMLPurifier_Config::createDefault();
+    $def =& $config->getHTMLDefinition(true); // reference to raw
+    unset($def->modules['Hypertext']); // rm ''a'' link
+    $purifier = new HTMLPurifier($config);
+    $purifier->purify($html); // now the definition is finalized
+?>
+
+== Inclusions ==
+
+One of the nice features of HTMLDefinition is that piggy-backing off
+of global attribute and content sets is extremely easy to do.
+
+=== Attributes ===
+
+HTMLModule->elements[$element]->attr stores attribute information for the
+specific attributes of $element. This is quite close to the final
+API that HTML Purifier interfaces with, but there's an important
+extra feature: attr may also contain an array with a member index zero.
+
+<?php
+    HTMLModule->elements[$element]->attr[0] = array('AttrSet');
+?>
+
+Rather than map the attribute key 0 to an array (which should be
+an AttrDef), it defines a number of attribute collections that should
+be merged into this element's attribute array.
+
+Furthermore, the value of an attribute key, attribute value pair need
+not be a fully fledged AttrDef object. They can also be a string, which
+signifies an AttrDef that is looked up from a centralized registry
+AttrTypes. This allows more concise attribute definitions that look
+more like W3C's declarations, as well as offering a centralized point
+for modifying the behavior of one attribute type. And, of course, the
+old method of manually instantiating an AttrDef still works.
+
+=== Attribute Collections ===
+
+Attribute collections are stored and processed in the AttrCollections
+object, which is responsible for performing the inclusions signified
+by the 0 index. These attribute collections, too, are mutable, by
+using HTMLModule->attr_collections. You may add new attributes
+to a collection or define an entirely new collection for your module's
+use. Inclusions can also be cumulative.
+
+Attribute collections allow us to get rid of so called "global attributes"
+(which actually aren't so global).
+
+=== Content Models and ChildDef ===
+
+An implementation of the above-mentioned attributes and attribute
+collections was applied to the ChildDef system. HTML Purifier uses
+a proprietary system called ChildDef for performance and flexibility
+reasons, but this does not line up very well with W3C's notion of
+regexps for defining the allowed children of an element.
+
+HTMLPurifier->elements[$element]->content_model and 
+HTMLPurifier->elements[$element]->content_model_type store information
+about the final ChildDef that will be stored in
+HTMLPurifier->elements[$element]->child (we use a different variable
+because the two forms are sufficiently different).
+
+$content_model is an abstract, string representation of the internal
+state of ChildDef, while $content_model_type is a string identifier
+of which ChildDef subclass to instantiate. $content_model is processed
+by substituting all content set identifiers (capitalized element names)
+with their contents. It is then parsed and passed into the appropriate
+ChildDef class, as defined by the ContentSets->getChildDef() or the
+custom fallback HTMLModule->getChildDef() for custom child definitions
+not in the core.
+
+You'll need to use these facilities if you plan on referencing a content
+set like "Inline" or "Block", and using them is recommended even if you're
+not due to their conciseness.
+
+A few notes on $content_model: its structure can be as complicated
+as you want, but the pipe symbol (|) is reserved for defining possible
+choices, due to the content sets implementation. For example, a content
+model that looks like:
+
+"Inline -> Block -> a"
+
+...when the Inline content set is defined as "span | b" and the Block
+content set is defined as "div | blockquote", will expand into:
+
+"span | b -> div | blockquote -> a"
+
+The custom HTMLModule->getChildDef() function will need to be able to
+then feed this information to ChildDef in a usable manner.
+
+=== Content Sets ===
+
+Content sets can be altered using HTMLModule->content_sets, an associative
+array of content set names to content set contents. If the content set
+already exists, your values are appended on to it (great for, say,
+registering the font tag as an inline element), otherwise it is
+created. They are substituted into content_model.
--- a/docs/style.css
+++ b/docs/style.css
@ -42,3 +42,27 @@ blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;

 /* Contains, without exception, $Id$, for SVN version info. */
 #version {text-align:right; font-style:italic; margin:2em 0;}
+
+#toc ol ol {list-style-type:lower-roman;}
+#toc ol {list-style-type:decimal;}
+#toc {list-style-type:upper-alpha;}
+
+q {
+  behavior: url(fixquotes.htc); /* IE fix */
+  quotes: '\201C' '\201D' '\2018' '\2019';
+}
+q:before {
+  content: open-quote;
+}
+q:after {
+  content: close-quote;
+}
+
+/* Marks off implementation details interesting only to the person writing
+   the class described in the spec. */
+.technical {margin-left:2em; }
+.technical:before {content:"Technical note: "; font-weight:bold; color:#061; }
+
+/* Marks off sections that are lacking. */
+.fixme {margin-left:2em; }
+.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
--- a/library/HTMLPurifier/AttrCollections.php
+++ b/library/HTMLPurifier/AttrCollections.php
@ -0,0 +1,100 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTypes.php';
+require_once 'HTMLPurifier/AttrDef/Lang.php';
+
+/**
+ * Defines common attribute collections that modules reference
+ */
+
+class HTMLPurifier_AttrCollections
+{
+    
+    /**
+     * Associative array of attribute collections, indexed by name.
+     * Each collection maps attribute names to definitions; while a
+     * collection is being built, index 0 holds the list of names of
+     * other collections it includes.
+     * @note Technically, the composition of these is more complicated,
+     *       but we bypass it using our own excludes property
+     */
+    var $info = array();
+    
+    /**
+     * Performs all expansions on internal data for use by other inclusions
+     * It also collects all attribute collection extensions from
+     * modules
+     * @param $attr_types HTMLPurifier_AttrTypes instance
+     * @param $modules Hash array of HTMLPurifier_HTMLModule members
+     */
+    function HTMLPurifier_AttrCollections($attr_types, $modules) {
+        $info =& $this->info;
+        // load extensions from the modules
+        foreach ($modules as $module) {
+            foreach ($module->attr_collections as $coll_i => $coll) {
+                foreach ($coll as $attr_i => $attr) {
+                    if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
+                        // an inclusion list already exists: append to it
+                        // rather than overwriting it
+                        $info[$coll_i][$attr_i] = array_merge(
+                            $info[$coll_i][$attr_i], $attr);
+                        continue;
+                    }
+                    // plain attributes: the later module wins
+                    $info[$coll_i][$attr_i] = $attr;
+                }
+            }
+        }
+        // perform internal expansions and inclusions
+        foreach ($info as $name => $attr) {
+            // merge attribute collections that include others
+            $this->performInclusions($info[$name]);
+            // replace string identifiers with actual attribute objects
+            $this->expandIdentifiers($info[$name], $attr_types);
+        }
+    }
+    
+    /**
+     * Takes a reference to an attribute associative array and performs
+     * all inclusions specified by the zero index, following inclusions
+     * of inclusions as well. Attributes already present in $attr are
+     * never overwritten. The zero index is removed afterwards.
+     * @param &$attr Reference to attribute array
+     */
+    function performInclusions(&$attr) {
+        if (!isset($attr[0])) return;
+        $merge = $attr[0];
+        $seen  = array(); // collections already merged, guards against cycles
+        // loop through all the inclusions; $merge may grow while we iterate
+        for ($i = 0; isset($merge[$i]); $i++) {
+            $name = $merge[$i];
+            if (isset($seen[$name])) continue;
+            $seen[$name] = true;
+            // nothing to copy from a collection that was never defined
+            if (!isset($this->info[$name])) continue;
+            // foreach attribute of the inclusion, copy it over
+            foreach ($this->info[$name] as $key => $value) {
+                if (isset($attr[$key])) continue; // also catches more inclusions
+                $attr[$key] = $value;
+            }
+            if (isset($this->info[$name][0])) {
+                // recursion: queue the inclusions of the included collection
+                $merge = array_merge($merge, $this->info[$name][0]);
+            }
+        }
+        unset($attr[0]);
+    }
+    
+    /**
+     * Expands all string identifiers in an attribute array by replacing
+     * them with the appropriate values inside HTMLPurifier_AttrTypes.
+     * Entries set to false are removed from the array; other non-string
+     * entries are left untouched.
+     * @param &$attr Reference to attribute array
+     * @param $attr_types HTMLPurifier_AttrTypes instance
+     */
+    function expandIdentifiers(&$attr, $attr_types) {
+        foreach ($attr as $def_i => $def) {
+            // skip the inclusion list
+            if ($def_i === 0) continue;
+            // must be tested before is_string(), which false never passes
+            if ($def === false) {
+                unset($attr[$def_i]);
+                continue;
+            }
+            if (!is_string($def)) continue;
+            if (isset($attr_types->info[$def])) {
+                $attr[$def_i] = $attr_types->info[$def];
+            } else {
+                trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
+                unset($attr[$def_i]);
+            }
+        }
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/AttrDef/CSS/Background.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Background.php
@ -7,7 +7,7 @@ require_once 'HTMLPurifier/CSSDefinition.php';
 * Validates shorthand CSS property background.
 * @warning Does not support url tokens that have internal spaces.
 */
-class HTMLPurifier_AttrDef_Background extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
 {
    
    /**
@ -16,7 +16,7 @@ class HTMLPurifier_AttrDef_Background extends HTMLPurifier_AttrDef
     */
    var $info;
    
-    function HTMLPurifier_AttrDef_Background($config) {
+    function HTMLPurifier_AttrDef_CSS_Background($config) {
        $def = $config->getCSSDefinition();
        $this->info['background-color'] = $def->info['background-color'];
        $this->info['background-image'] = $def->info['background-image'];
--- a/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
+++ b/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
@ -1,8 +1,8 @@
 <?php

 require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/CSSLength.php';
-require_once 'HTMLPurifier/AttrDef/Percentage.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';

 /* W3C says:
    [ // adjective and number must be in correct order, even if
@ -45,15 +45,15 @@ require_once 'HTMLPurifier/AttrDef/Percentage.php';
 /**
 * Validates the value of background-position.
 */
-class HTMLPurifier_AttrDef_BackgroundPosition extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
 {
    
    var $length;
    var $percentage;
    
-    function HTMLPurifier_AttrDef_BackgroundPosition() {
-        $this->length     = new HTMLPurifier_AttrDef_CSSLength();
-        $this->percentage = new HTMLPurifier_AttrDef_Percentage();
+    function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
+        $this->length     = new HTMLPurifier_AttrDef_CSS_Length();
+        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
    }
    
    function validate($string, $config, &$context) {
--- a/library/HTMLPurifier/AttrDef/CSS/Border.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Border.php
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
 /**
 * Validates the border property as defined by CSS.
 */
-class HTMLPurifier_AttrDef_Border extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
 {
    
    /**
@ -13,7 +13,7 @@ class HTMLPurifier_AttrDef_Border extends HTMLPurifier_AttrDef
     */
    var $info = array();
    
-    function HTMLPurifier_AttrDef_Border($config) {
+    function HTMLPurifier_AttrDef_CSS_Border($config) {
        $def = $config->getCSSDefinition();
        $this->info['border-width'] = $def->info['border-width'];
        $this->info['border-style'] = $def->info['border-style'];
--- a/library/HTMLPurifier/AttrDef/CSS/Color.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Color.php
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
 /**
 * Validates Color as defined by CSS.
 */
-class HTMLPurifier_AttrDef_Color extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
 {
    
    /**
--- a/library/HTMLPurifier/AttrDef/CSS/Composite.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Composite.php
@ -9,7 +9,7 @@
 * especially useful for CSS values, which often are a choice between
 * an enumerated set of predefined values or a flexible data type.
 */
-class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
 {
    
    /**
@ -21,7 +21,7 @@ class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
    /**
     * @param $defs List of HTMLPurifier_AttrDef objects
     */
-    function HTMLPurifier_AttrDef_Composite($defs) {
+    function HTMLPurifier_AttrDef_CSS_Composite($defs) {
        $this->defs = $defs;
    }
    
--- a/library/HTMLPurifier/AttrDef/CSS/Font.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Font.php
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
 /**
 * Validates shorthand CSS property font.
 */
-class HTMLPurifier_AttrDef_Font extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
 {
    
    /**
@ -30,7 +30,7 @@ class HTMLPurifier_AttrDef_Font extends HTMLPurifier_AttrDef
        'status-bar' => true
    );
    
-    function HTMLPurifier_AttrDef_Font($config) {
+    function HTMLPurifier_AttrDef_CSS_Font($config) {
        $def = $config->getCSSDefinition();
        $this->info['font-style']   = $def->info['font-style'];
        $this->info['font-variant'] = $def->info['font-variant'];
--- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef.php';
 /**
 * Validates a font family list according to CSS spec
 */
-class HTMLPurifier_AttrDef_FontFamily extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
 {
    
    /**
--- a/library/HTMLPurifier/AttrDef/CSS/Length.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Length.php
@ -1,13 +1,12 @@
 <?php

 require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/Number.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Number.php';

 /**
 * Represents a Length as defined by CSS.
- * @warning Be sure not to confuse this with HTMLPurifier_AttrDef_Length!
 */
-class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
 {
    
    /**
@ -26,8 +25,8 @@ class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
     * @param $non_negative Bool indication whether or not negative values are
     *                      allowed.
     */
-    function HTMLPurifier_AttrDef_CSSLength($non_negative = false) {
-        $this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
+    function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
+        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
    }
    
    function validate($length, $config, &$context) {
--- a/library/HTMLPurifier/AttrDef/CSS/ListStyle.php
+++ b/library/HTMLPurifier/AttrDef/CSS/ListStyle.php
@ -6,16 +6,16 @@ require_once 'HTMLPurifier/AttrDef.php';
 * Validates shorthand CSS property list-style.
 * @warning Does not support url tokens that have internal spaces.
 */
-class HTMLPurifier_AttrDef_ListStyle extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
 {
    
    /**
     * Local copy of component validators.
-     * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
+     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
     */
    var $info;
    
-    function HTMLPurifier_AttrDef_ListStyle($config) {
+    function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
        $def = $config->getCSSDefinition();
        $this->info['list-style-type']     = $def->info['list-style-type'];
        $this->info['list-style-position'] = $def->info['list-style-position'];
--- a/library/HTMLPurifier/AttrDef/CSS/Multiple.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Multiple.php
@ -13,7 +13,7 @@ require_once 'HTMLPurifier/AttrDef.php';
 *       can only be used alone: it will never manifest as part of a multi
 *       shorthand declaration.  Thus, this class does not allow inherit.
 */
-class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
 {
    
    /**
@ -30,7 +30,7 @@ class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
     * @param $single HTMLPurifier_AttrDef to multiply
     * @param $max Max number of values allowed (usually four)
     */
-    function HTMLPurifier_AttrDef_Multiple($single, $max = 4) {
+    function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
        $this->single = $single;
        $this->max = $max;
    }
--- a/library/HTMLPurifier/AttrDef/CSS/Number.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Number.php
@ -3,7 +3,7 @@
 /**
 * Validates a number as defined by the CSS spec.
 */
-class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
 {
    
    /**
@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
    /**
     * @param $non_negative Bool indicating whether negatives are forbidden
     */
-    function HTMLPurifier_AttrDef_Number($non_negative = false) {
+    function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
        $this->non_negative = $non_negative;
    }
    
--- a/library/HTMLPurifier/AttrDef/CSS/Percentage.php
+++ b/library/HTMLPurifier/AttrDef/CSS/Percentage.php
@ -1,24 +1,24 @@
 <?php

 require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/Number.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Number.php';

 /**
 * Validates a Percentage as defined by the CSS spec.
 */
-class HTMLPurifier_AttrDef_Percentage extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
 {
    
    /**
-     * Instance of HTMLPurifier_AttrDef_Number to defer number validation
+     * Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
     */
    var $number_def;
    
    /**
     * @param Bool indicating whether to forbid negative values
     */
-    function HTMLPurifier_AttrDef_Percentage($non_negative = false) {
-        $this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
+    function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
+        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
    }
    
    function validate($string, $config, &$context) {
--- a/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
+++ b/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef.php';
 * @note This class could be generalized into a version that acts sort of
 *       like Enum except you can compound the allowed values.
 */
-class HTMLPurifier_AttrDef_TextDecoration extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
 {
    
    /**
--- a/library/HTMLPurifier/AttrDef/CSS/URI.php
+++ b/library/HTMLPurifier/AttrDef/CSS/URI.php
@ -4,17 +4,17 @@ require_once 'HTMLPurifier/AttrDef/URI.php';

 /**
 * Validates a URI in CSS syntax, which uses url('http://example.com')
- * @note While theoretically speaking we a URI in a CSS document could
+ * @note While theoretically speaking a URI in a CSS document could
 *       be non-embedded, as of CSS2 there is no such usage so we're
 *       generalizing it. This may need to be changed in the future.
 * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
 *          the separator, you cannot put a literal semicolon in
 *          in the URI. Try percent encoding it, in that case.
 */
-class HTMLPurifier_AttrDef_CSSURI extends HTMLPurifier_AttrDef_URI
+class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
 {
    
-    function HTMLPurifier_AttrDef_CSSURI() {
+    function HTMLPurifier_AttrDef_CSS_URI() {
        $this->HTMLPurifier_AttrDef_URI(true); // always embedded
    }
    
--- a/library/HTMLPurifier/AttrDef/Enum.php
+++ b/library/HTMLPurifier/AttrDef/Enum.php
@ -25,8 +25,8 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
     * @param $case_sensitive Bool indicating whether or not case sensitive
     */
    function HTMLPurifier_AttrDef_Enum(
-        $valid_values = array(), $case_sensitive = false) {
-        
+        $valid_values = array(), $case_sensitive = false
+    ) {
        $this->valid_values = array_flip($valid_values);
        $this->case_sensitive = $case_sensitive;
    }
--- a/library/HTMLPurifier/AttrDef/HTML/ID.php
+++ b/library/HTMLPurifier/AttrDef/HTML/ID.php
@ -3,6 +3,22 @@
 require_once 'HTMLPurifier/AttrDef.php';
 require_once 'HTMLPurifier/IDAccumulator.php';

+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'EnableID', false, 'bool',
+    'Allows the ID attribute in HTML.  This is disabled by default '.
+    'due to the fact that without proper configuration user input can '.
+    'easily break the validation of a webpage by specifying an ID that is '.
+    'already on the surrounding HTML.  If you don\'t mind throwing caution to '.
+    'the wind, enable this directive, but I strongly recommend you also '.
+    'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
+    'user supplied IDs (%Attr.IDPrefix).  This directive has been available '.
+    'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
+    'versions.'
+);
+HTMLPurifier_ConfigSchema::defineAlias(
+    'HTML', 'EnableAttrID', 'Attr', 'EnableID'
+);
+
 HTMLPurifier_ConfigSchema::define(
    'Attr', 'IDPrefix', '', 'string',
    'String to prefix to IDs.  If you have no idea what IDs your pages '.
@ -36,11 +52,16 @@ HTMLPurifier_ConfigSchema::define(
 *          blacklist. If you're hacking around, make sure you use load()!
 */

-class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
 {
    
+    // ref functionality disabled, since we also have to verify
+    // whether or not the ID it refers to exists
+    
    function validate($id, $config, &$context) {
        
+        if (!$config->get('Attr', 'EnableID')) return false;
+        
        $id = trim($id); // trim it first
        
        if ($id === '') return false;
@ -55,8 +76,10 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
                '%Attr.IDPrefix is set', E_USER_WARNING);
        }
        
-        $id_accumulator =& $context->get('IDAccumulator');
-        if (isset($id_accumulator->ids[$id])) return false;
+        //if (!$this->ref) {
+            $id_accumulator =& $context->get('IDAccumulator');
+            if (isset($id_accumulator->ids[$id])) return false;
+        //}
        
        // we purposely avoid using regex, hopefully this is faster
        
@ -71,7 +94,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
            $result = ($trim === '');
        }
        
-        if ($result) $id_accumulator->add($id);
+        if (/*!$this->ref && */$result) $id_accumulator->add($id);
        
        // if no change was made to the ID, return the result
        // else, return the new id if stripping whitespace made it
--- a/library/HTMLPurifier/AttrDef/HTML/Length.php
+++ b/library/HTMLPurifier/AttrDef/HTML/Length.php
@ -1,18 +1,16 @@
 <?php

 require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/Pixels.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';

 /**
 * Validates the HTML type length (not to be confused with CSS's length).
 * 
 * This accepts integer pixels or percentages as lengths for certain
- * HTML attributes. Don't use this for CSS: that's
- * HTMLPurifier_AttrDef_CSSLength which requires prefixes and allows a lot
- * more different types.
+ * HTML attributes.
 */

-class HTMLPurifier_AttrDef_Length extends HTMLPurifier_AttrDef_Pixels
+class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
 {
    
    function validate($string, $config, &$context) {
--- a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
+++ b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
@ -1,7 +1,7 @@
 <?php

 require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/Length.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Length.php';

 /**
 * Validates a MultiLength as defined by the HTML spec.
@ -9,7 +9,7 @@ require_once 'HTMLPurifier/AttrDef/Length.php';
 * A multilength is either a integer (pixel count), a percentage, or
 * a relative number.
 */
-class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
+class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
 {
    
    function validate($string, $config, &$context) {
@ -27,12 +27,14 @@ class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
        
        $int = substr($string, 0, $length - 1);
        
+        if ($int == '') return '*';
        if (!is_numeric($int)) return false;
        
        $int = (int) $int;
        
-        if ($int < 0) return '0*';
-        
+        if ($int < 0) return false;
+        if ($int == 0) return '0';
+        if ($int == 1) return '*';
        return ((string) $int) . '*';
        
    }
--- a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
+++ b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@ -4,9 +4,13 @@ require_once 'HTMLPurifier/AttrDef.php';
 require_once 'HTMLPurifier/Config.php';

 /**
- * Validates the contents of the global HTML attribute class.
+ * Validates contents based on NMTOKENS attribute type.
+ * @note The only current use for this is the class attribute in HTML
+ * @note Could have some functionality factored out into Nmtoken class
+ * @warning We cannot assume this class will be used only for 'class'
+ *          attributes. Not sure how to hook in magic behavior, then.
 */
-class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
 {
    
    function validate($string, $config, &$context) {
@ -31,10 +35,10 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
        
        if (empty($matches[1])) return false;
        
-        // reconstruct class string
+        // reconstruct string
        $new_string = '';
-        foreach ($matches[1] as $class_names) {
-            $new_string .= $class_names . ' ';
+        foreach ($matches[1] as $token) {
+            $new_string .= $token . ' ';
        }
        $new_string = rtrim($new_string);
        
--- a/library/HTMLPurifier/AttrDef/HTML/Pixels.php
+++ b/library/HTMLPurifier/AttrDef/HTML/Pixels.php
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
 /**
 * Validates an integer representation of pixels according to the HTML spec.
 */
-class HTMLPurifier_AttrDef_Pixels extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
 {
    
    function validate($string, $config, &$context) {
--- a/library/HTMLPurifier/AttrDef/Lang.php
+++ b/library/HTMLPurifier/AttrDef/Lang.php
@ -46,7 +46,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
        
        // process second subtag : $subtags[1]
        $length = strlen($subtags[1]);
-        if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
+        if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
            return $new_string;
        }
        if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
--- a/library/HTMLPurifier/AttrDef/URI.php
+++ b/library/HTMLPurifier/AttrDef/URI.php
@ -3,7 +3,7 @@
 require_once 'HTMLPurifier/AttrDef.php';
 require_once 'HTMLPurifier/URIScheme.php';
 require_once 'HTMLPurifier/URISchemeRegistry.php';
-require_once 'HTMLPurifier/AttrDef/Host.php';
+require_once 'HTMLPurifier/AttrDef/URI/Host.php';
 require_once 'HTMLPurifier/PercentEncoder.php';

 HTMLPurifier_ConfigSchema::define(
@ -77,6 +77,14 @@ HTMLPurifier_ConfigSchema::define(
    'This directive has been available since 1.3.0.'
 );

+HTMLPurifier_ConfigSchema::define(
+    'URI', 'Disable', false, 'bool',
+    'Disables all URIs in all forms. Not sure why you\'d want to do that '.
+    '(after all, the Internet\'s founded on the notion of a hyperlink). '.
+    'This directive has been available since 1.3.0.'
+);
+HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
+
 /**
 * Validates a URI as defined by RFC 3986.
 * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
@ -92,7 +100,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
     * @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
     */
    function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
-        $this->host = new HTMLPurifier_AttrDef_Host();
+        $this->host = new HTMLPurifier_AttrDef_URI_Host();
        $this->PercentEncoder = new HTMLPurifier_PercentEncoder();
        $this->embeds_resource = (bool) $embeds_resource;
    }
@ -102,6 +110,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
        // We'll write stack-based parsers later, for now, use regexps to
        // get things working as fast as possible (irony)
        
+        if ($config->get('URI', 'Disable')) return false;
+        
        // parse as CDATA
        $uri = $this->parseCDATA($uri);
        
--- a/library/HTMLPurifier/AttrDef/URI/Email.php
+++ b/library/HTMLPurifier/AttrDef/URI/Email.php
@ -2,7 +2,7 @@

 require_once 'HTMLPurifier/AttrDef.php';

-class HTMLPurifier_AttrDef_Email extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
 {
    
    /**
--- a/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php
+++ b/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php
@ -1,12 +1,12 @@
 <?php

-require_once 'HTMLPurifier/AttrDef/Email.php';
+require_once 'HTMLPurifier/AttrDef/URI/Email.php';

 /**
 * Primitive email validation class based on the regexp found at 
 * http://www.regular-expressions.info/email.html
 */
-class HTMLPurifier_AttrDef_Email_SimpleCheck extends HTMLPurifier_AttrDef_Email
+class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
 {
    
    function validate($string, $config, &$context) {
--- a/library/HTMLPurifier/AttrDef/URI/Host.php
+++ b/library/HTMLPurifier/AttrDef/URI/Host.php
@ -1,28 +1,28 @@
 <?php

 require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/IPv4.php';
-require_once 'HTMLPurifier/AttrDef/IPv6.php';
+require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
+require_once 'HTMLPurifier/AttrDef/URI/IPv6.php';

 /**
 * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
 */
-class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
 {
    
    /**
-     * Instance of HTMLPurifier_AttrDef_IPv4 sub-validator
+     * Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
     */
    var $ipv4;
    
    /**
-     * Instance of HTMLPurifier_AttrDef_IPv6 sub-validator
+     * Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
     */
    var $ipv6;
    
-    function HTMLPurifier_AttrDef_Host() {
-        $this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
-        $this->ipv6 = new HTMLPurifier_AttrDef_IPv6();
+    function HTMLPurifier_AttrDef_URI_Host() {
+        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
+        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }
    
    function validate($string, $config, &$context) {
--- a/library/HTMLPurifier/AttrDef/URI/IPv4.php
+++ b/library/HTMLPurifier/AttrDef/URI/IPv4.php
@ -6,7 +6,7 @@ require_once 'HTMLPurifier/AttrDef.php';
 * Validates an IPv4 address
 * @author Feyd @ forums.devnetwork.net (public domain)
 */
-class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
+class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
 {
    
    /**
@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
     */
    var $ip4;
    
-    function HTMLPurifier_AttrDef_IPv4() {
+    function HTMLPurifier_AttrDef_URI_IPv4() {
        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
    }
--- a/library/HTMLPurifier/AttrDef/URI/IPv6.php
+++ b/library/HTMLPurifier/AttrDef/URI/IPv6.php
@ -1,6 +1,6 @@
 <?php

-require_once 'HTMLPurifier/AttrDef/IPv4.php';
+require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';

 /**
 * Validates an IPv6 address.
@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrDef/IPv4.php';
 * @note This function requires brackets to have been removed from address
 *       in URI.
 */
-class HTMLPurifier_AttrDef_IPv6 extends HTMLPurifier_AttrDef_IPv4
+class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
 {
    
    function validate($aIP, $config, &$context) {
--- a/library/HTMLPurifier/AttrTypes.php
+++ b/library/HTMLPurifier/AttrTypes.php
@ -0,0 +1,41 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
+require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
+require_once 'HTMLPurifier/AttrDef/Integer.php';
+require_once 'HTMLPurifier/AttrDef/Text.php';
+require_once 'HTMLPurifier/AttrDef/URI.php';
+
+/**
+ * Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
+ */
+class HTMLPurifier_AttrTypes
+{
+    /**
+     * Map of attribute type names (as referenced by string in attribute
+     * definitions) to the validator object implementing each type
+     * @public
+     */
+    var $info = array();
+    
+    /**
+     * Fills the lookup table with one validator per known attribute type
+     */
+    function HTMLPurifier_AttrTypes() {
+        $validators = array(
+            'CDATA'       => new HTMLPurifier_AttrDef_Text(),
+            'ID'          => new HTMLPurifier_AttrDef_HTML_ID(),
+            'Length'      => new HTMLPurifier_AttrDef_HTML_Length(),
+            'MultiLength' => new HTMLPurifier_AttrDef_HTML_MultiLength(),
+            'NMTOKENS'    => new HTMLPurifier_AttrDef_HTML_Nmtokens(),
+            'Pixels'      => new HTMLPurifier_AttrDef_HTML_Pixels(),
+            'Text'        => new HTMLPurifier_AttrDef_Text(),
+            'URI'         => new HTMLPurifier_AttrDef_URI(),
+            // number is really a positive integer (one or more digits)
+            'Number'      => new HTMLPurifier_AttrDef_Integer(false, false, true),
+        );
+        foreach ($validators as $type => $validator) {
+            $this->info[$type] = $validator;
+        }
+    }
+}
+
+?>
--- a/library/HTMLPurifier/CSSDefinition.php
+++ b/library/HTMLPurifier/CSSDefinition.php
@ -1,19 +1,19 @@
 <?php

+require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
+require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
+require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
+require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
+require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
+require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
 require_once 'HTMLPurifier/AttrDef/Enum.php';
-require_once 'HTMLPurifier/AttrDef/Color.php';
-require_once 'HTMLPurifier/AttrDef/Composite.php';
-require_once 'HTMLPurifier/AttrDef/CSSLength.php';
-require_once 'HTMLPurifier/AttrDef/Percentage.php';
-require_once 'HTMLPurifier/AttrDef/Multiple.php';
-require_once 'HTMLPurifier/AttrDef/TextDecoration.php';
-require_once 'HTMLPurifier/AttrDef/FontFamily.php';
-require_once 'HTMLPurifier/AttrDef/Font.php';
-require_once 'HTMLPurifier/AttrDef/Border.php';
-require_once 'HTMLPurifier/AttrDef/ListStyle.php';
-require_once 'HTMLPurifier/AttrDef/CSSURI.php';
-require_once 'HTMLPurifier/AttrDef/BackgroundPosition.php';
-require_once 'HTMLPurifier/AttrDef/Background.php';

 /**
 * Defines allowed CSS attributes and what their values are.
@ -43,7 +43,7 @@ class HTMLPurifier_CSSDefinition
            array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
            'groove', 'ridge', 'inset', 'outset'), false);
        
-        $this->info['border-style'] = new HTMLPurifier_AttrDef_Multiple($border_style);
+        $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
        
        $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
            array('none', 'left', 'right', 'both'), false);
@ -54,10 +54,10 @@ class HTMLPurifier_CSSDefinition
        $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
            array('normal', 'small-caps'), false);
        
-        $uri_or_none = new HTMLPurifier_AttrDef_Composite(
+        $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
            array(
                new HTMLPurifier_AttrDef_Enum(array('none')),
-                new HTMLPurifier_AttrDef_CSSURI()
+                new HTMLPurifier_AttrDef_CSS_URI()
            )
        );
        
@ -68,11 +68,11 @@ class HTMLPurifier_CSSDefinition
            'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
        $this->info['list-style-image'] = $uri_or_none;
        
-        $this->info['list-style'] = new HTMLPurifier_AttrDef_ListStyle($config);
+        $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
        
        $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
            array('capitalize', 'uppercase', 'lowercase', 'none'), false);
-        $this->info['color'] = new HTMLPurifier_AttrDef_Color();
+        $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
        
        $this->info['background-image'] = $uri_or_none;
        $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
@ -81,96 +81,96 @@ class HTMLPurifier_CSSDefinition
        $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
            array('scroll', 'fixed')
        );
-        $this->info['background-position'] = new HTMLPurifier_AttrDef_BackgroundPosition();
+        $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
        
        $border_color = 
        $this->info['border-top-color'] = 
        $this->info['border-bottom-color'] = 
        $this->info['border-left-color'] = 
        $this->info['border-right-color'] = 
-        $this->info['background-color'] = new HTMLPurifier_AttrDef_Composite(array(
+        $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
            new HTMLPurifier_AttrDef_Enum(array('transparent')),
-            new HTMLPurifier_AttrDef_Color()
+            new HTMLPurifier_AttrDef_CSS_Color()
        ));
        
-        $this->info['background'] = new HTMLPurifier_AttrDef_Background($config);
+        $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
        
-        $this->info['border-color'] = new HTMLPurifier_AttrDef_Multiple($border_color);
+        $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
        
        $border_width = 
        $this->info['border-top-width'] = 
        $this->info['border-bottom-width'] = 
        $this->info['border-left-width'] = 
-        $this->info['border-right-width'] = new HTMLPurifier_AttrDef_Composite(array(
+        $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
            new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
-            new HTMLPurifier_AttrDef_CSSLength(true) //disallow negative
+            new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
        ));
        
-        $this->info['border-width'] = new HTMLPurifier_AttrDef_Multiple($border_width);
+        $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
        
-        $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_Composite(array(
+        $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
            new HTMLPurifier_AttrDef_Enum(array('normal')),
-            new HTMLPurifier_AttrDef_CSSLength()
+            new HTMLPurifier_AttrDef_CSS_Length()
        ));
        
-        $this->info['word-spacing'] = new HTMLPurifier_AttrDef_Composite(array(
+        $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
            new HTMLPurifier_AttrDef_Enum(array('normal')),
-            new HTMLPurifier_AttrDef_CSSLength()
+            new HTMLPurifier_AttrDef_CSS_Length()
        ));
        
-        $this->info['font-size'] = new HTMLPurifier_AttrDef_Composite(array(
+        $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
            new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
                'small', 'medium', 'large', 'x-large', 'xx-large',
                'larger', 'smaller')),
-            new HTMLPurifier_AttrDef_Percentage(),
-            new HTMLPurifier_AttrDef_CSSLength()
+            new HTMLPurifier_AttrDef_CSS_Percentage(),
+            new HTMLPurifier_AttrDef_CSS_Length()
        ));
        
-        $this->info['line-height'] = new HTMLPurifier_AttrDef_Composite(array(
+        $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
            new HTMLPurifier_AttrDef_Enum(array('normal')),
-            new HTMLPurifier_AttrDef_Number(true), // no negatives
-            new HTMLPurifier_AttrDef_CSSLength(true),
-            new HTMLPurifier_AttrDef_Percentage(true)
+            new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
+            new HTMLPurifier_AttrDef_CSS_Length(true),
+            new HTMLPurifier_AttrDef_CSS_Percentage(true)
        ));
        
        $margin =
        $this->info['margin-top'] = 
        $this->info['margin-bottom'] = 
        $this->info['margin-left'] = 
-        $this->info['margin-right'] = new HTMLPurifier_AttrDef_Composite(array(
-            new HTMLPurifier_AttrDef_CSSLength(),
-            new HTMLPurifier_AttrDef_Percentage(),
+        $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_CSS_Length(),
+            new HTMLPurifier_AttrDef_CSS_Percentage(),
            new HTMLPurifier_AttrDef_Enum(array('auto'))
        ));
        
-        $this->info['margin'] = new HTMLPurifier_AttrDef_Multiple($margin);
+        $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
        
        // non-negative
        $padding =
        $this->info['padding-top'] = 
        $this->info['padding-bottom'] = 
        $this->info['padding-left'] = 
-        $this->info['padding-right'] = new HTMLPurifier_AttrDef_Composite(array(
-            new HTMLPurifier_AttrDef_CSSLength(true),
-            new HTMLPurifier_AttrDef_Percentage(true)
+        $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_CSS_Length(true),
+            new HTMLPurifier_AttrDef_CSS_Percentage(true)
        ));
        
-        $this->info['padding'] = new HTMLPurifier_AttrDef_Multiple($padding);
+        $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
        
-        $this->info['text-indent'] = new HTMLPurifier_AttrDef_Composite(array(
-            new HTMLPurifier_AttrDef_CSSLength(),
-            new HTMLPurifier_AttrDef_Percentage()
+        $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_CSS_Length(),
+            new HTMLPurifier_AttrDef_CSS_Percentage()
        ));
        
-        $this->info['width'] = new HTMLPurifier_AttrDef_Composite(array(
-            new HTMLPurifier_AttrDef_CSSLength(true),
-            new HTMLPurifier_AttrDef_Percentage(true),
+        $this->info['width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_CSS_Length(true),
+            new HTMLPurifier_AttrDef_CSS_Percentage(true),
            new HTMLPurifier_AttrDef_Enum(array('auto'))
        ));
        
-        $this->info['text-decoration'] = new HTMLPurifier_AttrDef_TextDecoration();
+        $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
        
-        $this->info['font-family'] = new HTMLPurifier_AttrDef_FontFamily();
+        $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
        
        // this could use specialized code
        $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
@ -179,14 +179,14 @@ class HTMLPurifier_CSSDefinition
        
        // MUST be called after other font properties, as it references
        // a CSSDefinition object
-        $this->info['font'] = new HTMLPurifier_AttrDef_Font($config);
+        $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
        
        // same here
        $this->info['border'] =
        $this->info['border-bottom'] = 
        $this->info['border-top'] = 
        $this->info['border-left'] = 
-        $this->info['border-right'] = new HTMLPurifier_AttrDef_Border($config);
+        $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
        
        $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
            'collapse', 'seperate'));
@ -197,11 +197,11 @@ class HTMLPurifier_CSSDefinition
        $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
            'auto', 'fixed'));
        
-        $this->info['vertical-align'] = new HTMLPurifier_AttrDef_Composite(array(
+        $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
            new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
                'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
-            new HTMLPurifier_AttrDef_CSSLength(),
-            new HTMLPurifier_AttrDef_Percentage()
+            new HTMLPurifier_AttrDef_CSS_Length(),
+            new HTMLPurifier_AttrDef_CSS_Percentage()
        ));
        
    }
--- a/library/HTMLPurifier/ChildDef/Chameleon.php
+++ b/library/HTMLPurifier/ChildDef/Chameleon.php
@ -38,22 +38,13 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
    }
    
    function validateChildren($tokens_of_children, $config, &$context) {
-        $parent_type = $context->get('ParentType');
-        switch ($parent_type) {
-            case 'unknown':
-            case 'inline':
-                $result = $this->inline->validateChildren(
-                    $tokens_of_children, $config, $context);
-                break;
-            case 'block':
-                $result = $this->block->validateChildren(
-                    $tokens_of_children, $config, $context);
-                break;
-            default:
-                trigger_error('Invalid context', E_USER_ERROR);
-                return false;
+        if ($context->get('IsInline') === false) {
+            return $this->block->validateChildren(
+                $tokens_of_children, $config, $context);
+        } else {
+            return $this->inline->validateChildren(
+                $tokens_of_children, $config, $context);
        }
-        return $result;
    }
 }

--- a/library/HTMLPurifier/ChildDef/Required.php
+++ b/library/HTMLPurifier/ChildDef/Required.php
@ -20,10 +20,13 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
-        $elements = array_flip($elements);
-        foreach ($elements as $i => $x) {
-            $elements[$i] = true;
-            if (empty($i)) unset($elements[$i]);
+        $keys = array_keys($elements);
+        if ($keys == array_keys($keys)) {
+            $elements = array_flip($elements);
+            foreach ($elements as $i => $x) {
+                $elements[$i] = true;
+                if (empty($i)) unset($elements[$i]);
+            }
        }
        $this->elements = $elements;
        $this->gen = new HTMLPurifier_Generator();
--- a/library/HTMLPurifier/ChildDef/StrictBlockquote.php
+++ b/library/HTMLPurifier/ChildDef/StrictBlockquote.php
@ -4,27 +4,31 @@ require_once 'HTMLPurifier/ChildDef/Required.php';

 /**
 * Takes the contents of blockquote when in strict and reformats for validation.
- * 
- * From XHTML 1.0 Transitional to Strict, there is a notable change where 
 */
 class   HTMLPurifier_ChildDef_StrictBlockquote
 extends HTMLPurifier_ChildDef_Required
 {
+    var $real_elements;
+    var $fake_elements;
    var $allow_empty = true;
    var $type = 'strictblockquote';
    var $init = false;
-    function HTMLPurifier_ChildDef_StrictBlockquote() {}
    function validateChildren($tokens_of_children, $config, &$context) {
        
        $def = $config->getHTMLDefinition();
        if (!$this->init) {
            // allow all inline elements
-            $this->elements = $def->info_flow_elements;
-            $this->elements['#PCDATA'] = true;
+            $this->real_elements = $this->elements;
+            $this->fake_elements = $def->info_content_sets['Flow'];
+            $this->fake_elements['#PCDATA'] = true;
            $this->init = true;
        }
        
+        // trick the parent class into thinking it allows more
+        $this->elements = $this->fake_elements;
        $result = parent::validateChildren($tokens_of_children, $config, $context);
+        $this->elements = $this->real_elements;
+        
        if ($result === false) return array();
        if ($result === true) $result = $tokens_of_children;
        
@ -40,8 +44,10 @@ extends HTMLPurifier_ChildDef_Required
            // ifs are nested for readability
            if (!$is_inline) {
                if (!$depth) {
-                     if (($token->type == 'text') ||
-                         ($def->info[$token->name]->type == 'inline')) {
+                     if (
+                        $token->type == 'text' ||
+                        !isset($this->elements[$token->name])
+                     ) {
                        $is_inline = true;
                        $ret[] = $block_wrap_start;
                     }
@ -50,7 +56,7 @@ extends HTMLPurifier_ChildDef_Required
                if (!$depth) {
                    // starting tokens have been inline text / empty
                    if ($token->type == 'start' || $token->type == 'empty') {
-                        if ($def->info[$token->name]->type == 'block') {
+                        if (isset($this->elements[$token->name])) {
                            // ended
                            $ret[] = $block_wrap_end;
                            $is_inline = false;
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@ -149,23 +149,36 @@ class HTMLPurifier_Config
            return;
        }
        $this->conf[$namespace][$key] = $value;
+        if ($namespace == 'HTML' || $namespace == 'Attr') {
+            // reset HTML definition if relevant attributes changed
+            $this->html_definition = null;
+        }
+        if ($namespace == 'CSS') {
+            $this->css_definition = null;
+        }
    }
    
    /**
-     * Retrieves a copy of the HTML definition.
+     * Retrieves reference to the HTML definition.
+     * @param $raw Return a copy that has not been setup yet. Must be
+     *             called before it's been setup, otherwise won't work.
     */
-    function getHTMLDefinition() {
-        if ($this->html_definition === null) {
-            $this->html_definition = new HTMLPurifier_HTMLDefinition();
-            $this->html_definition->setup($this);
+    function &getHTMLDefinition($raw = false) {
+        if (
+            empty($this->html_definition) || // hasn't ever been setup
+            ($raw && $this->html_definition->setup) // requesting new one
+        ) {
+            $this->html_definition = new HTMLPurifier_HTMLDefinition($this);
+            if ($raw) return $this->html_definition; // no setup!
        }
+        if (!$this->html_definition->setup) $this->html_definition->setup();
        return $this->html_definition;
    }
    
    /**
-     * Retrieves a copy of the CSS definition
+     * Retrieves reference to the CSS definition
     */
-    function getCSSDefinition() {
+    function &getCSSDefinition() {
        if ($this->css_definition === null) {
            $this->css_definition = new HTMLPurifier_CSSDefinition();
            $this->css_definition->setup($this);
--- a/library/HTMLPurifier/ConfigDef.php
+++ b/library/HTMLPurifier/ConfigDef.php
@ -0,0 +1,10 @@
+<?php
+
+/**
+ * Base class for configuration entities; each subclass overrides $class.
+ */
+class HTMLPurifier_ConfigDef {
+    var $class = false; // kind tag: false here, a string name in subclasses
+}
+
+?>
--- a/library/HTMLPurifier/ConfigDef/Directive.php
+++ b/library/HTMLPurifier/ConfigDef/Directive.php
@ -0,0 +1,74 @@
+<?php
+
+require_once 'HTMLPurifier/ConfigDef.php';
+
+/**
+ * Structure object containing the definition of a directive.
+ * @note Default values are not stored in this structure.
+ */
+class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
+{
+    
+    var $class = 'directive';
+    
+    function HTMLPurifier_ConfigDef_Directive(
+        $type = null,
+        $descriptions = null,
+        $allow_null = null,
+        $allowed = null,
+        $aliases = null
+    ) { // a null argument leaves the member's declared default in place
+        if (        $type !== null)         $this->type = $type;
+        if ($descriptions !== null) $this->descriptions = $descriptions;
+        if (  $allow_null !== null)   $this->allow_null = $allow_null;
+        if (     $allowed !== null)      $this->allowed = $allowed;
+        if (     $aliases !== null)      $this->aliases = $aliases;
+    }
+    
+    /**
+     * Allowed type of the directive. Values are:
+     *      - string
+     *      - istring (case insensitive string)
+     *      - int
+     *      - float
+     *      - bool
+     *      - lookup (array of value => true)
+     *      - list (regular numbered index array)
+     *      - hash (array of key => value)
+     *      - mixed (anything goes)
+     */
+    var $type = 'mixed';
+    
+    /**
+     * Plaintext descriptions of the directive, indexed by file and then
+     * by line number, so multiple descriptions are allowed.
+     */
+    var $descriptions = array();
+    
+    /**
+     * Is null allowed? Has no effect for mixed type.
+     * @bool
+     */
+    var $allow_null = false;
+    
+    /**
+     * Lookup table of allowed values; bool true if all values are allowed.
+     */
+    var $allowed = true;
+    
+    /**
+     * Hash of value aliases, i.e. values that are equivalent.
+     */
+    var $aliases = array();
+    
+    /**
+     * Stores a description under its file and line, replacing any existing one.
+     */
+    function addDescription($file, $line, $description) {
+        if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
+        $this->descriptions[$file][$line] = $description;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/ConfigDef/DirectiveAlias.php
+++ b/library/HTMLPurifier/ConfigDef/DirectiveAlias.php
@ -0,0 +1,27 @@
+<?php
+
+require_once 'HTMLPurifier/ConfigDef.php';
+
+/**
+ * Structure object describing a directive alias by its target's namespace and name.
+ */
+class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
+{
+    var $class = 'alias';
+    
+    /**
+     * Namespace of the directive being aliased to
+     */
+    var $namespace;
+    /**
+     * Name of the directive being aliased to
+     */
+    var $name;
+    
+    function HTMLPurifier_ConfigDef_DirectiveAlias($namespace, $name) {
+        $this->namespace = $namespace;
+        $this->name = $name;
+    }
+}
+
+?>
--- a/library/HTMLPurifier/ConfigDef/Namespace.php
+++ b/library/HTMLPurifier/ConfigDef/Namespace.php
@ -0,0 +1,23 @@
+<?php
+
+require_once 'HTMLPurifier/ConfigDef.php';
+
+/**
+ * Structure object describing a namespace
+ */
+class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
+    
+    function HTMLPurifier_ConfigDef_Namespace($description = null) {
+        $this->description = $description; // stays null when no description is given
+    }
+    
+    var $class = 'namespace';
+    
+    /**
+     * String description of what kinds of directives go in this namespace.
+     */
+    var $description;
+    
+}
+
+?>
--- a/library/HTMLPurifier/ConfigSchema.php
+++ b/library/HTMLPurifier/ConfigSchema.php
@ -1,6 +1,10 @@
 <?php

 require_once 'HTMLPurifier/Error.php';
+require_once 'HTMLPurifier/ConfigDef.php';
+require_once 'HTMLPurifier/ConfigDef/Namespace.php';
+require_once 'HTMLPurifier/ConfigDef/Directive.php';
+require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';

 /**
 * Configuration definition, defines directives and their defaults.
@ -138,7 +142,7 @@ class HTMLPurifier_ConfigSchema {
                return;
            }
            $def->info[$namespace][$name] =
-                new HTMLPurifier_ConfigEntity_Directive();
+                new HTMLPurifier_ConfigDef_Directive();
            $def->info[$namespace][$name]->type = $type;
            $def->info[$namespace][$name]->allow_null = $allow_null;
            $def->defaults[$namespace][$name]   = $default;
@ -172,7 +176,7 @@ class HTMLPurifier_ConfigSchema {
            return;
        }
        $def->info[$namespace] = array();
-        $def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
+        $def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
        $def->info_namespace[$namespace]->description = $description;
        $def->defaults[$namespace] = array();
    }
@ -284,7 +288,7 @@ class HTMLPurifier_ConfigSchema {
            return;
        }
        $def->info[$namespace][$name] =
-            new HTMLPurifier_ConfigEntity_DirectiveAlias(
+            new HTMLPurifier_ConfigDef_DirectiveAlias(
                $new_namespace, $new_name);
    }
    
@ -379,120 +383,4 @@ class HTMLPurifier_ConfigSchema {
    }
 }

-/**
- * Base class for configuration entity
- */
-class HTMLPurifier_ConfigEntity {
-    var $class = false;
-}
-
-/**
- * Structure object describing of a namespace
- */
-class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {
-    
-    function HTMLPurifier_ConfigEntity_Namespace($description = null) {
-        $this->description = $description;
-    }
-    
-    var $class = 'namespace';
-    
-    /**
-     * String description of what kinds of directives go in this namespace.
-     */
-    var $description;
-    
-}
-
-/**
- * Structure object containing definition of a directive.
- * @note This structure does not contain default values
- */
-class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
-{
-    
-    var $class = 'directive';
-    
-    function HTMLPurifier_ConfigEntity_Directive(
-        $type = null,
-        $descriptions = null,
-        $allow_null = null,
-        $allowed = null,
-        $aliases = null
-    ) {
-        if (        $type !== null)         $this->type = $type;
-        if ($descriptions !== null) $this->descriptions = $descriptions;
-        if (  $allow_null !== null)   $this->allow_null = $allow_null;
-        if (     $allowed !== null)      $this->allowed = $allowed;
-        if (     $aliases !== null)      $this->aliases = $aliases;
-    }
-    
-    /**
-     * Allowed type of the directive. Values are:
-     *      - string
-     *      - istring (case insensitive string)
-     *      - int
-     *      - float
-     *      - bool
-     *      - lookup (array of value => true)
-     *      - list (regular numbered index array)
-     *      - hash (array of key => value)
-     *      - mixed (anything goes)
-     */
-    var $type = 'mixed';
-    
-    /**
-     * Plaintext descriptions of the configuration entity is. Organized by
-     * file and line number, so multiple descriptions are allowed.
-     */
-    var $descriptions = array();
-    
-    /**
-     * Is null allowed? Has no effect for mixed type.
-     * @bool
-     */
-    var $allow_null = false;
-    
-    /**
-     * Lookup table of allowed values of the element, bool true if all allowed.
-     */
-    var $allowed = true;
-    
-    /**
-     * Hash of value aliases, i.e. values that are equivalent.
-     */
-    var $aliases = array();
-    
-    /**
-     * Adds a description to the array
-     */
-    function addDescription($file, $line, $description) {
-        if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
-        $this->descriptions[$file][$line] = $description;
-    }
-    
-}
-
-/**
- * Structure object describing a directive alias
- */
-class HTMLPurifier_ConfigEntity_DirectiveAlias extends HTMLPurifier_ConfigEntity
-{
-    var $class = 'alias';
-    
-    /**
-     * Namespace being aliased to
-     */
-    var $namespace;
-    /**
-     * Directive being aliased to
-     */
-    var $name;
-    
-    function HTMLPurifier_ConfigEntity_DirectiveAlias($namespace, $name) {
-        $this->namespace = $namespace;
-        $this->name = $name;
-    }
-}
-
 ?>
--- a/library/HTMLPurifier/ContentSets.php
+++ b/library/HTMLPurifier/ContentSets.php
@ -0,0 +1,148 @@
+<?php
+
+// common defs that we'll support by default
+require_once 'HTMLPurifier/ChildDef.php';
+require_once 'HTMLPurifier/ChildDef/Empty.php';
+require_once 'HTMLPurifier/ChildDef/Required.php';
+require_once 'HTMLPurifier/ChildDef/Optional.php';
+
+class HTMLPurifier_ContentSets // merges, expands and indexes module content sets
+{
+    
+    /**
+     * List of content set strings (pipe separators) indexed by name.
+     * @public
+     */
+    var $info = array();
+    
+    /**
+     * List of content set lookups (element => true) indexed by name.
+     * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
+     * @public
+     */
+    var $lookup = array();
+    
+    /**
+     * Synchronized list of defined content sets (keys of info)
+     */
+    var $keys = array();
+    /**
+     * Synchronized list of defined content values (values of info)
+     */
+    var $values = array();
+    
+    /**
+     * Merges in module's content sets, expands identifiers in the content
+     * sets and populates the keys, values and lookup member variables.
+     * @param $modules List of HTMLPurifier_HTMLModule (a lone module is wrapped)
+     */
+    function HTMLPurifier_ContentSets($modules) {
+        if (!is_array($modules)) $modules = array($modules);
+        // populate content_sets based on module hints
+        // sorry, no way of overloading
+        foreach ($modules as $module_i => $module) {
+            foreach ($module->content_sets as $key => $value) {
+                if (isset($this->info[$key])) {
+                    // add it into the existing content set
+                    $this->info[$key] = $this->info[$key] . ' | ' . $value;
+                } else {
+                    $this->info[$key] = $value;
+                }
+            }
+        }
+        // perform content_set expansions
+        $this->keys = array_keys($this->info);
+        foreach ($this->info as $i => $set) {
+            // only performed once, so infinite recursion is not
+            // a problem
+            $this->info[$i] =
+                str_replace(
+                    $this->keys,
+                    // must be recalculated each time due to
+                    // changing substitutions
+                    array_values($this->info),
+                $set);
+        }
+        $this->values = array_values($this->info);
+        
+        // generate lookup tables
+        foreach ($this->info as $name => $set) {
+            $this->lookup[$name] = $this->convertToLookup($set);
+        }
+    }
+    
+    /**
+     * Accepts a definition; generates and assigns a ChildDef for it
+     * @param $def HTMLPurifier_ElementDef reference
+     * @param $module Module that defined the ElementDef
+     */
+    function generateChildDef(&$def, $module) {
+        if (!empty($def->child)) return; // already done!
+        $content_model = $def->content_model;
+        if (is_string($content_model)) { // swap content set names for their expansions
+            $def->content_model = str_replace(
+                $this->keys, $this->values, $content_model);
+        }
+        $def->child = $this->getChildDef($def, $module);
+    }
+    
+    /**
+     * Instantiates a ChildDef based on the content_model and
+     * content_model_type member variables of an HTMLPurifier_ElementDef.
+     * @note Unrecognized types are deferred to the module (if it sets
+     *       defines_child_def) for custom HTMLPurifier_ChildDef subclasses
+     * @param $def ElementDef to read; $module Module that defined it
+     * @return HTMLPurifier_ChildDef, or false after E_USER_ERROR if none fits
+     */
+    function getChildDef($def, $module) {
+        $value = $def->content_model;
+        if (is_object($value)) {
+            trigger_error(
+                'Literal object child definitions should be stored in '.
+                'ElementDef->child not ElementDef->content_model',
+                E_USER_NOTICE
+            );
+            return $value;
+        }
+        switch ($def->content_model_type) {
+            case 'required':
+                return new HTMLPurifier_ChildDef_Required($value);
+            case 'optional':
+                return new HTMLPurifier_ChildDef_Optional($value);
+            case 'empty':
+                return new HTMLPurifier_ChildDef_Empty();
+            case 'custom': // NOTE(review): no require_once for ChildDef/Custom.php is visible in this file - confirm it is loaded elsewhere
+                return new HTMLPurifier_ChildDef_Custom($value);
+        }
+        // defer to its module
+        $return = false;
+        if ($module->defines_child_def) { // save a func call
+            $return = $module->getChildDef($def);
+        }
+        if ($return !== false) return $return;
+        // error-out
+        trigger_error(
+            'Could not determine which ChildDef class to instantiate',
+            E_USER_ERROR
+        );
+        return false;
+    }
+    
+    /**
+     * Converts a string list of elements separated by pipes into
+     * a lookup array.
+     * @param $string List of elements
+     * @return Lookup array of elements
+     */
+    function convertToLookup($string) {
+        $array = explode('|', str_replace(' ', '', $string)); // spaces are stripped before splitting
+        $ret = array();
+        foreach ($array as $i => $k) {
+            $ret[$k] = true;
+        }
+        return $ret;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/ElementDef.php
+++ b/library/HTMLPurifier/ElementDef.php
@ -0,0 +1,122 @@
+<?php
+
+/**
+ * Structure that stores an HTML element definition. Used by
+ * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
+ */
+class HTMLPurifier_ElementDef
+{
+    
+    /**
+     * Does the definition work by itself, or is it created solely
+     * for the purpose of merging into another definition?
+     */
+    var $standalone = true;
+    
+    /**
+     * Associative array of attribute name to HTMLPurifier_AttrDef
+     * @note Before being processed by HTMLPurifier_AttrCollections
+     *       when modules are finalized during
+     *       HTMLPurifier_HTMLDefinition->setup(), this array may also
+     *       contain an array at index 0 that indicates which attribute
+     *       collections to load into the full array. It may also
+     *       contain string identifiers in lieu of HTMLPurifier_AttrDef,
+     *       see HTMLPurifier_AttrTypes on how they are expanded during
+     *       HTMLPurifier_HTMLDefinition->setup() processing.
+     * @public
+     */
+    var $attr = array();
+    
+    /**
+     * Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
+     * @public
+     */
+    var $attr_transform_pre = array();
+    
+    /**
+     * Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
+     * @public
+     */
+    var $attr_transform_post = array();
+    
+    
+    
+    /**
+     * HTMLPurifier_ChildDef of this tag.
+     * @public
+     */
+    var $child;
+    
+    /**
+     * Abstract string representation of internal ChildDef rules. See
+     * HTMLPurifier_ContentSets for how this is parsed and then transformed
+     * into an HTMLPurifier_ChildDef.
+     * @public
+     */
+    var $content_model;
+    
+    /**
+     * Value of $child->type, used to determine which ChildDef to use,
+     * used in combination with $content_model.
+     * @public
+     */
+    var $content_model_type;
+    
+    
+    
+    /**
+     * Lookup table of tags that close this tag. Used during parsing
+     * to make sure we don't attempt to nest unclosed tags.
+     * @public
+     */
+    var $auto_close = array();
+    
+    /**
+     * Does the element have a content model (#PCDATA | Inline)*? This
+     * is important for chameleon ins and del processing in
+     * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
+     * have to worry about this one.
+     * @public
+     */
+    var $descendants_are_inline;
+    
+    /**
+     * Lookup table of tags excluded from all descendants of this tag.
+     * @public
+     */
+    var $excludes = array();
+    
+    /**
+     * Merges the values of another element definition into this one.
+     * Values from the new element def take precedence if a value is
+     * not mergeable.
+     * @param $def HTMLPurifier_ElementDef to merge in, left unmodified
+     */
+    function mergeIn($def) {
+        // later keys take precedence
+        foreach($def->attr as $k => $v) {
+            if ($k === 0) {
+                // merge in the includes; there is no way to override one.
+                // Strict ===: == 0 matches non-numeric string keys before PHP 8.
+                foreach ($v as $v2) {
+                    $this->attr[0][] = $v2;
+                }
+                continue;
+            }
+            $this->attr[$k] = $v;
+        }
+        foreach($def->attr_transform_pre    as $k => $v) $this->attr_transform_pre[$k]  = $v;
+        foreach($def->attr_transform_post   as $k => $v) $this->attr_transform_post[$k] = $v;
+        foreach($def->auto_close            as $k => $v) $this->auto_close[$k]          = $v;
+        foreach($def->excludes              as $k => $v) $this->excludes[$k]            = $v;
+        
+        if(!is_null($def->child)) $this->child = $def->child;
+        if(!empty($def->content_model)) $this->content_model .= ' | ' . $def->content_model;
+        if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
+        if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
+        
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/Filter/YouTube.php
+++ b/library/HTMLPurifier/Filter/YouTube.php
@ -9,7 +9,7 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
    
    function preFilter($html, $config, &$context) {
        $pre_regex = '#<object[^>]+>.+?'.
-            'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#';
+            'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
        $pre_replace = '<span class="youtube-embed">\1</span>';
        return preg_replace($pre_regex, $pre_replace, $html);
    }
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@ -1,656 +1,250 @@
-<?php
-
-require_once 'HTMLPurifier/AttrDef.php';
-    require_once 'HTMLPurifier/AttrDef/Enum.php';
-    require_once 'HTMLPurifier/AttrDef/ID.php';
-    require_once 'HTMLPurifier/AttrDef/Class.php';
-    require_once 'HTMLPurifier/AttrDef/Text.php';
-    require_once 'HTMLPurifier/AttrDef/Lang.php';
-    require_once 'HTMLPurifier/AttrDef/Pixels.php';
-    require_once 'HTMLPurifier/AttrDef/Length.php';
-    require_once 'HTMLPurifier/AttrDef/MultiLength.php';
-    require_once 'HTMLPurifier/AttrDef/Integer.php';
-    require_once 'HTMLPurifier/AttrDef/URI.php';
-    require_once 'HTMLPurifier/AttrDef/CSS.php';
-require_once 'HTMLPurifier/AttrTransform.php';
-    require_once 'HTMLPurifier/AttrTransform/Lang.php';
-    require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
-    require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
-    require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
-require_once 'HTMLPurifier/ChildDef.php';
-    require_once 'HTMLPurifier/ChildDef/Chameleon.php';
-    require_once 'HTMLPurifier/ChildDef/Empty.php';
-    require_once 'HTMLPurifier/ChildDef/Required.php';
-    require_once 'HTMLPurifier/ChildDef/Optional.php';
-    require_once 'HTMLPurifier/ChildDef/Table.php';
-    require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
-require_once 'HTMLPurifier/Generator.php';
-require_once 'HTMLPurifier/Token.php';
-require_once 'HTMLPurifier/TagTransform.php';
-
-HTMLPurifier_ConfigSchema::define(
-    'HTML', 'EnableAttrID', false, 'bool',
-    'Allows the ID attribute in HTML.  This is disabled by default '.
-    'due to the fact that without proper configuration user input can '.
-    'easily break the validation of a webpage by specifying an ID that is '.
-    'already on the surrounding HTML.  If you don\'t mind throwing caution to '.
-    'the wind, enable this directive, but I strongly recommend you also '.
-    'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
-    'user supplied IDs (%Attr.IDPrefix).  This directive has been available '.
-    'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
-    'versions.'
-);
-
-HTMLPurifier_ConfigSchema::define(
-    'HTML', 'Strict', false, 'bool',
-    'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
-    'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
-    'HTML', 'BlockWrapper', 'p', 'string',
-    'String name of element to wrap inline elements that are inside a block '.
-    'context.  This only occurs in the children of blockquote in strict mode. '.
-    'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '.
-    'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '.
-    '<code>&lt;p&gt;</code> tags can be replaced '.
-    'with whatever you desire, as long as it is a block level element. '.
-    'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
-    'HTML', 'Parent', 'div', 'string',
-    'String name of element that HTML fragment passed to library will be '.
-    'inserted in.  An interesting variation would be using span as the '.
-    'parent element, meaning that only inline tags would be allowed. '.
-    'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
-    'HTML', 'AllowedElements', null, 'lookup/null',
-    'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
-    'can overload it with your own list of tags to allow.  Note that this '.
-    'method is subtractive: it does its job by taking away from HTML Purifier '.
-    'usual feature set, so you cannot add a tag that HTML Purifier never '.
-    'supported in the first place (like embed, form or head).  If you change this, you '.
-    'probably also want to change %HTML.AllowedAttributes. '.
-    '<strong>Warning:</strong> If another directive conflicts with the '.
-    'elements here, <em>that</em> directive will win and override. '.
-    'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
-    'HTML', 'AllowedAttributes', null, 'lookup/null',
-    'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
-    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
-    '(style, id, class, dir, lang, xml:lang).'.
-    '<strong>Warning:</strong> If another directive conflicts with the '.
-    'elements here, <em>that</em> directive will win and override. For '.
-    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
-    'directive.  You must set that directive to true before you can use '.
-    'IDs at all. This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
-    'Attr', 'DisableURI', false, 'bool',
-    'Disables all URIs in all forms. Not sure why you\'d want to do that '.
-    '(after all, the Internet\'s founded on the notion of a hyperlink). '.
-    'This directive has been available since 1.3.0.'
-);
-
-/**
- * Defines the purified HTML type with large amounts of objects.
- * 
- * The main function of this object is its $info array, which is an 
- * associative array of all the child and attribute definitions for
- * each allowed element. It also contains special use information (always
- * prefixed by info) for intelligent tag closing and global attributes.
- * 
- * For optimization, the definition generation may be moved to
- * a maintenance script and stipulate that definition be created
- * by a factory method that unserializes a serialized version of Definition.
- * Customization would entail copying the maintenance script, making the
- * necessary changes, generating the serialized object, and then hooking it
- * in via the factory method. We would also offer a LiveDefinition for
- * automatic recompilation, suggesting that we would have a DefinitionGenerator.
- */
-
-class HTMLPurifier_HTMLDefinition
-{
-    
-    /**
-     * Associative array of element names to HTMLPurifier_ElementDef
-     * @public
-     */
-    var $info = array();
-    
-    /**
-     * Associative array of global attribute name to attribute definition.
-     * @public
-     */
-    var $info_global_attr = array();
-    
-    /**
-     * String name of parent element HTML will be going into.
-     * @public
-     */
-    var $info_parent = 'div';
-    
-    /**
-     * Definition for parent element, allows parent element to be a
-     * tag that's not allowed inside the HTML fragment.
-     * @public
-     */
-    var $info_parent_def;
-    
-    /**
-     * String name of element used to wrap inline elements in block context
-     * @note This is rarely used except for BLOCKQUOTEs in strict mode
-     * @public
-     */
-    var $info_block_wrapper = 'p';
-    
-    /**
-     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
-     * @public
-     */
-    var $info_tag_transform = array();
-    
-    /**
-     * List of HTMLPurifier_AttrTransform to be performed before validation.
-     * @public
-     */
-    var $info_attr_transform_pre = array();
-    
-    /**
-     * List of HTMLPurifier_AttrTransform to be performed after validation/
-     * @public
-     */
-    var $info_attr_transform_post = array();
-    
-    /**
-     * Lookup table of flow elements
-     * @public
-     */
-    var $info_flow_elements = array();
-    
-    /**
-     * Boolean is a strict definition?
-     * @public
-     */
-    var $strict;
-    
-    /**
-     * Initializes the definition, the meat of the class.
-     */
-    function setup($config) {
-        
-        // some cached config values
-        $this->strict = $config->get('HTML', 'Strict');
-        
-        //////////////////////////////////////////////////////////////////////
-        // info[] : initializes the definition objects
-        
-        // if you attempt to define rules later on for a tag not in this array
-        // PHP will create an stdclass
-        
-        $allowed_tags =
-            array(
-                'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
-                'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
-                'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small',
-                'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
-                'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
-                'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
-                'colgroup', 'col', 'td', 'th', 'tr'
-            );
-        
-        if (!$this->strict) {
-            $allowed_tags[] = 'u';
-            $allowed_tags[] = 's';
-            $allowed_tags[] = 'strike';
-        }
-        
-        foreach ($allowed_tags as $tag) {
-            $this->info[$tag] = new HTMLPurifier_ElementDef();
-        }
-        
-        //////////////////////////////////////////////////////////////////////
-        // info[]->child : defines allowed children for elements
-        
-        // emulates the structure of the DTD
-        // however, these are condensed, with bad stuff taken out
-        // screening process was done by hand
-        
-        // entities: prefixed with e_ and _ replaces . from DTD
-        // double underlines are entities we made up
-        
-        // we don't use an array because that complicates interpolation
-        // strings are used instead of arrays because if you use arrays,
-        // you have to do some hideous manipulation with array_merge()
-        
-        // todo: determine whether or not having allowed children
-        //       that aren't allowed globally affects security (it shouldn't)
-        // if above works out, extend children definitions to include all
-        //       possible elements (allowed elements will dictate which ones
-        //       get dropped
-        
-        $e_special_extra = 'img';
-        $e_special_basic = 'br | span | bdo';
-        $e_special = "$e_special_basic | $e_special_extra";
-        $e_fontstyle_extra = 'big | small';
-        $e_fontstyle_basic = 'tt | i | b | u | s | strike';
-        $e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra";
-        $e_phrase_extra = 'sub | sup';
-        $e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
-          ' | cite | abbr | acronym';
-        $e_phrase = "$e_phrase_basic | $e_phrase_extra";
-        $e_misc_inline = 'ins | del';
-        $e_misc = "$e_misc_inline";
-        $e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
-        // pseudo-property we created for convenience, see later on
-        $e__inline = "#PCDATA | $e_inline | $e_misc_inline";
-        // note the casing
-        $e_Inline = new HTMLPurifier_ChildDef_Optional($e__inline);
-        $e_heading = 'h1|h2|h3|h4|h5|h6';
-        $e_lists = 'ul | ol | dl';
-        $e_blocktext = 'pre | hr | blockquote | address';
-        $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
-        $e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
-        $e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
-        $e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
-        $e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
-          " | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
-        $e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
-          " | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
-          " | $e_misc_inline");
-        $e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
-        $e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
-        
-        $this->info['ins']->child =
-        $this->info['del']->child =
-            new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
-        
-        $this->info['dd']->child  =
-        $this->info['li']->child  =
-        $this->info['div']->child = $e_Flow;
-        
-        if ($this->strict) {
-            $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
-        } else {
-            $this->info['blockquote']->child = $e_Flow;
-        }
-        
-        $this->info['caption']->child   = 
-        $this->info['em']->child   =
-        $this->info['strong']->child    =
-        $this->info['dfn']->child  =
-        $this->info['code']->child =
-        $this->info['samp']->child =
-        $this->info['kbd']->child  =
-        $this->info['var']->child  =
-        $this->info['cite']->child =
-        $this->info['abbr']->child =
-        $this->info['acronym']->child   =
-        $this->info['q']->child    =
-        $this->info['sub']->child  =
-        $this->info['tt']->child   =
-        $this->info['sup']->child  =
-        $this->info['i']->child    =
-        $this->info['b']->child    =
-        $this->info['big']->child  =
-        $this->info['small']->child=
-        $this->info['bdo']->child  =
-        $this->info['span']->child =
-        $this->info['dt']->child   =
-        $this->info['p']->child    = 
-        $this->info['h1']->child   = 
-        $this->info['h2']->child   = 
-        $this->info['h3']->child   = 
-        $this->info['h4']->child   = 
-        $this->info['h5']->child   = 
-        $this->info['h6']->child   = $e_Inline;
-        
-        if (!$this->strict) {
-            $this->info['u']->child    =
-            $this->info['s']->child    =
-            $this->info['strike']->child    = $e_Inline;
-        }
-        
-        // the only three required definitions, besides custom table code
-        $this->info['ol']->child   =
-        $this->info['ul']->child   = new HTMLPurifier_ChildDef_Required('li');
-        
-        $this->info['dl']->child   = new HTMLPurifier_ChildDef_Required('dt|dd');
-        
-        if ($this->strict) {
-            $this->info['address']->child = $e_Inline;
-        } else {
-            $this->info['address']->child =
-              new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
-                  " | $e_misc_inline");
-        }
-        
-        $this->info['img']->child  =
-        $this->info['br']->child   =
-        $this->info['hr']->child   = new HTMLPurifier_ChildDef_Empty();
-        
-        $this->info['pre']->child  = $e_pre_content;
-        
-        $this->info['a']->child    = $e_a_content;
-        
-        $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
-        
-        // not a real entity, watch the double underscore
-        $e__row = new HTMLPurifier_ChildDef_Required('tr');
-        $this->info['thead']->child = $e__row;
-        $this->info['tfoot']->child = $e__row;
-        $this->info['tbody']->child = $e__row;
-        $this->info['colgroup']->child = new HTMLPurifier_ChildDef_Optional('col');
-        $this->info['col']->child = new HTMLPurifier_ChildDef_Empty();
-        $this->info['tr']->child = new HTMLPurifier_ChildDef_Required('th | td');
-        $this->info['th']->child = $e_Flow;
-        $this->info['td']->child = $e_Flow;
-        
-        //////////////////////////////////////////////////////////////////////
-        // info[]->type : defines the type of the element (block or inline)
-        
-        // reuses $e_Inline and $e_Block
-        foreach ($e_Inline->elements as $name => $bool) {
-            if ($name == '#PCDATA') continue;
-            if (!isset($this->info[$name])) continue;
-            $this->info[$name]->type = 'inline';
-        }
-        
-        foreach ($e_Block->elements as $name => $bool) {
-            if (!isset($this->info[$name])) continue;
-            $this->info[$name]->type = 'block';
-        }
-        
-        foreach ($e_Flow->elements as $name => $bool) {
-            $this->info_flow_elements[$name] = true;
-        }
-        
-        //////////////////////////////////////////////////////////////////////
-        // info[]->excludes : defines elements that aren't allowed in here
-        
-        // make sure you test using isset() and not !empty()
-        
-        $this->info['a']->excludes = array('a' => true);
-        $this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
-            // technically useless, but good to be indepth
-            'object', 'applet', 'font', 'basefont'));
-        
-        //////////////////////////////////////////////////////////////////////
-        // info[]->attr : defines allowed attributes for elements
-        
-        // this doesn't include REQUIRED declarations, those are handled
-        // by the transform classes. It will, however, do simple and slightly
-        // complex attribute value substitution
-        
-        // the question of varying allowed attributes is more entangling.
-        
-        $e_Text = new HTMLPurifier_AttrDef_Text();
-        
-        // attrs, included in almost every single one except for a few,
-        // which manually override these in their local definitions
-        $this->info_global_attr = array(
-            // core attrs
-            'class' => new HTMLPurifier_AttrDef_Class(),
-            'title' => $e_Text,
-            'style' => new HTMLPurifier_AttrDef_CSS(),
-            // i18n
-            'dir'   => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
-            'lang'  => new HTMLPurifier_AttrDef_Lang(),
-            'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
-            );
-        
-        if ($config->get('HTML', 'EnableAttrID')) {
-            $this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID();
-        }
-        
-        // required attribute stipulation handled in attribute transformation
-        $this->info['bdo']->attr = array(); // nothing else
-        
-        $this->info['br']->attr['dir'] = false;
-        $this->info['br']->attr['lang'] = false;
-        $this->info['br']->attr['xml:lang'] = false;
-        
-        $this->info['td']->attr['abbr'] = $e_Text;
-        $this->info['th']->attr['abbr'] = $e_Text;
-        
-        $this->setAttrForTableElements('align', new HTMLPurifier_AttrDef_Enum(
-            array('left', 'center', 'right', 'justify', 'char'), false));
-        
-        $this->setAttrForTableElements('valign', new HTMLPurifier_AttrDef_Enum(
-            array('top', 'middle', 'bottom', 'baseline'), false));
-        
-        $this->info['img']->attr['alt'] = $e_Text;
-        
-        $e_TFrame = new HTMLPurifier_AttrDef_Enum(array('void', 'above',
-            'below', 'hsides', 'lhs', 'rhs', 'vsides', 'box', 'border'), false);
-        $this->info['table']->attr['frame'] = $e_TFrame;
-        
-        $e_TRules = new HTMLPurifier_AttrDef_Enum(array('none', 'groups',
-            'rows', 'cols', 'all'), false);
-        $this->info['table']->attr['rules'] = $e_TRules;
-        
-        $this->info['table']->attr['summary'] = $e_Text;
-        
-        $this->info['table']->attr['border'] =
-            new HTMLPurifier_AttrDef_Pixels();
-        
-        $e_Length = new HTMLPurifier_AttrDef_Length();
-        $this->info['table']->attr['cellpadding'] =
-        $this->info['table']->attr['cellspacing'] =
-        $this->info['table']->attr['width'] =
-        $this->info['img']->attr['height'] =
-        $this->info['img']->attr['width'] = $e_Length;
-        $this->setAttrForTableElements('charoff', $e_Length);
-        
-        $e_MultiLength = new HTMLPurifier_AttrDef_MultiLength();
-        $this->info['col']->attr['width'] =
-        $this->info['colgroup']->attr['width'] = $e_MultiLength;
-        
-        $e__NumberSpan = new HTMLPurifier_AttrDef_Integer(false, false, true);
-        $this->info['colgroup']->attr['span'] =
-        $this->info['col']->attr['span']   =
-        $this->info['td']->attr['rowspan'] =
-        $this->info['th']->attr['rowspan'] = 
-        $this->info['td']->attr['colspan'] =
-        $this->info['th']->attr['colspan'] = $e__NumberSpan;
-        
-        if (!$config->get('Attr', 'DisableURI')) {
-            $e_URI = new HTMLPurifier_AttrDef_URI();
-            $this->info['a']->attr['href'] =
-            $this->info['img']->attr['longdesc'] =
-            $this->info['del']->attr['cite'] =
-            $this->info['ins']->attr['cite'] =
-            $this->info['blockquote']->attr['cite'] =
-            $this->info['q']->attr['cite'] = $e_URI;
-            
-            // URI that causes HTTP request
-            $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
-        }
-        
-        if (!$this->strict) {
-            $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
-            $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
-        }
-        
-        //////////////////////////////////////////////////////////////////////
-        // info_tag_transform : transformations of tags
-        
-        $this->info_tag_transform['font']   = new HTMLPurifier_TagTransform_Font();
-        $this->info_tag_transform['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
-        $this->info_tag_transform['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
-        $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
-        
-        //////////////////////////////////////////////////////////////////////
-        // info[]->auto_close : tags that automatically close another
-        
-        // todo: determine whether or not SGML-like modeling based on
-        // mandatory/optional end tags would be a better policy
-        
-        // make sure you test using isset() not !empty()
-        
-        // these are all block elements: blocks aren't allowed in P
-        $this->info['p']->auto_close = array_flip(array(
-                'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
-                'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
-                'table', 'ul'
-            ));
-        
-        $this->info['li']->auto_close = array('li' => true);
-        
-        // we need TABLE and heading mismatch code
-        // we may need to make this more flexible for heading mismatch,
-        // or we can just create another info
-        
-        //////////////////////////////////////////////////////////////////////
-        // info[]->attr_transform_* : attribute transformations in elements
-        // pre is applied before any validation is done, post is done after
-        
-        $this->info['h1']->attr_transform_pre[] =
-        $this->info['h2']->attr_transform_pre[] =
-        $this->info['h3']->attr_transform_pre[] =
-        $this->info['h4']->attr_transform_pre[] =
-        $this->info['h5']->attr_transform_pre[] =
-        $this->info['h6']->attr_transform_pre[] =
-        $this->info['p'] ->attr_transform_pre[] = 
-                    new HTMLPurifier_AttrTransform_TextAlign();
-        
-        $this->info['bdo']->attr_transform_post[] =
-                    new HTMLPurifier_AttrTransform_BdoDir();
-        
-        $this->info['img']->attr_transform_post[] =
-                    new HTMLPurifier_AttrTransform_ImgRequired();
-        
-        //////////////////////////////////////////////////////////////////////
-        // info_attr_transform_* : global attribute transformation that is
-        // unconditionally called. Good for transformations that have complex
-        // start conditions
-        // pre is applied before any validation is done, post is done after
-        
-        $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
-        
-        // protect against stdclasses floating around
-        foreach ($this->info as $key => $obj) {
-            if ($obj instanceof stdClass) {
-                unset($this->info[$key]);
-            }
-        }
-        
-        //////////////////////////////////////////////////////////////////////
-        // info_block_wrapper : wraps inline elements in block context
-        
-        $block_wrapper = $config->get('HTML', 'BlockWrapper');
-        if (isset($e_Block->elements[$block_wrapper])) {
-            $this->info_block_wrapper = $block_wrapper;
-        } else {
-            trigger_error('Cannot use non-block element as block wrapper.',
-                E_USER_ERROR);
-        }
-        
-        //////////////////////////////////////////////////////////////////////
-        // info_parent : parent element of the HTML fragment
-        
-        $parent = $config->get('HTML', 'Parent');
-        if (isset($this->info[$parent])) {
-            $this->info_parent = $parent;
-        } else {
-            trigger_error('Cannot use unrecognized element as parent.',
-                E_USER_ERROR);
-        }
-        $this->info_parent_def = $this->info[$this->info_parent];
-        
-        //////////////////////////////////////////////////////////////////////
-        // %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
-        
-        $allowed_elements = $config->get('HTML', 'AllowedElements');
-        if (is_array($allowed_elements)) {
-            foreach ($this->info as $name => $d) {
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
-            }
-        }
-        $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
-        if (is_array($allowed_attributes)) {
-            foreach ($this->info_global_attr as $attr_key => $info) {
-                if (!isset($allowed_attributes["*.$attr_key"])) {
-                    unset($this->info_global_attr[$attr_key]);
-                }
-            }
-            foreach ($this->info as $tag => $info) {
-                foreach ($info->attr as $attr => $attr_info) {
-                    if (!isset($allowed_attributes["$tag.$attr"])) {
-                        unset($this->info[$tag]->attr[$attr]);
-                    }
-                }
-            }
-        }
-    }
-    
-    function setAttrForTableElements($attr, $def) {
-        $this->info['col']->attr[$attr] = 
-        $this->info['colgroup']->attr[$attr] = 
-        $this->info['tbody']->attr[$attr] = 
-        $this->info['td']->attr[$attr] = 
-        $this->info['tfoot']->attr[$attr] = 
-        $this->info['th']->attr[$attr] = 
-        $this->info['thead']->attr[$attr] = 
-        $this->info['tr']->attr[$attr] = $def;
-    }
-    
-}
-
-/**
- * Structure that stores an element definition.
- */
-class HTMLPurifier_ElementDef
-{
-    
-    /**
-     * Associative array of attribute name to HTMLPurifier_AttrDef
-     * @public
-     */
-    var $attr = array();
-    
-    /**
-     * List of tag's HTMLPurifier_AttrTransform to be done before validation
-     * @public
-     */
-    var $attr_transform_pre = array();
-    
-    /**
-     * List of tag's HTMLPurifier_AttrTransform to be done after validation
-     * @public
-     */
-    var $attr_transform_post = array();
-    
-    /**
-     * Lookup table of tags that close this tag.
-     * @public
-     */
-    var $auto_close = array();
-    
-    /**
-     * HTMLPurifier_ChildDef of this tag.
-     * @public
-     */
-    var $child;
-    
-    /**
-     * Type of the tag: inline or block or unknown?
-     * @public
-     */
-    var $type = 'unknown';
-    
-    /**
-     * Lookup table of tags excluded from all descendants of this tag.
-     * @public
-     */
-    var $excludes = array();
-    
-}
-
-?>
+<?php
+
+// components
+require_once 'HTMLPurifier/HTMLModuleManager.php';
+
+// this definition and its modules MUST NOT define configuration directives
+// outside of the HTML or Attr namespaces
+
+// will be superseded by more accurate doctype declaration schemes
+// %HTML.Strict (bool, default false): chooses between the Transitional
+// and Strict rulesets.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'Strict', false, 'bool',
+    'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// %HTML.BlockWrapper (string, default 'p'): element used to wrap inline
+// content that appears where only block content is allowed.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'BlockWrapper', 'p', 'string',
+    'String name of element to wrap inline elements that are inside a block '.
+    'context.  This only occurs in the children of blockquote in strict mode. '.
+    'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '.
+    'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '.
+    '<code>&lt;p&gt;</code> tags can be replaced '.
+    'with whatever you desire, as long as it is a block level element. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// %HTML.Parent (string, default 'div'): element the purified fragment
+// is assumed to be inserted into.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'Parent', 'div', 'string',
+    'String name of element that HTML fragment passed to library will be '.
+    'inserted in.  An interesting variation would be using span as the '.
+    'parent element, meaning that only inline tags would be allowed. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// %HTML.AllowedElements (lookup or null, default null): optional
+// whitelist of element names; it can only remove supported elements.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'AllowedElements', null, 'lookup/null',
+    'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
+    'can overload it with your own list of tags to allow.  Note that this '.
+    'method is subtractive: it does its job by taking away from HTML Purifier '.
+    'usual feature set, so you cannot add a tag that HTML Purifier never '.
+    'supported in the first place (like embed, form or head).  If you change this, you '.
+    'probably also want to change %HTML.AllowedAttributes. '.
+    '<strong>Warning:</strong> If another directive conflicts with the '.
+    'elements here, <em>that</em> directive will win and override. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// %HTML.AllowedAttributes (lookup or null, default null): optional
+// whitelist keyed by 'tag.attr' or '*.attr'.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'AllowedAttributes', null, 'lookup/null',
+    'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
+    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
+    '(style, id, class, dir, lang, xml:lang). '.
+    '<strong>Warning:</strong> If another directive conflicts with the '.
+    'elements here, <em>that</em> directive will win and override. For '.
+    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
+    'directive.  You must set that directive to true before you can use '.
+    'IDs at all. This directive has been available since 1.3.0.'
+);
+
+/**
+ * Definition of the purified HTML that describes allowed children,
+ * attributes, and many other things.
+ * 
+ * Conventions:
+ * 
+ * All member variables that are prefixed with info
+ * (including the main $info array) are used by HTML Purifier internals
+ * and should not be directly edited when customizing the HTMLDefinition.
+ * They can usually be set via configuration directives or custom
+ * modules.
+ * 
+ * On the other hand, member variables without the info prefix are used
+ * internally by the HTMLDefinition and MUST NOT be used by other HTML
+ * Purifier internals. Many of them, however, are public, and may be
+ * edited by userspace code to tweak the behavior of HTMLDefinition.
+ * 
+ * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
+ * rule: in the interest of comprehensiveness, it will sniff everything.
+ */
+class HTMLPurifier_HTMLDefinition
+{
+    
+    /** FULLY-PUBLIC VARIABLES */
+    
+    /**
+     * Map of element name => HTMLPurifier_ElementDef
+     * @public
+     */
+    var $info = array();
+    
+    /**
+     * Map of global attribute name => attribute definition.
+     * @public
+     */
+    var $info_global_attr = array();
+    
+    /**
+     * Name of the element the purified HTML will be placed inside.
+     * @public
+     */
+    var $info_parent = 'div';
+    
+    /**
+     * Element definition of the parent; stored separately so that the
+     * parent may be a tag that is not allowed inside the fragment itself.
+     * @public
+     */
+    var $info_parent_def;
+    
+    /**
+     * Name of the element that wraps inline content in a block context
+     * @note Rarely used except for BLOCKQUOTEs in strict mode
+     * @public
+     */
+    var $info_block_wrapper = 'p';
+    
+    /**
+     * Map of deprecated tag name => HTMLPurifier_TagTransform
+     * @public
+     */
+    var $info_tag_transform = array();
+    
+    /**
+     * HTMLPurifier_AttrTransform objects to run before validation.
+     * @public
+     */
+    var $info_attr_transform_pre = array();
+    
+    /**
+     * HTMLPurifier_AttrTransform objects to run after validation.
+     * @public
+     */
+    var $info_attr_transform_post = array();
+    
+    /**
+     * Two-level lookup: content set name (Block, Inline) => element
+     * name => membership flag.
+     * @public
+     */
+    var $info_content_sets = array();
+    
+    
+    
+    /** PUBLIC BUT INTERNAL VARIABLES */
+    
+    var $setup = false; /**< Set to true once setup() has run */
+    var $config; /**< HTMLPurifier_Config, only held until setup() finishes */
+    
+    var $manager; /**< HTMLPurifier_HTMLModuleManager, dropped after setup() */
+    
+    /**
+     * Cheap constructor: stores the config and creates the module
+     * manager; the expensive work is deferred to setup().
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function HTMLPurifier_HTMLDefinition(&$config) {
+        $this->config =& $config;
+        $this->manager = new HTMLPurifier_HTMLModuleManager();
+    }
+    
+    /**
+     * Builds the info* members from the modules and the configuration.
+     * Only the first call does any work; the definition should not be
+     * modified afterwards.
+     */
+    function setup() {
+        
+        // only the first call does any work
+        if ($this->setup) {
+            return;
+        }
+        $this->setup = true;
+        
+        $this->processModules();
+        $this->setupConfigStuff();
+        
+        // the helpers are only needed while setting up
+        unset($this->config, $this->manager);
+        
+    }
+    
+    /**
+     * Copies transforms, elements and content sets out of the manager.
+     */
+    function processModules() {
+        
+        $this->manager->setup($this->config);
+        
+        // entries from later modules overwrite same-keyed earlier ones
+        foreach ($this->manager->activeModules as $module) {
+            foreach ($module->info_tag_transform as $key => $transform) {
+                $this->info_tag_transform[$key] = $transform;
+            }
+            foreach ($module->info_attr_transform_pre as $key => $transform) {
+                $this->info_attr_transform_pre[$key] = $transform;
+            }
+            foreach ($module->info_attr_transform_post as $key => $transform) {
+                $this->info_attr_transform_post[$key] = $transform;
+            }
+        }
+        
+        $this->info = $this->manager->getElements($this->config);
+        $this->info_content_sets = $this->manager->contentSets->lookup;
+        
+    }
+    
+    /**
+     * Applies the %HTML.BlockWrapper, %HTML.Parent, %HTML.AllowedElements
+     * and %HTML.AllowedAttributes directives to the definition.
+     */
+    function setupConfigStuff() {
+        
+        // block wrapper must be a member of the Block content set
+        $wrapper = $this->config->get('HTML', 'BlockWrapper');
+        if (!isset($this->info_content_sets['Block'][$wrapper])) {
+            trigger_error('Cannot use non-block element as block wrapper.',
+                E_USER_ERROR);
+        } else {
+            $this->info_block_wrapper = $wrapper;
+        }
+        
+        // parent element: fall back to the default name when unknown
+        $parent_name = $this->config->get('HTML', 'Parent');
+        $parent_def = $this->manager->getElement($parent_name, $this->config);
+        if (!$parent_def) {
+            trigger_error('Cannot use unrecognized element as parent.',
+                E_USER_ERROR);
+            $this->info_parent_def = $this->manager->getElement(
+                $this->info_parent, $this->config);
+        } else {
+            $this->info_parent = $parent_name;
+            $this->info_parent_def = $parent_def;
+        }
+        
+        // subtractive element whitelist
+        $allowed_elements = $this->config->get('HTML', 'AllowedElements');
+        if (is_array($allowed_elements)) {
+            foreach (array_keys($this->info) as $tag) {
+                if (isset($allowed_elements[$tag])) continue;
+                unset($this->info[$tag]);
+            }
+        }
+        
+        // subtractive attribute whitelist; 'tag.attr' and '*.attr' both count
+        $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
+        if (is_array($allowed_attributes)) {
+            foreach (array_keys($this->info_global_attr) as $attr) {
+                if (isset($allowed_attributes['*.' . $attr])) continue;
+                unset($this->info_global_attr[$attr]);
+            }
+            foreach (array_keys($this->info) as $tag) {
+                foreach (array_keys($this->info[$tag]->attr) as $attr) {
+                    $kept = isset($allowed_attributes[$tag . '.' . $attr]) ||
+                            isset($allowed_attributes['*.' . $attr]);
+                    if (!$kept) {
+                        unset($this->info[$tag]->attr[$attr]);
+                    }
+                }
+            }
+        }
+        
+    }
+    
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule.php
+++ b/library/HTMLPurifier/HTMLModule.php
@ -0,0 +1,125 @@
+<?php
+
+/**
+ * Represents an XHTML 1.1 module, with information on elements, tags
+ * and attributes.
+ * @note Even though this is technically XHTML 1.1, it is also used for
+ *       regular HTML parsing. We are using modulization as a convenient
+ *       way to represent the internals of HTMLDefinition, and our
+ *       implementation is by no means conforming and does not directly
+ *       use the normative DTDs or XML schemas.
+ * @note The public variables in a module should almost directly
+ *       correspond to the variables in HTMLPurifier_HTMLDefinition.
+ *       However, the prefix info carries no special meaning in these
+ *       objects (include it anyway if that's the correspondence though).
+ */
+
+class HTMLPurifier_HTMLModule
+{
+    /**
+     * Short unique string identifier of the module
+     */
+    var $name;
+    
+    /**
+     * Dynamically set integer that specifies when the module was loaded in.
+     */
+    var $order;
+    
+    /**
+     * Informally, a list of elements this module changes. Not used in
+     * any significant way.
+     * @protected
+     */
+    var $elements = array();
+    
+    /**
+     * Associative array of element names to element definitions.
+     * Some definitions may be incomplete, to be merged in later
+     * with the full definition.
+     * @public
+     */
+    var $info = array();
+    
+    /**
+     * Associative array of content set names to content set additions.
+     * This is commonly used to, say, add an A element to the Inline
+     * content set. This corresponds to an internal variable $content_sets
+     * and NOT info_content_sets member variable of HTMLDefinition.
+     * @public
+     */
+    var $content_sets = array();
+    
+    /**
+     * Associative array of attribute collection names to attribute
+     * collection additions. More rarely used for adding attributes to
+     * the global collections. Example is the StyleAttribute module adding
+     * the style attribute to the Core. Corresponds to HTMLDefinition's
+     * attr_collections->info, since the object's data is only info,
+     * with extra behavior associated with it.
+     * @public
+     */
+    var $attr_collections = array();
+    
+    /**
+     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
+     * @public
+     */
+    var $info_tag_transform = array();
+    
+    /**
+     * List of HTMLPurifier_AttrTransform to be performed before validation.
+     * @public
+     */
+    var $info_attr_transform_pre = array();
+    
+    /**
+     * List of HTMLPurifier_AttrTransform to be performed after validation.
+     * @public
+     */
+    var $info_attr_transform_post = array();
+    
+    /**
+     * Boolean flag that indicates whether or not getChildDef is implemented.
+     * For optimization reasons: may save a call to a function. Be sure
+     * to set it if you do implement getChildDef(), otherwise it will have
+     * no effect!
+     * @public
+     */
+    var $defines_child_def = false;
+    
+    /**
+     * Retrieves a proper HTMLPurifier_ChildDef subclass based on 
+     * content_model and content_model_type member variables of
+     * the HTMLPurifier_ElementDef class. This base implementation
+     * handles no content model types and always returns false.
+     * NOTE(review): the original comment pointed to a similar function
+     * in HTMLPurifier_HTMLDefinition; none is visible there in this
+     * change, so the counterpart presumably lives elsewhere - confirm.
+     * @param $def HTMLPurifier_ElementDef instance
+     * @return HTMLPurifier_ChildDef subclass, or false when the module
+     *         does not handle the definition's content_model_type
+     * @public
+     */
+    function getChildDef($def) {return false;}
+    
+    /**
+     * Hook method that lets module perform arbitrary operations on
+     * HTMLPurifier_HTMLDefinition before the module gets processed.
+     * Does nothing in this base class.
+     * @param $definition Reference to HTMLDefinition being setup
+     */
+    function preProcess(&$definition) {}
+    
+    /**
+     * Hook method that lets module perform arbitrary operations
+     * on HTMLPurifier_HTMLDefinition after the module gets processed.
+     * Does nothing in this base class.
+     * @param $definition Reference to HTMLDefinition being setup
+     */
+    function postProcess(&$definition) {}
+    
+    /**
+     * Hook method that is called when a module gets registered to
+     * the definition. Does nothing in this base class.
+     * @param $definition Reference to HTMLDefinition being setup
+     */
+    function setup(&$definition) {}
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/Bdo.php
+++ b/library/HTMLPurifier/HTMLModule/Bdo.php
@ -0,0 +1,43 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
+
+/**
+ * XHTML 1.1 Bi-directional Text Module, defines elements that
+ * declare directionality of content. Text Extension Module.
+ */
+class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Bdo';
+    var $elements = array('bdo');
+    var $info = array();
+    var $content_sets = array('Inline' => 'bdo');
+    var $attr_collections = array(
+        'I18N' => array('dir' => false) // placeholder, filled in by constructor
+    );
+    
+    /**
+     * Registers the dir attribute in the I18N collection and builds the
+     * bdo element definition.
+     */
+    function HTMLPurifier_HTMLModule_Bdo() {
+        $direction = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
+        $this->attr_collections['I18N']['dir'] = $direction;
+        
+        $bdo = new HTMLPurifier_ElementDef();
+        // The Abstract Module specification gets the attribute inclusions
+        // for bdo wrong: xml:lang is allowed as well. lang is thrown in
+        // too; although XHTML 1.1 does not allow it, a global attribute
+        // transform will take care of that.
+        $bdo->attr = array(
+            0 => array('Core', 'Lang'),
+            'dir' => $direction, // required
+        );
+        $bdo->content_model = '#PCDATA | Inline';
+        $bdo->content_model_type = 'optional';
+        // dir is required, so supply fallback behavior when it is missing
+        $bdo->attr_transform_post['required-dir'] =
+            new HTMLPurifier_AttrTransform_BdoDir();
+        $this->info['bdo'] = $bdo;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/CommonAttributes.php
+++ b/library/HTMLPurifier/HTMLModule/CommonAttributes.php
@ -0,0 +1,31 @@
+<?php
+
+/**
+ * Module that declares the Core, Lang, I18N and Common attribute
+ * collections; it defines no elements of its own.
+ */
+class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
+{
+    var $name = 'CommonAttributes';
+    
+    // NOTE(review): index 0 of a collection appears to list the names of
+    // other collections to include - confirm against the code that
+    // consumes attr_collections.
+    var $attr_collections = array(
+        'Core' => array(
+            0 => array('Style'),
+            // 'xml:space' => false,
+            'class' => 'NMTOKENS',
+            'id' => 'ID',
+            'title' => 'CDATA',
+        ),
+        'Lang' => array(
+            'xml:lang' => false, // see constructor
+        ),
+        'I18N' => array(
+            0 => array('Lang'), // proprietary, for xml:lang/lang
+        ),
+        'Common' => array(
+            0 => array('Core', 'I18N')
+        )
+    );
+    
+    /**
+     * Replaces the false placeholder for xml:lang with a real attribute
+     * definition object.
+     */
+    function HTMLPurifier_HTMLModule_CommonAttributes() {
+        $this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang();
+    }
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/Edit.php
+++ b/library/HTMLPurifier/HTMLModule/Edit.php
@ -0,0 +1,46 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/ChildDef/Chameleon.php';
+
+/**
+ * XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
+ * Module.
+ */
+class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Edit';
+    var $elements = array('del', 'ins');
+    var $info = array();
+    var $content_sets = array('Inline' => 'del | ins');
+    var $defines_child_def = true;
+    
+    /**
+     * Builds identical chameleon definitions for del and ins.
+     */
+    function HTMLPurifier_HTMLModule_Edit() {
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->attr = array(
+                0 => array('Common'),
+                'cite' => 'URI',
+                // 'datetime' => 'Datetime' // Datetime not implemented
+            );
+            // Two models separated by an exclamation mark: the inline
+            // context model first, then the block context model
+            // (getChildDef splits them apart again).
+            $def->content_model = '#PCDATA | Inline ! #PCDATA | Flow';
+            // HTML 4.01 forbids block elements inside ins/del when they
+            // are used in an inline context; the chameleon child
+            // definition is a complicated workaround to achieve this
+            $def->content_model_type = 'chameleon';
+            $this->info[$tag] = $def;
+        }
+    }
+    
+    /**
+     * Creates the chameleon child definition for an element.
+     * @param $def HTMLPurifier_ElementDef instance
+     * @return HTMLPurifier_ChildDef_Chameleon, or false if the content
+     *         model type of $def is not 'chameleon'
+     */
+    function getChildDef($def) {
+        if ($def->content_model_type == 'chameleon') {
+            $models = explode('!', $def->content_model);
+            return new HTMLPurifier_ChildDef_Chameleon($models[0], $models[1]);
+        }
+        return false;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/Hypertext.php
+++ b/library/HTMLPurifier/HTMLModule/Hypertext.php
@ -0,0 +1,36 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 Hypertext Module, defines hypertext links. Core Module.
+ */
+class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Hypertext';
+    var $elements = array('a');
+    var $info = array();
+    var $content_sets = array('Inline' => 'a');
+    
+    /**
+     * Builds the definition of the a element.
+     */
+    function HTMLPurifier_HTMLModule_Hypertext() {
+        $anchor = new HTMLPurifier_ElementDef();
+        // not enabled yet: accesskey (Character), charset (Charset),
+        // hreflang (LanguageCode), rel and rev (LinkTypes),
+        // tabindex (Number), type (ContentType)
+        $anchor->attr = array(
+            0 => array('Common'),
+            'href' => 'URI',
+        );
+        $anchor->content_model = '#PCDATA | Inline';
+        $anchor->content_model_type = 'optional';
+        // links may not be nested inside links
+        $anchor->excludes = array('a' => true);
+        $this->info['a'] = $anchor;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/Image.php
+++ b/library/HTMLPurifier/HTMLModule/Image.php
@ -0,0 +1,38 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+require_once 'HTMLPurifier/AttrDef/URI.php';
+require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
+
+/**
+ * XHTML 1.1 Image Module provides basic image embedding.
+ * @note There is specialized code for removing empty images in
+ *       HTMLPurifier_Strategy_RemoveForeignElements
+ */
+class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Image';
+    var $elements = array('img');
+    var $info = array();
+    var $content_sets = array('Inline' => 'img');
+    
+    /**
+     * Builds the definition of the (empty) img element.
+     */
+    function HTMLPurifier_HTMLModule_Image() {
+        $image = new HTMLPurifier_ElementDef();
+        $image->attr = array(
+            0 => array('Common'),
+            'alt' => 'Text',
+            'height' => 'Length',
+            'longdesc' => 'URI', 
+            'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
+            'width' => 'Length'
+        );
+        $image->content_model_type = 'empty';
+        $image->attr_transform_post[] =
+            new HTMLPurifier_AttrTransform_ImgRequired();
+        $this->info['img'] = $image;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/Legacy.php
+++ b/library/HTMLPurifier/HTMLModule/Legacy.php
@ -0,0 +1,60 @@
+<?php
+
+/**
+ * XHTML 1.1 Legacy module defines elements that were previously 
+ * deprecated.
+ * 
+ * @note Not all legacy elements have been implemented yet, which
+ *       is a bit of a reverse problem as compared to browsers! In
+ *       addition, this legacy module may implement a bit more than
+ *       mandated by XHTML 1.1.
+ * 
+ * This module can be used in combination with TransformToStrict in order
+ * to transform as many deprecated elements as possible, but retain
+ * questionably deprecated elements that do not have good alternatives
+ * as well as transform elements that don't have an implementation.
+ * See docs/ref-strictness.txt for more details.
+ */
+
+class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
+{
+    
+    // incomplete
+    
+    var $name = 'Legacy';
+    var $elements = array('u', 's', 'strike');
+    var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote');
+    
+    /**
+     * Defines the legacy inline elements and the partial (non-standalone)
+     * definitions that modify elements declared by other modules.
+     */
+    function HTMLPurifier_HTMLModule_Legacy() {
+        // new elements: u, s and strike all share one shape; add
+        // conditionals here as more elements get added
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->content_model = 'Inline | #PCDATA';
+            $def->content_model_type = 'optional';
+            $def->attr[0] = array('Common');
+            $this->info[$tag] = $def;
+        }
+        
+        // modifications to elements that already exist elsewhere
+        foreach ($this->non_standalone_elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->standalone = false;
+            $this->info[$tag] = $def;
+        }
+        
+        $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
+        $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
+        
+        // replacement content models for address and blockquote
+        $models = array(
+            'address' => 'Inline | #PCDATA | p',
+            'blockquote' => 'Flow | #PCDATA',
+        );
+        foreach ($models as $tag => $model) {
+            $this->info[$tag]->content_model = $model;
+            $this->info[$tag]->content_model_type = 'optional';
+            $this->info[$tag]->child = false;
+        }
+        
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/List.php
+++ b/library/HTMLPurifier/HTMLModule/List.php
@ -0,0 +1,46 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 List Module, defines list-oriented elements. Core Module.
+ */
+class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'List';
+    var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li');
+    var $info = array();
+    // The abstract schema declares the List content set as a fully formed
+    // one-or-more expression, but since it invariably occurs inside an
+    // optional declaration that subtlety is skipped here. This might
+    // cause trouble if a user defines "List" and expects that multiple
+    // lists may be specified, but that is not very intuitive anyway, and
+    // the actual XML Schema may disagree. Regardless, such nested
+    // expressions are not supported without resorting to the incredibly
+    // inefficient and draconian Custom ChildDef.
+    var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List');
+    
+    /**
+     * Builds the definitions of the list elements.
+     */
+    function HTMLPurifier_HTMLModule_List() {
+        foreach ($this->elements as $tag) {
+            $this->info[$tag] = new HTMLPurifier_ElementDef();
+            $this->info[$tag]->attr = array(0 => array('Common'));
+            switch ($tag) {
+                case 'li':
+                case 'dd':
+                    $this->info[$tag]->content_model = '#PCDATA | Flow';
+                    $this->info[$tag]->content_model_type = 'optional';
+                    break;
+                case 'ol':
+                case 'ul':
+                    $this->info[$tag]->content_model = 'li';
+                    $this->info[$tag]->content_model_type = 'required';
+                    break;
+            }
+        }
+        
+        // definition list parts
+        $this->info['dt']->content_model = '#PCDATA | Inline';
+        $this->info['dt']->content_model_type = 'optional';
+        $this->info['dl']->content_model = 'dt | dd';
+        $this->info['dl']->content_model_type = 'required';
+        
+        // this could be a LOT more robust
+        $this->info['li']->auto_close = array('li' => true);
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/Presentation.php
+++ b/library/HTMLPurifier/HTMLModule/Presentation.php
@ -0,0 +1,41 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 Presentation Module, defines simple presentation-related
+ * markup. Text Extension Module.
+ * @note The official XML Schema and DTD specs further divide this into
+ *       two modules:
+ *          - Block Presentation (hr)
+ *          - Inline Presentation (b, big, i, small, sub, sup, tt)
+ *       We have chosen not to heed this distinction, as content_sets
+ *       provides satisfactory disambiguation.
+ */
+class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Presentation';
+    var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt');
+    var $info = array();
+    var $content_sets = array(
+        'Block' => 'hr',
+        'Inline' => 'b | big | i | small | sub | sup | tt'
+    );
+    
+    /**
+     * Builds the element definitions: hr is empty, every other element
+     * optionally holds text and inline content.
+     */
+    function HTMLPurifier_HTMLModule_Presentation() {
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->attr = array(0 => array('Common'));
+            if ($tag != 'hr') {
+                $def->content_model = '#PCDATA | Inline';
+                $def->content_model_type = 'optional';
+            } else {
+                $def->content_model_type = 'empty';
+            }
+            $this->info[$tag] = $def;
+        }
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/StyleAttribute.php
+++ b/library/HTMLPurifier/HTMLModule/StyleAttribute.php
@ -0,0 +1,27 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/AttrDef/CSS.php';
+
+/**
+ * XHTML 1.1 Style Attribute Module, defines the style attribute by
+ * adding it to a Style attribute collection that Core includes.
+ */
+class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'StyleAttribute';
+    var $attr_collections = array(
+        // The inclusion routine differs from the Abstract Modules but
+        // is in line with the DTD and XML Schemas.
+        'Style' => array('style' => false), // see constructor
+        'Core' => array(0 => array('Style'))
+    );
+    
+    /**
+     * Replaces the false placeholder for style with a CSS attribute
+     * definition object.
+     */
+    function HTMLPurifier_HTMLModule_StyleAttribute() {
+        $this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/Tables.php
+++ b/library/HTMLPurifier/HTMLModule/Tables.php
@ -0,0 +1,88 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/ChildDef/Table.php';
+
+/**
+ * XHTML 1.1 Tables Module, fully defines accessible table elements.
+ */
+class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Tables';
+    var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col',
+        'colgroup', 'tbody', 'thead', 'tfoot');
+    var $info = array();
+    var $content_sets = array('Block' => 'table');
+    
+    /**
+     * Builds the attribute lists and content models of all table
+     * elements.
+     */
+    function HTMLPurifier_HTMLModule_Tables() {
+        
+        // attributes
+        foreach ($this->elements as $tag) {
+            $this->info[$tag] = new HTMLPurifier_ElementDef();
+            $attributes = array(0 => array('Common'));
+            if ($tag == 'caption') {
+                // caption only has the common attributes
+            } elseif ($tag == 'table') {
+                $attributes['border'] = 'Pixels';
+                $attributes['cellpadding'] = 'Length';
+                $attributes['cellspacing'] = 'Length';
+                $attributes['frame'] = new HTMLPurifier_AttrDef_Enum(array(
+                    'void', 'above', 'below', 'hsides', 'lhs', 'rhs',
+                    'vsides', 'box', 'border'
+                ), false);
+                $attributes['rules'] = new HTMLPurifier_AttrDef_Enum(array(
+                    'none', 'groups', 'rows', 'cols', 'all'
+                ), false);
+                $attributes['summary'] = 'Text';
+                $attributes['width'] = 'Length';
+            } else {
+                if ($tag == 'col' || $tag == 'colgroup') {
+                    $attributes['span'] = 'Number';
+                    $attributes['width'] = 'MultiLength';
+                }
+                if ($tag == 'td' || $tag == 'th') {
+                    $attributes['abbr'] = 'Text';
+                    $attributes['colspan'] = 'Number';
+                    $attributes['rowspan'] = 'Number';
+                }
+                // cell alignment, shared by everything except caption/table
+                $attributes['align'] = new HTMLPurifier_AttrDef_Enum(array(
+                    'left', 'center', 'right', 'justify', 'char'
+                ), false);
+                $attributes['valign'] = new HTMLPurifier_AttrDef_Enum(array(
+                    'top', 'middle', 'bottom', 'baseline'
+                ), false);
+                $attributes['charoff'] = 'Length';
+            }
+            $this->info[$tag]->attr = $attributes;
+        }
+        
+        // content models
+        $this->info['caption']->content_model = '#PCDATA | Inline';
+        $this->info['caption']->content_model_type = 'optional';
+        
+        // table gets its child definition directly because it does not
+        // leverage the substitution mechanisms. True model is:
+        // 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))'
+        $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
+        
+        foreach (array('th', 'td') as $cell) {
+            $this->info[$cell]->content_model = '#PCDATA | Flow';
+            $this->info[$cell]->content_model_type = 'optional';
+        }
+        
+        $this->info['tr']->content_model = 'td | th';
+        $this->info['tr']->content_model_type = 'required';
+        
+        $this->info['col']->content_model_type = 'empty';
+        
+        $this->info['colgroup']->content_model = 'col';
+        $this->info['colgroup']->content_model_type = 'optional';
+        
+        foreach (array('tfoot', 'thead', 'tbody') as $group) {
+            $this->info[$group]->content_model = 'tr';
+            $this->info[$group]->content_model_type = 'required';
+        }
+        
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/Text.php
+++ b/library/HTMLPurifier/HTMLModule/Text.php
@ -0,0 +1,78 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 Text Module, defines basic text containers. Core Module.
+ * @note In the normative XML Schema specification, this module
+ *       is further abstracted into the following modules:
+ *          - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6)
+ *          - Block Structural (div, p)
+ *          - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
+ *          - Inline Structural (br, span)
+ *       We have elected not to follow suit, but this may change.
+ */
+class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
+{
+    
+    /** Identifier of this module */
+    var $name = 'Text';
+    
+    /** Names of every element this module defines */
+    var $elements = array(
+        'abbr', 'acronym', 'address', 'blockquote', 'br', 'cite', 'code',
+        'dfn', 'div', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'kbd',
+        'p', 'pre', 'q', 'samp', 'span', 'strong', 'var'
+    );
+    
+    /** Element definitions keyed by element name; filled in by the constructor */
+    var $info = array();
+    
+    /** Named content sets, each written as a '|'-separated member list */
+    var $content_sets = array(
+        'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
+        'Block' => 'address | blockquote | div | p | pre',
+        'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
+        'Flow' => 'Heading | Block | Inline'
+    );
+    
+    /**
+     * Creates one HTMLPurifier_ElementDef per entry of $elements, assigning
+     * its attribute collection and content model, then attaches the
+     * exclusions of pre and the auto-close list of p.
+     */
+    function HTMLPurifier_HTMLModule_Text() {
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            
+            // attribute collections: br only gets Core, the quoting
+            // elements additionally accept a cite URI
+            switch ($tag) {
+                case 'br':
+                    $def->attr = array(0 => array('Core'));
+                    break;
+                case 'blockquote':
+                case 'q':
+                    $def->attr = array(0 => array('Common'), 'cite' => 'URI');
+                    break;
+                default:
+                    $def->attr = array(0 => array('Common'));
+            }
+            
+            // content models: br is empty, blockquote and div hold block
+            // level content, everything else holds inline content
+            switch ($tag) {
+                case 'br':
+                    $def->content_model_type = 'empty';
+                    break;
+                case 'blockquote':
+                    $def->content_model = 'Heading | Block | List';
+                    $def->content_model_type = 'optional';
+                    break;
+                case 'div':
+                    $def->content_model = '#PCDATA | Flow';
+                    $def->content_model_type = 'optional';
+                    break;
+                default:
+                    $def->content_model = '#PCDATA | Inline';
+                    $def->content_model_type = 'optional';
+            }
+            
+            $this->info[$tag] = $def;
+        }
+        
+        // Exclusions are expressed SGML-style (they apply to all
+        // descendants) instead of through the composed content models the
+        // W3C uses in its DTDs and XML Schemas. The Abstract Module knows
+        // nothing about this distinction.
+        $banned_in_pre = array(
+            'img', 'big', 'small',
+            'object', 'applet', 'font', 'basefont' // generally not allowed
+        );
+        $this->info['pre']->excludes = array_flip($banned_in_pre);
+        
+        // lookup of element names stored as the auto_close list of p
+        $closes_p = array(
+            'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
+            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
+            'table', 'ul'
+        );
+        $this->info['p']->auto_close = array_flip($closes_p);
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/TransformToStrict.php
+++ b/library/HTMLPurifier/HTMLModule/TransformToStrict.php
@ -0,0 +1,86 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
+
+require_once 'HTMLPurifier/TagTransform/Simple.php';
+require_once 'HTMLPurifier/TagTransform/Center.php';
+require_once 'HTMLPurifier/TagTransform/Font.php';
+
+require_once 'HTMLPurifier/AttrTransform/Lang.php';
+require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
+
+/**
+ * Proprietary module that transforms deprecated elements into Strict
+ * HTML (see HTML 4.01 and XHTML 1.0) when possible.
+ */
+
+class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
+{
+    
+    /** Identifier of this module */
+    var $name = 'TransformToStrict';
+    
+    // these elements are adjusted here, their real definitions live elsewhere
+    var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote');
+    
+    // every value below is a stand-in; the constructor installs the objects
+    var $info_tag_transform = array(
+        'font'  => false,
+        'menu'  => false,
+        'dir'   => false,
+        'center'=> false
+    );
+    
+    var $attr_collections = array(
+        'Lang' => array(
+            'lang' => false // stand-in, see constructor
+        )
+    );
+    
+    var $info_attr_transform_post = array(
+        'lang' => false // stand-in, see constructor
+    );
+    
+    /**
+     * Installs the tag and attribute transforms and registers partial
+     * (non-standalone) element definitions for everything in $elements.
+     */
+    function HTMLPurifier_HTMLModule_TransformToStrict() {
+        
+        // replacements for deprecated tags
+        $this->info_tag_transform['font']   = new HTMLPurifier_TagTransform_Font();
+        $this->info_tag_transform['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
+        $this->info_tag_transform['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
+        $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
+        
+        // partial definitions: flagged as not standalone
+        foreach ($this->elements as $tag) {
+            $partial = new HTMLPurifier_ElementDef();
+            $partial->standalone = false;
+            $this->info[$tag] = $partial;
+        }
+        
+        // the deprecated align attribute on headings and paragraphs is
+        // handled by a single TextAlign transform
+        $text_align = new HTMLPurifier_AttrTransform_TextAlign();
+        foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p') as $tag) {
+            $this->info[$tag]->attr_transform_pre['align'] = $text_align;
+        }
+        
+        // xml:lang <=> lang mirroring is implemented here;
+        // TransformToXHTML11 overrides it
+        $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
+        $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
+        
+        // only XHTML 1.0 Strict should get this, NOT XHTML 1.0
+        // Transitional; three classes may turn out to be necessary
+        $this->info['blockquote']->content_model_type = 'strictblockquote';
+        $this->info['blockquote']->child = false; // recalculate please!
+        
+    }
+    
+    var $defines_child_def = true;
+    
+    /**
+     * Supplies the child definition for the 'strictblockquote' content
+     * model type.
+     * @param $def Element definition whose content_model_type is inspected
+     * @return HTMLPurifier_ChildDef_StrictBlockquote built from the
+     *         definition's content_model, or false for any other type
+     */
+    function getChildDef($def) {
+        if ($def->content_model_type == 'strictblockquote') {
+            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+        }
+        return false;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php
+++ b/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php
@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Proprietary module that transforms XHTML 1.0 deprecated aspects into
+ * XHTML 1.1 compliant ones, when possible. For maximum effectiveness,
+ * HTMLPurifier_HTMLModule_TransformToStrict must also be loaded
+ * (otherwise, elements that were deprecated from Transitional to Strict
+ * will not be transformed).
+ * 
+ * XHTML 1.1 compliant documents are automatically XHTML 1.0 compliant too,
+ * although they may not be as friendly to legacy browsers.
+ */
+
+class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule
+{
+    
+    // This module declares no elements and no constructor: it consists
+    // solely of the two overrides below, both of which set the 'lang'
+    // entry to false.
+    
+    var $name = 'TransformToXHTML11';
+    
+    // 'lang' entry of the Lang attribute collection set to false
+    // NOTE(review): presumably the merging code treats false as "drop this
+    // attribute" -- confirm against HTMLPurifier_AttrCollections
+    var $attr_collections = array(
+        'Lang' => array(
+            'lang' => false // remove it
+        )
+    );
+    
+    // same key that TransformToStrict fills with its lang mirroring transform
+    var $info_attr_transform_post = array(
+        'lang' => false // remove it
+    );
+    
+}
+
+?>
--- a/library/HTMLPurifier/HTMLModuleManager.php
+++ b/library/HTMLPurifier/HTMLModuleManager.php
@ -0,0 +1,558 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/ElementDef.php';
+
+require_once 'HTMLPurifier/ContentSets.php';
+require_once 'HTMLPurifier/AttrTypes.php';
+require_once 'HTMLPurifier/AttrCollections.php';
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/Enum.php';
+
+// W3C modules
+require_once 'HTMLPurifier/HTMLModule/CommonAttributes.php';
+require_once 'HTMLPurifier/HTMLModule/Text.php';
+require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
+require_once 'HTMLPurifier/HTMLModule/List.php';
+require_once 'HTMLPurifier/HTMLModule/Presentation.php';
+require_once 'HTMLPurifier/HTMLModule/Edit.php';
+require_once 'HTMLPurifier/HTMLModule/Bdo.php';
+require_once 'HTMLPurifier/HTMLModule/Tables.php';
+require_once 'HTMLPurifier/HTMLModule/Image.php';
+require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
+require_once 'HTMLPurifier/HTMLModule/Legacy.php';
+
+// proprietary modules
+require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
+require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php';
+
+// Registers the %HTML.Doctype configuration directive, which
+// HTMLPurifier_HTMLModuleManager::getDoctype() reads first; when it is
+// null the manager falls back to %Core.XHTML and %HTML.Strict.
+// NOTE(review): the third argument (null) is presumably the default value
+// and 'string/null' the accepted type -- confirm against ConfigSchema.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'Doctype', null, 'string/null',
+    'Doctype to use, valid values are HTML 4.01 Transitional, HTML 4.01 '.
+    'Strict, XHTML 1.0 Transitional, XHTML 1.0 Strict, XHTML 1.1. '.
+    'Technically speaking this is not actually a doctype (as it does '.
+    'not identify a corresponding DTD), but we are using this name '.
+    'for sake of simplicity. This will override any older directives '.
+    'like %Core.XHTML or %HTML.Strict.'
+);
+
+class HTMLPurifier_HTMLModuleManager
+{
+    
+    /**
+     * Array of HTMLPurifier_HTMLModule instances, indexed by each module's
+     * $name property (for example 'Text'), not by its class name.
+     * All known modules, regardless of use, are in this array.
+     */
+    var $modules = array();
+    
+    /**
+     * String doctype we will validate against. See $validModules for use.
+     * 
+     * @note
+     * There is a special doctype '*' that acts both as the "default"
+     * doctype if a customized system only defines one doctype and
+     * also a catch-all doctype that gets merged into all the other
+     * module collections. When possible, use a private collection to
+     * share modules between doctypes: this special doctype is to
+     * make life more convenient for users.
+     */
+    var $doctype;
+    var $doctypeAliases = array(); /**< Lookup array of strings to real doctypes */
+    
+    /**
+     * Associative array: $collections[$type][$doctype] = list of modules.
+     * This is used to logically separate types of functionality so that
+     * based on the doctype and other configuration settings they may
+     * be easily switched on and off. Custom setups may not need
+     * to use this abstraction, opting to have only one big collection
+     * with one valid doctype.
+     */
+    var $collections = array();
+    
+    /**
+     * Modules that may be used in a valid doctype of this kind.
+     * Correctional and leniency modules should not be placed in this
+     * array unless the user said so: don't stuff every possible lenient
+     * module for this doctype in here.
+     */
+    var $validModules = array();
+    var $validCollections = array(); /**< Collections to merge into $validModules */
+    
+    /**
+     * Modules that we will allow in input, subset of $validModules. Single
+     * element definitions may result in us consulting validModules.
+     */
+    var $activeModules = array();
+    var $activeCollections = array(); /**< Collections to merge into $activeModules */
+    
+    var $counter = 0; /**< Designates next available integer order for modules. */
+    var $initialized = false; /**< Says whether initialize() was called */
+    
+    /**
+     * Specifies what doctype to siphon new modules from addModule() to,
+     * or false to disable the functionality. Must be used in conjunction
+     * with $autoCollection.
+     */
+    var $autoDoctype = false;
+    /**
+     * Specifies what collection to siphon new modules from addModule() to,
+     * or false to disable the functionality. Must be used in conjunction
+     * with $autoDoctype.
+     */
+    var $autoCollection = false;
+    
+    /** Associative array of element name to defining modules (always array) */
+    var $elementLookup = array();
+    
+    /** List of prefixes we should use for resolving small names */
+    var $prefixes = array('HTMLPurifier_HTMLModule_');
+    
+    var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
+    var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */
+    var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
+    
+    /**
+     * @param $blank If true, don't do any initializing
+     */
+    function HTMLPurifier_HTMLModuleManager($blank = false) {
+        // attrTypes is the single internal object that may be edited
+        // directly; all others have to be changed by way of modules
+        $this->attrTypes = new HTMLPurifier_AttrTypes();
+        
+        if ($blank) {
+            // an empty manager was requested: leave out the default setup
+            return;
+        }
+        $this->initialize();
+    }
+    
+    function initialize() {
+        // Registers the stock modules, defines the stock collections
+        // ('Safe', 'Unsafe', 'Lenient', 'Correctional', 'Extension'),
+        // marks which of them are active/valid, and finally switches on
+        // automatic filing of later addModule() calls.
+        $this->initialized = true;
+        
+        // load default modules to the recognized modules list (not active)
+        // ($autoDoctype and $autoCollection default to false and are only
+        // set at the end of this method, so these addModule() calls do not
+        // file the stock modules under the 'Extension' collection)
+        $modules = array(
+            // define
+            'CommonAttributes',
+            'Text', 'Hypertext', 'List', 'Presentation',
+            'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute',
+            // define-redefine
+            'Legacy',
+            // redefine
+            'TransformToStrict', 'TransformToXHTML11'
+        );
+        foreach ($modules as $module) {
+            $this->addModule($module);
+        }
+        
+        // Safe modules for supported doctypes. These are included
+        // in the valid and active module lists by default
+        // (an array at index 0 of a doctype's list names other doctypes of
+        // the same collection whose modules processCollections() merges in)
+        $this->collections['Safe'] = array(
+            '_Common' => array( // leading _ indicates private
+                'CommonAttributes', 'Text', 'Hypertext', 'List',
+                'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
+                'StyleAttribute'
+            ),
+            // HTML definitions, defer to XHTML definitions
+            'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
+            'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
+            // XHTML definitions
+            'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
+            'XHTML 1.0 Strict' => array(array('_Common')),
+            'XHTML 1.1' => array(array('_Common')),
+        );
+        
+        // Modules that specify elements that are unsafe from untrusted
+        // third-parties. These should be registered in $validModules but
+        // almost never $activeModules unless you really know what you're
+        // doing.
+        $this->collections['Unsafe'] = array();
+        
+        // Modules to import if lenient mode (attempt to convert everything
+        // to a valid representation) is on. These must not be in $validModules
+        // unless specified so.
+        $this->collections['Lenient'] = array(
+            'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
+            'XHTML 1.0 Strict' => array('TransformToStrict'),
+            'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
+        );
+        
+        // Modules to import if correctional mode (correct everything that
+        // is feasible to strict mode) is on. These must not be in $validModules
+        // unless specified so.
+        $this->collections['Correctional'] = array(
+            'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
+            'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
+        );
+        
+        // User-space modules, custom code or whatever
+        $this->collections['Extension'] = array();
+        
+        // setup active versus valid modules. ORDER IS IMPORTANT!
+        // (assembleModules() merges collections in this order and keeps the
+        // first entry seen for each module name)
+        // definition modules
+        $this->makeCollectionActive('Safe');
+        $this->makeCollectionValid('Unsafe');
+        // redefinition modules
+        $this->makeCollectionActive('Lenient');
+        $this->makeCollectionActive('Correctional');
+        
+        // from here on addModule() files every new module under
+        // collections['Extension']['*']
+        $this->autoDoctype    = '*';
+        $this->autoCollection = 'Extension';
+        
+    }
+    
+    /**
+     * Adds a module to the recognized module list. This does not
+     * do anything else: the module must be added to a corresponding
+     * collection to be "activated".
+     * @param $module Mixed: string module name, with or without
+     *                HTMLPurifier_HTMLModule prefix, or instance of
+     *                subclass of HTMLPurifier_HTMLModule.
+     */
+    function addModule($module) {
+        // a string is resolved to a class and instantiated; anything else
+        // is taken to be a ready-made module instance
+        if (is_string($module)) {
+            $requested = $module;
+            $class = $requested;
+            if (!class_exists($class)) {
+                // bare name: try each registered prefix, stop at first hit
+                foreach ($this->prefixes as $prefix) {
+                    $class = $prefix . $requested;
+                    if (class_exists($class)) break;
+                }
+            }
+            if (!class_exists($class)) {
+                trigger_error($requested . ' module does not exist',
+                    E_USER_ERROR);
+                return;
+            }
+            $module = new $class();
+        }
+        
+        // registration order is recorded so collections can be sorted later
+        $module->order = $this->counter;
+        $this->counter++;
+        $this->modules[$module->name] = $module;
+        
+        // automatic filing only happens when both targets are configured
+        $auto_enabled = ($this->autoDoctype !== false && $this->autoCollection !== false);
+        if ($auto_enabled) {
+            $this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name;
+        }
+    }
+    
+    /**
+     * Makes a collection active, while also making it valid if not
+     * already done so. See $activeModules for the semantics of "active".
+     * @param $collection_name Name of collection to activate
+     */
+    function makeCollectionActive($collection_name) {
+        // an active collection has to be valid as well
+        $already_valid = in_array($collection_name, $this->validCollections);
+        if (!$already_valid) {
+            $this->makeCollectionValid($collection_name);
+        }
+        array_push($this->activeCollections, $collection_name);
+    }
+    
+    /**
+     * Makes a collection valid. See $validModules for the semantics of "valid"
+     */
+    function makeCollectionValid($collection_name) {
+        // plain append, no duplicate check is made here
+        array_push($this->validCollections, $collection_name);
+    }
+    
+    /**
+     * Adds a class prefix that addModule() will use to resolve a
+     * string name to a concrete class
+     */
+    function addPrefix($prefix) {
+        // cast first so that only strings are ever stored in $prefixes
+        $as_string = (string) $prefix;
+        array_push($this->prefixes, $as_string);
+    }
+    
+    function setup($config) {
+        // Resolves the doctype, turns every collection into a map of
+        // module name => module instance, computes $validModules and
+        // $activeModules, builds $elementLookup and creates the
+        // contentSets and attrCollections helpers.
+        // @param $config Configuration object handed on to getDoctype()
+        
+        // load up the autocollection
+        if ($this->autoCollection !== false) {
+            $this->makeCollectionActive($this->autoCollection);
+        }
+        
+        // retrieve the doctype
+        $this->doctype = $this->getDoctype($config);
+        if (isset($this->doctypeAliases[$this->doctype])) {
+            $this->doctype = $this->doctypeAliases[$this->doctype];
+        }
+        
+        // process module collections to module name => module instance form
+        // (indexing by key rather than using $x lets processCollections()
+        // receive the stored array by reference and modify it in place)
+        foreach ($this->collections as $col_i => $x) {
+            $this->processCollections($this->collections[$col_i]);
+        }
+        
+        $this->validModules  = $this->assembleModules($this->validCollections);
+        $this->activeModules = $this->assembleModules($this->activeCollections);
+        
+        // setup lookup table based on all valid modules
+        // (entries are appended to $elementLookup, which is not cleared first)
+        foreach ($this->validModules as $module) {
+            foreach ($module->info as $name => $def) {
+                if (!isset($this->elementLookup[$name])) {
+                    $this->elementLookup[$name] = array();
+                }
+                $this->elementLookup[$name][] = $module->name;
+            }
+        }
+        
+        // note the different choice
+        $this->contentSets = new HTMLPurifier_ContentSets(
+            // content models that contain non-allowed elements are 
+            // harmless because RemoveForeignElements will ensure
+            // they never get in anyway, and there is usually no
+            // reason why you should want to restrict a content
+            // model beyond what is mandated by the doctype.
+            // Note, however, that this means redefinitions of
+            // content models can't be tossed in validModules willy-nilly:
+            // that stuff still is regulated by configuration.
+            $this->validModules
+        );
+        $this->attrCollections = new HTMLPurifier_AttrCollections(
+            $this->attrTypes,
+            // only explicitly allowed modules are allowed to affect
+            // the global attribute collections. This means there's
+            // a distinction between loading the Bdo module, and the
+            // bdo element: Bdo will enable the dir attribute on all
+            // elements, while bdo will only define the bdo element,
+            // which will not have an editable directionality. This might
+            // catch people who are loading only elements by surprise, so
+            // we should consider loading an entire module if all the
+            // elements it defines are requested by the user, especially
+            // if it affects the global attribute collections.
+            $this->activeModules
+        );
+        
+    }
+    
+    /**
+     * Takes a list of collections and merges together all the defined
+     * modules for the current doctype from those collections.
+     * @param $collections List of collection suffixes we should grab
+     *                     modules from (like 'Safe' or 'Lenient')
+     */
+    function assembleModules($collections) {
+        // @return Array of module name => module instance for the current
+        //         doctype. Collections are merged with the array union
+        //         operator, so for a module name present in more than one
+        //         collection the entry merged first is the one kept.
+        $modules = array();
+        $numOfCollectionsUsed = 0;
+        foreach ($collections as $name) {
+            $disable_global = false;
+            if (!isset($this->collections[$name])) {
+                trigger_error("$name collection is undefined", E_USER_ERROR);
+                continue;
+            }
+            $cols = $this->collections[$name];
+            if (isset($cols[$this->doctype])) {
+                // a '*' key inside a doctype's own list is the marker that
+                // processCollections() leaves behind (value false) when the
+                // catch-all doctype is to be ignored for this doctype
+                if (isset($cols[$this->doctype]['*'])) {
+                    unset($cols[$this->doctype]['*']);
+                    $disable_global = true;
+                }
+                $modules += $cols[$this->doctype];
+                $numOfCollectionsUsed++;
+            }
+            // accept catch-all doctype
+            // (it does not count towards $numOfCollectionsUsed)
+            if (
+                $this->doctype !== '*' && 
+                isset($cols['*']) &&
+                !$disable_global
+            ) {
+                $modules += $cols['*'];
+            }
+        }
+        
+        // no collection had an entry for this exact doctype
+        if ($numOfCollectionsUsed < 1) {
+            // possible XSS injection if user-specified doctypes
+            // are allowed
+            trigger_error("Doctype {$this->doctype} does not exist, ".
+                "check for typos (if you desire a doctype that allows ".
+                "no elements, use an empty array collection)", E_USER_ERROR);
+        }
+        return $modules;
+    }
+    
+    /**
+     * Takes a collection and performs inclusions and substitutions for it.
+     * @param $cols Reference to collections class member variable
+     */
+    function processCollections(&$cols) {
+        
+        // Works on one collection type in place, in three passes:
+        //   1. resolve inclusions (the array stored at index 0 of a list
+        //      names other lists whose modules are merged in)
+        //   2. replace module names by module instances, keyed by module
+        //      name and sorted by registration order
+        //   3. drop private lists (names starting with an underscore)
+        //
+        // $cols is the set of collections
+        // $col_i is the name (index) of a collection
+        // $col is a collection/list of modules
+        
+        // perform inclusions
+        foreach ($cols as $col_i => $col) {
+            $seen = array();
+            if (!empty($col[0]) && is_array($col[0])) {
+                $seen[$col_i] = true; // recursion reporting
+                $includes = $col[0];
+                unset($cols[$col_i][0]); // remove inclusions value, recursion guard
+            } else {
+                $includes = array();
+            }
+            if (empty($includes)) continue;
+            // $includes grows while it is walked: nested inclusions found
+            // in an included list are appended to its end
+            for ($i = 0; isset($includes[$i]); $i++) {
+                $inc = $includes[$i];
+                if (isset($seen[$inc])) {
+                    // A repeated name is only a cycle when the path leads
+                    // back to the list being processed. Any other repeat
+                    // means the list is reachable over two paths (a
+                    // diamond); it has been merged already and is skipped
+                    // without raising an error. A cycle among other lists
+                    // is reported when the first of its members is
+                    // processed as $col_i.
+                    if ((string) $inc === (string) $col_i) {
+                        trigger_error(
+                            "Circular inclusion detected in $col_i collection",
+                            E_USER_ERROR
+                        );
+                    }
+                    continue;
+                }
+                $seen[$inc] = true;
+                if (!isset($cols[$inc])) {
+                    trigger_error(
+                        "Collection $col_i tried to include undefined ".
+                        "collection $inc", E_USER_ERROR);
+                    continue;
+                }
+                foreach ($cols[$inc] as $module) {
+                    if (is_array($module)) { // another inclusion!
+                        foreach ($module as $inc2) $includes[] = $inc2;
+                        continue;
+                    }
+                    $cols[$col_i][] = $module; // merge in the other modules
+                }
+            }
+        }
+        
+        // replace with real modules, invert module from list to
+        // assoc array of module name to module instance
+        foreach ($cols as $col_i => $col) {
+            $ignore_global = false;
+            $order = array();
+            foreach ($col as $module_i => $module) {
+                unset($cols[$col_i][$module_i]);
+                if (is_array($module)) {
+                    trigger_error("Illegal inclusion array at index".
+                        " $module_i found collection $col_i, inclusion".
+                        " arrays must be at start of collection (index 0)",
+                        E_USER_ERROR);
+                    continue;
+                }
+                // '*' => false asks for the catch-all doctype to be ignored
+                if ($module_i === '*' && $module === false) {
+                    $ignore_global = true;
+                    continue;
+                }
+                if (!isset($this->modules[$module])) {
+                    trigger_error(
+                        "Collection $col_i references undefined ".
+                        "module $module",
+                        E_USER_ERROR
+                    );
+                    continue;
+                }
+                $module = $this->modules[$module];
+                $cols[$col_i][$module->name] = $module;
+                $order[$module->name] = $module->order;
+            }
+            // sort the instances by the order in which they were registered
+            array_multisort(
+                $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
+            );
+            // the marker is restored after sorting, assembleModules() reads it
+            if ($ignore_global) $cols[$col_i]['*'] = false;
+        }
+        
+        // delete pseudo-collections
+        foreach ($cols as $col_i => $col) {
+            // integer and empty keys cannot be private names, and must not
+            // be indexed like a string
+            if (is_string($col_i) && strncmp($col_i, '_', 1) === 0) {
+                unset($cols[$col_i]);
+            }
+        }
+        
+    }
+    
+    /**
+     * Retrieves the doctype from the configuration object
+     */
+    function getDoctype($config) {
+        // an explicitly configured doctype always wins
+        $explicit = $config->get('HTML', 'Doctype');
+        if ($explicit !== null) {
+            return $explicit;
+        }
+        
+        if (!$this->initialized) {
+            // blank manager: no HTML-oriented backwards compatibility,
+            // use the auto-doctype if there is one, else the catch-all
+            if ($this->autoDoctype) {
+                return $this->autoDoctype;
+            }
+            return '*';
+        }
+        
+        // backwards compatibility: derive the doctype from the two older
+        // directives
+        $family  = $config->get('Core', 'XHTML')  ? 'XHTML 1.0' : 'HTML 4.01';
+        $variant = $config->get('HTML', 'Strict') ? ' Strict'   : ' Transitional';
+        return $family . $variant;
+    }
+    
+    /**
+     * Retrieves merged element definitions for all active elements.
+     * @note We may want to generate an elements array during setup
+     *       and pass that on, because a specific combination of
+     *       elements may trigger the loading of a module.
+     * @param $config Instance of HTMLPurifier_Config, for determining
+     *                stray elements.
+     */
+    function getElements($config) {
+        // one merged definition per element name declared by any active
+        // module; a name declared by several modules is resolved again
+        // and overwrites the earlier entry
+        $merged = array();
+        foreach ($this->activeModules as $active) {
+            $names = $active->elements;
+            foreach ($names as $tag) {
+                $merged[$tag] = $this->getElement($tag, $config);
+            }
+        }
+        
+        // standalone elements now loaded
+        return $merged;
+    }
+    
+    /**
+     * Retrieves a single merged element definition
+     * @param $name Name of element
+     * @param $config Instance of HTMLPurifier_Config, may not be necessary.
+     */
+    function getElement($name, $config) {
+        // Walks every valid module that has an entry for $name (in the
+        // order recorded in $elementLookup) and folds the entries into a
+        // single definition.
+        // @return Merged definition, or false when no valid module knows
+        //         $name or no standalone definition was encountered.
+        // $config is not used inside this method.
+        
+        $def = false;
+        
+        $modules = $this->validModules;
+        
+        if (!isset($this->elementLookup[$name])) {
+            return false;
+        }
+        
+        foreach($this->elementLookup[$name] as $module_name) {
+            
+            $module = $modules[$module_name];
+            $new_def = $module->info[$name];
+            
+            // the first standalone definition becomes the base; everything
+            // met afterwards is merged into it
+            // NOTE(review): with PHP 5 object handles this assignment makes
+            // $def the same object as $module->info[$name], so the merges
+            // and flags below would alter the module's own definition --
+            // confirm whether a copy is wanted here
+            if (!$def && $new_def->standalone) {
+                $def = $new_def;
+            } elseif ($def) {
+                $def->mergeIn($new_def);
+            } else {
+                // could "save it for another day":
+                // non-standalone definitions that don't have a standalone
+                // to merge into could be deferred to the end
+                continue;
+            }
+            
+            // attribute value expansions
+            // (run once per contributing module, not once at the end)
+            $this->attrCollections->performInclusions($def->attr);
+            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
+            
+            // descendants_are_inline, for ChildDef_Chameleon
+            // (decided by a substring search for 'Inline' in the content
+            // model string)
+            if (is_string($def->content_model) &&
+                strpos($def->content_model, 'Inline') !== false) {
+                if ($name != 'del' && $name != 'ins') {
+                    // this is for you, ins/del
+                    $def->descendants_are_inline = true;
+                }
+            }
+            
+            $this->contentSets->generateChildDef($def, $module);
+        }
+        
+        return $def;
+        
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/Language.php
+++ b/library/HTMLPurifier/Language.php
@ -0,0 +1,56 @@
+<?php
+
+require_once 'HTMLPurifier/LanguageFactory.php';
+
+class HTMLPurifier_Language
+{
+    
+    /**
+     * ISO 639 code of this language, in its shortest possible form
+     */
+    var $code = 'en';
+    
+    /**
+     * Code of the fallback language
+     */
+    var $fallback = false;
+    
+    /**
+     * Localizable messages, keyed by message identifier
+     */
+    var $messages = array();
+    
+    /**
+     * Whether load() has already populated this object
+     * @private
+     */
+    var $_loaded = false;
+    
+    /**
+     * Lazily copies the data for this language code out of the language
+     * factory's cache. Does nothing once the object has been loaded.
+     */
+    function load() {
+        if (!$this->_loaded) {
+            $factory = HTMLPurifier_LanguageFactory::instance();
+            $factory->loadLanguage($this->code);
+            // each key named by the factory becomes a member of this object
+            foreach ($factory->keys as $member) {
+                $this->$member = $factory->cache[$this->code][$member];
+            }
+            $this->_loaded = true;
+        }
+    }
+    
+    /**
+     * Looks up a localised message, loading the language first if needed.
+     * The message is returned as stored.
+     * @param $key string identifier of message
+     * @return string localised message, or an empty string if the key is
+     *         unknown
+     */
+    function getMessage($key) {
+        if (!$this->_loaded) {
+            $this->load();
+        }
+        return isset($this->messages[$key]) ? $this->messages[$key] : '';
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/Language/classes/en-x-test.php
+++ b/library/HTMLPurifier/Language/classes/en-x-test.php
@ -0,0 +1,12 @@
+<?php
+
+// private class for unit testing
+
+class HTMLPurifier_Language_en_x_test extends HTMLPurifier_Language
+{
+    
+    // deliberately empty: adds nothing to HTMLPurifier_Language
+    
+}
+
+?>
--- a/library/HTMLPurifier/Language/messages/en-x-test.php
+++ b/library/HTMLPurifier/Language/messages/en-x-test.php
@ -0,0 +1,11 @@
+<?php
+
+// private language message file for unit testing purposes
+
+$fallback = 'en'; // messages not defined below are filled in from this language
+
+$messages = array(
+    'htmlpurifier' => 'HTML Purifier X'
+);
+
+?>
--- a/library/HTMLPurifier/Language/messages/en.php
+++ b/library/HTMLPurifier/Language/messages/en.php
@ -0,0 +1,12 @@
+<?php
+
+$fallback = false; // no fallback language is loaded for English
+
+$messages = array(
+
+'htmlpurifier' => 'HTML Purifier',
+'pizza' => 'Pizza', // for unit testing purposes
+
+);
+
+?>
--- a/library/HTMLPurifier/LanguageFactory.php
+++ b/library/HTMLPurifier/LanguageFactory.php
@ -0,0 +1,196 @@
+<?php
+
+require_once 'HTMLPurifier/Language.php';
+require_once 'HTMLPurifier/AttrDef/Lang.php';
+
+/**
+ * Class responsible for generating HTMLPurifier_Language objects, managing
+ * caching and fallbacks.
+ * @note Thanks to MediaWiki for the general logic, although this version
+ *       has been entirely rewritten
+ */
+class HTMLPurifier_LanguageFactory
+{
+    
+    /**
+     * Cache of language code information used to load HTMLPurifier_Language objects
+     * Structure is: $factory->cache[$language_code][$key] = $value
+     * @value array map
+     */
+    var $cache;
+    
+    /**
+     * Valid keys in the HTMLPurifier_Language object. Designates which
+     * variables to slurp out of a message file.
+     * @value array list
+     */
+    var $keys = array('fallback', 'messages');
+    
+    /**
+     * Instance of HTMLPurifier_AttrDef_Lang to validate language codes
+     * @value object HTMLPurifier_AttrDef_Lang
+     */
+    var $validator;
+    
+    /**
+     * Cached copy of dirname(__FILE__), directory of current file without
+     * trailing slash
+     * @value string filename
+     */
+    var $dir;
+    
+    /**
+     * Keys whose contents are a hash map and can be merged
+     * @value array lookup
+     */
+    var $mergeable_keys_map = array('messages' => true);
+    
+    /**
+     * Keys whose contents are a list and can be merged
+     * @value array lookup
+     */
+    var $mergeable_keys_list = array();
+    
+    /**
+     * Retrieve sole instance of the factory.
+     * @static
+     * @param $prototype Optional prototype to overload sole instance with,
+     *                   or bool true to reset to default factory.
+     * @return Reference to the sole factory instance
+     */
+    static function &instance($prototype = null) {
+        static $instance = null;
+        if ($prototype !== null && $prototype !== true) {
+            $instance = $prototype;
+        } elseif ($instance === null || $prototype === true) {
+            // bool true must land here: it asks for a fresh default factory
+            // and must never be stored as the instance itself
+            $instance = new HTMLPurifier_LanguageFactory();
+            $instance->setup();
+        }
+        return $instance;
+    }
+    
+    /**
+     * Sets up the singleton, much like a constructor
+     * @note Prevents people from getting this outside of the singleton
+     */
+    function setup() {
+        $this->validator = new HTMLPurifier_AttrDef_Lang();
+        $this->dir = dirname(__FILE__);
+    }
+    
+    /**
+     * Creates a language object, handles class fallbacks
+     * @param $code string language code
+     * @return Language object whose code member is set to the validated
+     *         code; its class is that of the nearest fallback that has one
+     */
+    function create($code) {
+        
+        $config = $context = false; // hope it doesn't use these!
+        $code = $this->validator->validate($code, $config, $context);
+        if ($code === false) $code = 'en'; // malformed code becomes English
+        
+        $pcode = str_replace('-', '_', $code); // make valid PHP classname
+        static $depth = 0; // recursion protection
+        
+        if ($code == 'en') {
+            $class = 'HTMLPurifier_Language';
+            $file  = $this->dir . '/Language.php';
+        } else {
+            $class = 'HTMLPurifier_Language_' . $pcode;
+            $file  = $this->dir . '/Language/classes/' . $code . '.php';
+            // PHP5/APC deps bug workaround can go here
+            // you can bypass the conditional include by loading the
+            // file yourself
+            if (file_exists($file) && !class_exists($class)) {
+                include_once $file;
+            }
+        }
+        
+        if (!class_exists($class)) {
+            // no dedicated class for this code: build the object from the
+            // fallback language instead, it is relabelled below
+            $fallback = $this->getFallbackFor($code);
+            // a missing fallback, or a chain deep enough that it is most
+            // likely circular, ends at plain English, whose class always
+            // exists
+            if (!$fallback || $depth >= 16) $fallback = 'en';
+            $depth++;
+            $lang = $this->create($fallback);
+            $depth--;
+        } else {
+            $lang = new $class;
+        }
+        $lang->code = $code;
+        
+        return $lang;
+        
+    }
+    
+    /**
+     * Returns the fallback language for language
+     * @note Loads the original language into cache
+     * @param $code string language code
+     */
+    function getFallbackFor($code) {
+        $this->loadLanguage($code);
+        return $this->cache[$code]['fallback'];
+    }
+    
+    /**
+     * Loads language into the cache, handles message file and fallbacks
+     * @param $code string language code
+     */
+    function loadLanguage($code) {
+        static $languages_seen = array(); // recursion guard
+        
+        // abort if we've already loaded it
+        if (isset($this->cache[$code])) return;
+        
+        // generate filename
+        $filename = $this->dir . '/Language/messages/' . $code . '.php';
+        
+        // default fallback : may be overwritten by the ensuing include
+        $fallback = ($code != 'en') ? 'en' : false;
+        
+        // load primary localisation
+        if (!file_exists($filename)) {
+            // skip the include: will rely solely on fallback
+            $filename = $this->dir . '/Language/messages/en.php';
+            $cache = array();
+        } else {
+            include $filename;
+            $cache = compact($this->keys);
+        }
+        
+        // load fallback localisation
+        if (!empty($fallback)) {
+            
+            // infinite recursion guard
+            if (isset($languages_seen[$code])) {
+                trigger_error('Circular fallback reference in language ' .
+                    $code, E_USER_ERROR);
+                $fallback = 'en';
+            }
+            $languages_seen[$code] = true;
+            
+            // load the fallback recursively
+            $this->loadLanguage($fallback);
+            
+            // the guard only covers loads in progress: the static array is
+            // shared by all factory objects, so a stale mark would raise a
+            // false circular error on a later, unrelated load
+            unset($languages_seen[$code]);
+            
+            $fallback_cache = $this->cache[$fallback];
+            
+            // merge fallback with current language
+            foreach ( $this->keys as $key ) {
+                if (isset($cache[$key]) && isset($fallback_cache[$key])) {
+                    if (isset($this->mergeable_keys_map[$key])) {
+                        // current language's entries win over the fallback's
+                        $cache[$key] = $cache[$key] + $fallback_cache[$key];
+                    } elseif (isset($this->mergeable_keys_list[$key])) {
+                        $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
+                    }
+                } elseif (!isset($cache[$key])) {
+                    // inherit only what the current language did not define,
+                    // a value it did define is never clobbered
+                    $cache[$key] = isset($fallback_cache[$key]) ?
+                        $fallback_cache[$key] : null;
+                }
+            }
+            
+        }
+        
+        // save to cache for later retrieval
+        $this->cache[$code] = $cache;
+        
+        return;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@ -151,7 +151,8 @@ class HTMLPurifier_Lexer
            $lexer = $prototype;
        }
        if (empty($lexer)) {
-            if (class_exists('DOMDocument')) { // check for DOM support
+            if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
+                class_exists('DOMDocument')) { // check for DOM support
                require_once 'HTMLPurifier/Lexer/DOMLex.php';
                $lexer = new HTMLPurifier_Lexer_DOMLex();
            } else {
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@ -21,7 +21,7 @@ require_once 'HTMLPurifier/TokenFactory.php';
 * 
 * @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
 *          If this is a huge problem, due to the fact that HTML is hand
- *          edited and youa re unable to get a parser cache that caches the
+ *          edited and you are unable to get a parser cache that caches
 *          the output of HTML Purifier while keeping the original HTML lying
 *          around, you may want to run Tidy on the resulting output or use
 *          HTMLPurifier_DirectLex
@ -54,7 +54,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
        
        $doc = new DOMDocument();
        $doc->encoding = 'UTF-8'; // technically does nothing, but whatever
-        @$doc->loadHTML($string); // mute all errors, handle it transparently
+        
+        // DOM will toss errors if the HTML it's parsing has really big
+        // problems, so we're going to mute them. This can cause problems
+        // if a custom error handler that doesn't implement error_reporting
+        // is set, as noted by a Drupal plugin of HTML Purifier. Consider
+        // making our own error reporter to temporarily load in
+        @$doc->loadHTML($string);
        
        $tokens = array();
        $this->tokenizeDOM(
--- a/library/HTMLPurifier/Printer/HTMLDefinition.php
+++ b/library/HTMLPurifier/Printer/HTMLDefinition.php
@ -13,6 +13,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
    function render($config) {
        $ret = '';
        $this->config =& $config;
+        
        $this->def = $config->getHTMLDefinition();
        $def =& $this->def;
        
@ -21,16 +22,14 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
        $ret .= $this->element('caption', 'Environment');
        
        $ret .= $this->row('Parent of fragment', $def->info_parent);
-        $ret .= $this->row('Strict mode', $def->strict);
-        if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper);
+        $ret .= $this->renderChildren($def->info_parent_def->child);
+        $ret .= $this->row('Block wrap name', $def->info_block_wrapper);
        
        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Global attributes');
            $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
        $ret .= $this->end('tr');
        
-        $ret .= $this->renderChildren($def->info_parent_def->child);
-        
        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Tag transforms');
            $list = array();
@ -81,8 +80,8 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
                $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
            $ret .= $this->end('tr');
            $ret .= $this->start('tr');
-                $ret .= $this->element('th', 'Type');
-                $ret .= $this->element('td', ucfirst($def->type));
+                $ret .= $this->element('th', 'Inline content');
+                $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No');
            $ret .= $this->end('tr');
            if (!empty($def->excludes)) {
                $ret .= $this->start('tr');
@ -130,15 +129,17 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
            $elements = array();
            $attr = array();
            if (isset($def->elements)) {
-                if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context);
+                if ($def->type == 'strictblockquote') {
+                    $def->validateChildren(array(), $this->config, $context);
+                }
                $elements = $def->elements;
            } elseif ($def->type == 'chameleon') {
                $attr['rowspan'] = 2;
            } elseif ($def->type == 'empty') {
                $elements = array();
            } elseif ($def->type == 'table') {
-                $elements = array('col', 'caption', 'colgroup', 'thead',
-                    'tfoot', 'tbody', 'tr');
+                $elements = array_flip(array('col', 'caption', 'colgroup', 'thead',
+                    'tfoot', 'tbody', 'tr'));
            }
            $ret .= $this->element('th', 'Allowed children', $attr);
            
@ -167,6 +168,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
     * @param $array Tag lookup array in form of array('tagname' => true)
     */
    function listifyTagLookup($array) {
+        ksort($array);
        $list = array();
        foreach ($array as $name => $discard) {
            if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
@ -181,6 +183,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
     * @todo Also add information about internal state
     */
    function listifyObjectList($array) {
+        ksort($array);
        $list = array();
        foreach ($array as $discard => $obj) {
            $list[] = $this->getClass($obj, 'AttrTransform_');
@ -193,6 +196,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
     * @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
     */
    function listifyAttr($array) {
+        ksort($array);
        $list = array();
        foreach ($array as $name => $obj) {
            if ($obj === false) continue;
--- a/library/HTMLPurifier/Strategy/FixNesting.php
+++ b/library/HTMLPurifier/Strategy/FixNesting.php
@ -49,8 +49,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
        $tokens[] = new HTMLPurifier_Token_End($parent_name);
        
        // setup the context variables
-        $parent_type = 'unknown'; // reference var that we alter
-        $context->register('ParentType', $parent_type);
+        $is_inline = false; // reference var that we alter
+        $context->register('IsInline', $is_inline);
        
        //####################################################################//
        // Loop initialization
@ -115,11 +115,16 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
            }
            
            // calculate context
-            if (isset($parent_def)) {
-                $parent_type = $parent_def->type;
+            if ($is_inline === false) {
+                // check if conditions make it inline
+                if (!empty($parent_def) && $parent_def->descendants_are_inline) {
+                    $is_inline = $count - 1;
+                }
            } else {
-                // generally found in specialized elements like UL
-                $parent_type = 'unknown';
+                // check if we're out of inline
+                if ($count === $is_inline) {
+                    $is_inline = false;
+                }
            }
            
            //################################################################//
@ -273,7 +278,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
        array_pop($tokens);
        
        // remove context variables
-        $context->destroy('ParentType');
+        $context->destroy('IsInline');
        
        //####################################################################//
        // Return
--- a/library/HTMLPurifier/TagTransform.php
+++ b/library/HTMLPurifier/TagTransform.php
@ -1,6 +1,6 @@
 <?php

-require_once('HTMLPurifier/Token.php');
+require_once 'HTMLPurifier/Token.php';

 /**
 * Defines a mutation of an obsolete tag into a valid tag.
@ -26,132 +26,4 @@ class HTMLPurifier_TagTransform
    
 }

-/**
- * Simple transformation, just change tag name to something else.
- */
-class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
-{
-    
-    /**
-     * @param $transform_to Tag name to transform to.
-     */
-    function HTMLPurifier_TagTransform_Simple($transform_to) {
-        $this->transform_to = $transform_to;
-    }
-    
-    function transform($tag, $config, &$context) {
-        $new_tag = $tag->copy();
-        $new_tag->name = $this->transform_to;
-        return $new_tag;
-    }
-    
-}
-
-/**
- * Transforms CENTER tags into proper version (DIV with text-align CSS)
- * 
- * Takes a CENTER tag, parses the align attribute, and then if it's valid
- * assigns it to the CSS property text-align.
- */
-class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
-{
-    var $transform_to = 'div';
-    
-    function transform($tag, $config, &$context) {
-        if ($tag->type == 'end') {
-            $new_tag = new HTMLPurifier_Token_End($this->transform_to);
-            return $new_tag;
-        }
-        $attr = $tag->attr;
-        $prepend_css = 'text-align:center;';
-        if (isset($attr['style'])) {
-            $attr['style'] = $prepend_css . $attr['style'];
-        } else {
-            $attr['style'] = $prepend_css;
-        }
-        $new_tag = $tag->copy();
-        $new_tag->name = $this->transform_to;
-        $new_tag->attr = $attr;
-        return $new_tag;
-    }
-}
-
-/**
- * Transforms FONT tags to the proper form (SPAN with CSS styling)
- * 
- * This transformation takes the three proprietary attributes of FONT and
- * transforms them into their corresponding CSS attributes.  These are color,
- * face, and size.
- * 
- * @note Size is an interesting case because it doesn't map cleanly to CSS.
- *       Thanks to
- *       http://style.cleverchimp.com/font_size_intervals/altintervals.html
- *       for reasonable mappings.
- */
-class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
-{
-    
-    var $transform_to = 'span';
-    
-    var $_size_lookup = array(
-        '1' => 'xx-small',
-        '2' => 'small',
-        '3' => 'medium',
-        '4' => 'large',
-        '5' => 'x-large',
-        '6' => 'xx-large',
-        '7' => '300%',
-        '-1' => 'smaller',
-        '+1' => 'larger',
-        '-2' => '60%',
-        '+2' => '150%',
-        '+4' => '300%'
-    );
-    
-    function transform($tag, $config, &$context) {
-        
-        if ($tag->type == 'end') {
-            $new_tag = new HTMLPurifier_Token_End($this->transform_to);
-            return $new_tag;
-        }
-        
-        $attr = $tag->attr;
-        $prepend_style = '';
-        
-        // handle color transform
-        if (isset($attr['color'])) {
-            $prepend_style .= 'color:' . $attr['color'] . ';';
-            unset($attr['color']);
-        }
-        
-        // handle face transform
-        if (isset($attr['face'])) {
-            $prepend_style .= 'font-family:' . $attr['face'] . ';';
-            unset($attr['face']);
-        }
-        
-        // handle size transform
-        if (isset($attr['size'])) {
-            if (isset($this->_size_lookup[$attr['size']])) {
-                $prepend_style .= 'font-size:' .
-                  $this->_size_lookup[$attr['size']] . ';';
-            }
-            unset($attr['size']);
-        }
-        
-        if ($prepend_style) {
-            $attr['style'] = isset($attr['style']) ?
-                $prepend_style . $attr['style'] :
-                $prepend_style;
-        }
-        
-        $new_tag = $tag->copy();
-        $new_tag->name = $this->transform_to;
-        $new_tag->attr = $attr;
-        
-        return $new_tag;
-        
-    }
-}
-
 ?>
--- a/library/HTMLPurifier/TagTransform/Center.php
+++ b/library/HTMLPurifier/TagTransform/Center.php
@ -0,0 +1,34 @@
+<?php
+
+require_once 'HTMLPurifier/TagTransform.php';
+
+/**
+ * Transforms CENTER tags into proper version (DIV with text-align CSS)
+ * 
+ * Takes a CENTER tag, parses the align attribute, and then if it's valid
+ * assigns it to the CSS property text-align.
+ */
+class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
+{
+    var $transform_to = 'div';
+    
+    /**
+     * Renames the tag to DIV; for non-end tags, text-align:center; is put
+     * in front of whatever the style attribute already holds.
+     * @param $tag Token to transform
+     * @param $config Unused by this transform
+     * @param $context Unused by this transform
+     * @return New end token for end tags, otherwise a modified copy of $tag
+     */
+    function transform($tag, $config, &$context) {
+        
+        // end tags carry no attributes, a fresh end token is enough
+        if ($tag->type == 'end') {
+            return new HTMLPurifier_Token_End($this->transform_to);
+        }
+        
+        $attributes = $tag->attr;
+        $existing = isset($attributes['style']) ? $attributes['style'] : '';
+        $attributes['style'] = 'text-align:center;' . $existing;
+        
+        $replacement = $tag->copy();
+        $replacement->name = $this->transform_to;
+        $replacement->attr = $attributes;
+        return $replacement;
+        
+    }
+}
+
+?>
--- a/library/HTMLPurifier/TagTransform/Font.php
+++ b/library/HTMLPurifier/TagTransform/Font.php
@ -0,0 +1,83 @@
+<?php
+
+require_once 'HTMLPurifier/TagTransform.php';
+
+/**
+ * Transforms FONT tags to the proper form (SPAN with CSS styling)
+ * 
+ * This transformation takes the three proprietary attributes of FONT and
+ * transforms them into their corresponding CSS attributes.  These are color,
+ * face, and size.
+ * 
+ * @note Size is an interesting case because it doesn't map cleanly to CSS.
+ *       Thanks to
+ *       http://style.cleverchimp.com/font_size_intervals/altintervals.html
+ *       for reasonable mappings.
+ */
+class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
+{
+    
+    var $transform_to = 'span';
+    
+    var $_size_lookup = array(
+        '1' => 'xx-small',
+        '2' => 'small',
+        '3' => 'medium',
+        '4' => 'large',
+        '5' => 'x-large',
+        '6' => 'xx-large',
+        '7' => '300%',
+        '-1' => 'smaller',
+        '+1' => 'larger',
+        '-2' => '60%',
+        '+2' => '150%',
+        '+4' => '300%'
+    );
+    
+    /**
+     * Renames the tag to SPAN and moves the color, face and size attributes
+     * into CSS declarations placed in front of any existing style value.
+     * @param $tag Token to transform
+     * @param $config Unused by this transform
+     * @param $context Unused by this transform
+     * @return New end token for end tags, otherwise a modified copy of $tag
+     */
+    function transform($tag, $config, &$context) {
+        
+        if ($tag->type == 'end') {
+            return new HTMLPurifier_Token_End($this->transform_to);
+        }
+        
+        $attributes = $tag->attr;
+        $css = '';
+        
+        // color and face are copied verbatim into their CSS property
+        $direct = array('color' => 'color:', 'face' => 'font-family:');
+        foreach ($direct as $name => $property) {
+            if (isset($attributes[$name])) {
+                $css .= $property . $attributes[$name] . ';';
+                unset($attributes[$name]);
+            }
+        }
+        
+        // size goes through the lookup; a value not in the table produces
+        // no CSS, but the attribute is removed either way
+        if (isset($attributes['size'])) {
+            if (isset($this->_size_lookup[$attributes['size']])) {
+                $css .= 'font-size:' .
+                  $this->_size_lookup[$attributes['size']] . ';';
+            }
+            unset($attributes['size']);
+        }
+        
+        // leave style alone unless something was actually generated
+        if ($css !== '') {
+            if (isset($attributes['style'])) {
+                $attributes['style'] = $css . $attributes['style'];
+            } else {
+                $attributes['style'] = $css;
+            }
+        }
+        
+        $replacement = $tag->copy();
+        $replacement->name = $this->transform_to;
+        $replacement->attr = $attributes;
+        
+        return $replacement;
+        
+    }
+}
+
+?>
--- a/library/HTMLPurifier/TagTransform/Simple.php
+++ b/library/HTMLPurifier/TagTransform/Simple.php
@ -0,0 +1,26 @@
+<?php
+
+require_once 'HTMLPurifier/TagTransform.php';
+
+/**
+ * Simple transformation, just change tag name to something else.
+ */
+class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
+{
+    
+    /**
+     * Constructor: records the replacement tag name.
+     * @param $transform_to Tag name to transform to.
+     */
+    function HTMLPurifier_TagTransform_Simple($transform_to) {
+        $this->transform_to = $transform_to;
+    }
+    
+    /**
+     * Copies the token and gives the copy the configured tag name.
+     * @param $tag Token to transform
+     * @param $config Unused by this transform
+     * @param $context Unused by this transform
+     * @return Copy of $tag carrying the new name
+     */
+    function transform($tag, $config, &$context) {
+        $renamed = $tag->copy();
+        $renamed->name = $this->transform_to;
+        return $renamed;
+    }
+    
+}
+
+?>
--- a/library/HTMLPurifier/URISchemeRegistry.php
+++ b/library/HTMLPurifier/URISchemeRegistry.php
@ -10,7 +10,7 @@ HTMLPurifier_ConfigSchema::define(
        'irc'   => true, // "Internet Relay Chat", usually needs another app
        // for Usenet, these two are similar, but distinct
        'nntp'  => true, // individual Netnews articles
-        'news'  => true  // newsgroup or individual Netnews articles),
+        'news'  => true  // newsgroup or individual Netnews articles
    ), 'lookup',
    'Whitelist that defines the schemes that a URI is allowed to have.  This '.
    'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
--- a/smoketests/printDefinition.php
+++ b/smoketests/printDefinition.php
@ -22,6 +22,17 @@ foreach ($_GET as $key => $value) {

@$config->loadArray($get);

+/* // sample local definition, obviously needs to be less clunky
+$html_definition =& $config->getHTMLDefinition(true);
+$module = new HTMLPurifier_HTMLModule();
+$module->name = 'Marquee';
+$module->info['marquee'] = new HTMLPurifier_ElementDef();
+$module->info['marquee']->content_model = '#PCDATA | Inline';
+$module->info['marquee']->content_model_type = 'optional';
+$module->content_sets = array('Inline' => 'marquee');
+$html_definition->manager->addModule($module);
+*/
+
 $printer_html_definition = new HTMLPurifier_Printer_HTMLDefinition();
 $printer_css_definition  = new HTMLPurifier_Printer_CSSDefinition();

--- a/tests/HTMLPurifier/AttrDef/CSS/BackgroundPositionTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/BackgroundPositionTest.php
@ -1,14 +1,14 @@
 <?php

 require_once 'HTMLPurifier/AttrDefHarness.php';
-require_once 'HTMLPurifier/AttrDef/BackgroundPosition.php';
+require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';

-class HTMLPurifier_AttrDef_BackgroundPositionTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_BackgroundPositionTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
        
-        $this->def = new HTMLPurifier_AttrDef_BackgroundPosition();
+        $this->def = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
        
        // explicitly cited in spec
        $this->assertDef('0% 0%');
--- a/tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php
@ -1,14 +1,15 @@
 <?php

 require_once 'HTMLPurifier/AttrDefHarness.php';
-require_once 'HTMLPurifier/AttrDef/Background.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Background.php';

-class HTMLPurifier_AttrDef_BackgroundTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_BackgroundTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
        
-        $this->def = new HTMLPurifier_AttrDef_Background(HTMLPurifier_Config::createDefault());
+        $config = HTMLPurifier_Config::createDefault();
+        $this->def = new HTMLPurifier_AttrDef_CSS_Background($config);
        
        $valid = '#333 url(chess.png) repeat fixed 50% top';
        $this->assertDef($valid);
--- a/tests/HTMLPurifier/AttrDef/CSS/BorderTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/BorderTest.php
@ -1,14 +1,14 @@
 <?php

-require_once 'HTMLPurifier/AttrDef/Border.php';
-require_once 'HTMLPurifier/AttrDef/PixelsTest.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Border.php';

-class HTMLPurifier_AttrDef_BorderTest extends HTMLPurifier_AttrDef_PixelsTest
+class HTMLPurifier_AttrDef_CSS_BorderTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
        
-        $this->def = new HTMLPurifier_AttrDef_Border(HTMLPurifier_Config::createDefault());
+        $config = HTMLPurifier_Config::createDefault();
+        $this->def = new HTMLPurifier_AttrDef_CSS_Border($config);
        
        $this->assertDef('thick solid red', 'thick solid #F00');
        $this->assertDef('thick solid');
--- a/tests/HTMLPurifier/AttrDef/CSS/ColorTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/ColorTest.php
@ -1,14 +1,14 @@
 <?php

-require_once 'HTMLPurifier/AttrDef/Color.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
 require_once 'HTMLPurifier/AttrDefHarness.php';

-class HTMLPurifier_AttrDef_ColorTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_ColorTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
        
-        $this->def = new HTMLPurifier_AttrDef_Color();
+        $this->def = new HTMLPurifier_AttrDef_CSS_Color();
        
        $this->assertDef('#F00');
        $this->assertDef('#808080');
--- a/tests/HTMLPurifier/AttrDef/CSS/CompositeTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/CompositeTest.php
@ -1,20 +1,20 @@
 <?php

-require_once 'HTMLPurifier/AttrDef/Composite.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
 require_once 'HTMLPurifier/AttrDefHarness.php';

-class HTMLPurifier_AttrDef_Composite_Testable extends
-      HTMLPurifier_AttrDef_Composite
+class HTMLPurifier_AttrDef_CSS_Composite_Testable extends
+      HTMLPurifier_AttrDef_CSS_Composite
 {
    
    // we need to pass by ref to get the mocks in
-    function HTMLPurifier_AttrDef_Composite_Testable(&$defs) {
+    function HTMLPurifier_AttrDef_CSS_Composite_Testable(&$defs) {
        $this->defs =& $defs;
    }
    
 }

-class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_CompositeTest extends HTMLPurifier_AttrDefHarness
 {
    
    var $def1, $def2;
@ -32,7 +32,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
        $def1 = new HTMLPurifier_AttrDefMock($this);
        $def2 = new HTMLPurifier_AttrDefMock($this);
        $defs = array(&$def1, &$def2);
-        $def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
+        $def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
        $input = 'FOOBAR';
        $output = 'foobar';
        $def1_params = array($input, $config, $context);
@ -51,7 +51,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
        $def1 = new HTMLPurifier_AttrDefMock($this);
        $def2 = new HTMLPurifier_AttrDefMock($this);
        $defs = array(&$def1, &$def2);
-        $def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
+        $def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
        $input = 'BOOMA';
        $output = 'booma';
        $def_params = array($input, $config, $context);
@ -71,7 +71,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
        $def1 = new HTMLPurifier_AttrDefMock($this);
        $def2 = new HTMLPurifier_AttrDefMock($this);
        $defs = array(&$def1, &$def2);
-        $def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
+        $def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
        $input = 'BOOMA';
        $output = false;
        $def_params = array($input, $config, $context);
--- a/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
@ -1,14 +1,14 @@
 <?php

 require_once 'HTMLPurifier/AttrDefHarness.php';
-require_once 'HTMLPurifier/AttrDef/FontFamily.php';
+require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';

-class HTMLPurifier_AttrDef_FontFamilyTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
        
-        $this->def = new HTMLPurifier_AttrDef_FontFamily();
+        $this->def = new HTMLPurifier_AttrDef_CSS_FontFamily();
        
        $this->assertDef('Gill, Helvetica, sans-serif');
        $this->assertDef('\'Times New Roman\', serif');
--- a/tests/HTMLPurifier/AttrDef/CSS/FontTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/FontTest.php
@ -1,14 +1,15 @@
 <?php

 require_once 'HTMLPurifier/AttrDefHarness.php';
-require_once 'HTMLPurifier/AttrDef/Font.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Font.php';

-class HTMLPurifier_AttrDef_FontTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_FontTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
        
-        $this->def = new HTMLPurifier_AttrDef_Font(HTMLPurifier_Config::createDefault());
+        $config = HTMLPurifier_Config::createDefault();
+        $this->def = new HTMLPurifier_AttrDef_CSS_Font($config);
        
        // hodgepodge of usage cases from W3C spec, but " -> '
        $this->assertDef('12px/14px sans-serif');
--- a/tests/HTMLPurifier/AttrDef/CSS/LengthTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/LengthTest.php
@ -1,14 +1,14 @@
 <?php

-require_once 'HTMLPurifier/AttrDef/CSSLength.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
 require_once 'HTMLPurifier/AttrDefHarness.php';

-class HTMLPurifier_AttrDef_CSSLengthTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_LengthTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
        
-        $this->def = new HTMLPurifier_AttrDef_CSSLength();
+        $this->def = new HTMLPurifier_AttrDef_CSS_Length();
        
        $this->assertDef('0');
        $this->assertDef('0px');
@ -31,7 +31,7 @@ class HTMLPurifier_AttrDef_CSSLengthTest extends HTMLPurifier_AttrDefHarness
    
    function testNonNegative() {
        
-        $this->def = new HTMLPurifier_AttrDef_CSSLength(true);
+        $this->def = new HTMLPurifier_AttrDef_CSS_Length(true);
        
        $this->assertDef('3cm');
        $this->assertDef('-3mm', false);
--- a/tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php
@ -1,14 +1,15 @@
 <?php

 require_once 'HTMLPurifier/AttrDefHarness.php';
-require_once 'HTMLPurifier/AttrDef/ListStyle.php';
+require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';

-class HTMLPurifier_AttrDef_ListStyleTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_ListStyleTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
        
-        $this->def = new HTMLPurifier_AttrDef_ListStyle(HTMLPurifier_Config::createDefault());
+        $config = HTMLPurifier_Config::createDefault();
+        $this->def = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
        
        $this->assertDef('lower-alpha');
        $this->assertDef('upper-roman inside');
--- a/tests/HTMLPurifier/AttrDef/CSS/MultipleTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/MultipleTest.php
@ -1,16 +1,16 @@
 <?php

-require_once 'HTMLPurifier/AttrDef/Multiple.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
 require_once 'HTMLPurifier/AttrDefHarness.php';

 // borrowed for the sakes of this test
 require_once 'HTMLPurifier/AttrDef/Integer.php';

-class HTMLPurifier_AttrDef_MultipleTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_MultipleTest extends HTMLPurifier_AttrDefHarness
 {
    
    function test() {
-        $this->def = new HTMLPurifier_AttrDef_Multiple(
+        $this->def = new HTMLPurifier_AttrDef_CSS_Multiple(
            new HTMLPurifier_AttrDef_Integer()
        );
        
--- a/Show More
+++ b/Show More