[1.3.0] Refactored ChildDef classes into their own files

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@558 48356398-32a2-884e-a903-53898d9a118a
2025-03-11 17:18:44 +00:00 · 2006-11-22 18:55:15 +00:00 · 2006-11-22 18:55:15 +00:00 · 3b26e5dc5b
commit 3b26e5dc5b
parent c5ea987069
17 changed files with 645 additions and 556 deletions
--- a/1
+++ b/1
@ -14,6 +14,7 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier

 1.2.1, unknown release date
 (bugfix/minor feature release, may be dropped if 1.2.0 is stable)
+. Refactored ChildDef classes into their own files

 1.2.0, released 2006-11-19
 # ID attributes now disabled by default. New directives:
--- a/library/HTMLPurifier/ChildDef.php
+++ b/library/HTMLPurifier/ChildDef.php
@ -50,391 +50,4 @@ class HTMLPurifier_ChildDef
    }
 }

-/**
- * Custom validation class, accepts DTD child definitions
- * 
- * @warning Currently this class is an all or nothing proposition, that is,
- *          it will only give a bool return value.
- * @note This class is currently not used by any code, although it is unit
- *       tested.
- */
-class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
-{
-    var $type = 'custom';
-    var $allow_empty = false;
-    /**
-     * Allowed child pattern as defined by the DTD
-     */
-    var $dtd_regex;
-    /**
-     * PCRE regex derived from $dtd_regex
-     * @private
-     */
-    var $_pcre_regex;
-    /**
-     * @param $dtd_regex Allowed child pattern from the DTD
-     */
-    function HTMLPurifier_ChildDef_Custom($dtd_regex) {
-        $this->dtd_regex = $dtd_regex;
-        $this->_compileRegex();
-    }
-    /**
-     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
-     */
-    function _compileRegex() {
-        $raw = str_replace(' ', '', $this->dtd_regex);
-        if ($raw{0} != '(') {
-            $raw = "($raw)";
-        }
-        $reg = str_replace(',', ',?', $raw);
-        $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
-        $this->_pcre_regex = $reg;
-    }
-    function validateChildren($tokens_of_children, $config, &$context) {
-        $list_of_children = '';
-        $nesting = 0; // depth into the nest
-        foreach ($tokens_of_children as $token) {
-            if (!empty($token->is_whitespace)) continue;
-            
-            $is_child = ($nesting == 0); // direct
-            
-            if ($token->type == 'start') {
-                $nesting++;
-            } elseif ($token->type == 'end') {
-                $nesting--;
-            }
-            
-            if ($is_child) {
-                $list_of_children .= $token->name . ',';
-            }
-        }
-        $list_of_children = rtrim($list_of_children, ',');
-        
-        $okay =
-            preg_match(
-                '/^'.$this->_pcre_regex.'$/',
-                $list_of_children
-            );
-        
-        return (bool) $okay;
-    }
-}
-
-/**
- * Definition that allows a set of elements, but disallows empty children.
- */
-class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
-{
-    /**
-     * Lookup table of allowed elements.
-     */
-    var $elements = array();
-    /**
-     * @param $elements List of allowed element names (lowercase).
-     */
-    function HTMLPurifier_ChildDef_Required($elements) {
-        if (is_string($elements)) {
-            $elements = str_replace(' ', '', $elements);
-            $elements = explode('|', $elements);
-        }
-        $elements = array_flip($elements);
-        foreach ($elements as $i => $x) $elements[$i] = true;
-        $this->elements = $elements;
-        $this->gen = new HTMLPurifier_Generator();
-    }
-    var $allow_empty = false;
-    var $type = 'required';
-    function validateChildren($tokens_of_children, $config, &$context) {
-        // if there are no tokens, delete parent node
-        if (empty($tokens_of_children)) return false;
-        
-        // the new set of children
-        $result = array();
-        
-        // current depth into the nest
-        $nesting = 0;
-        
-        // whether or not we're deleting a node
-        $is_deleting = false;
-        
-        // whether or not parsed character data is allowed
-        // this controls whether or not we silently drop a tag
-        // or generate escaped HTML from it
-        $pcdata_allowed = isset($this->elements['#PCDATA']);
-        
-        // a little sanity check to make sure it's not ALL whitespace
-        $all_whitespace = true;
-        
-        // some configuration
-        $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
-        
-        foreach ($tokens_of_children as $token) {
-            if (!empty($token->is_whitespace)) {
-                $result[] = $token;
-                continue;
-            }
-            $all_whitespace = false; // phew, we're not talking about whitespace
-            
-            $is_child = ($nesting == 0);
-            
-            if ($token->type == 'start') {
-                $nesting++;
-            } elseif ($token->type == 'end') {
-                $nesting--;
-            }
-            
-            if ($is_child) {
-                $is_deleting = false;
-                if (!isset($this->elements[$token->name])) {
-                    $is_deleting = true;
-                    if ($pcdata_allowed && $token->type == 'text') {
-                        $result[] = $token;
-                    } elseif ($pcdata_allowed && $escape_invalid_children) {
-                        $result[] = new HTMLPurifier_Token_Text(
-                            $this->gen->generateFromToken($token, $config)
-                        );
-                    }
-                    continue;
-                }
-            }
-            if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
-                $result[] = $token;
-            } elseif ($pcdata_allowed && $escape_invalid_children) {
-                $result[] =
-                    new HTMLPurifier_Token_Text(
-                        $this->gen->generateFromToken( $token, $config )
-                    );
-            } else {
-                // drop silently
-            }
-        }
-        if (empty($result)) return false;
-        if ($all_whitespace) return false;
-        if ($tokens_of_children == $result) return true;
-        return $result;
-    }
-}
-
-/**
- * Definition that allows a set of elements, and allows no children.
- * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
- *       really, one shouldn't inherit from the other.  Only altered behavior
- *       is to overload a returned false with an array.  Thus, it will never
- *       return false.
- */
-class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
-{
-    var $allow_empty = true;
-    var $type = 'optional';
-    function validateChildren($tokens_of_children, $config, &$context) {
-        $result = parent::validateChildren($tokens_of_children, $config, $context);
-        if ($result === false) return array();
-        return $result;
-    }
-}
-
-/**
- * Definition that disallows all elements.
- * @warning validateChildren() in this class is actually never called, because
- *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
- *          before child definitions are parsed in earnest by
- *          HTMLPurifier_Strategy_FixNesting.
- */
-class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
-{
-    var $allow_empty = true;
-    var $type = 'empty';
-    function HTMLPurifier_ChildDef_Empty() {}
-    function validateChildren($tokens_of_children, $config, &$context) {
-        return array();
-    }
-}
-
-/**
- * Definition that uses different definitions depending on context.
- * 
- * The del and ins tags are notable because they allow different types of
- * elements depending on whether or not they're in a block or inline context.
- * Chameleon allows this behavior to happen by using two different
- * definitions depending on context.  While this somewhat generalized,
- * it is specifically intended for those two tags.
- */
-class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
-{
-    
-    /**
-     * Instance of the definition object to use when inline. Usually stricter.
-     */
-    var $inline;
-    /**
-     * Instance of the definition object to use when block.
-     */
-    var $block;
-    
-    /**
-     * @param $inline List of elements to allow when inline.
-     * @param $block List of elements to allow when block.
-     */
-    function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
-        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
-        $this->block  = new HTMLPurifier_ChildDef_Optional($block);
-    }
-    
-    function validateChildren($tokens_of_children, $config, &$context) {
-        $parent_type = $context->get('ParentType');
-        switch ($parent_type) {
-            case 'unknown':
-            case 'inline':
-                $result = $this->inline->validateChildren(
-                    $tokens_of_children, $config, $context);
-                break;
-            case 'block':
-                $result = $this->block->validateChildren(
-                    $tokens_of_children, $config, $context);
-                break;
-            default:
-                trigger_error('Invalid context', E_USER_ERROR);
-                return false;
-        }
-        return $result;
-    }
-}
-
-/**
- * Definition for tables
- */
-class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
-{
-    var $allow_empty = false;
-    var $type = 'table';
-    function HTMLPurifier_ChildDef_Table() {}
-    function validateChildren($tokens_of_children, $config, &$context) {
-        if (empty($tokens_of_children)) return false;
-        
-        // this ensures that the loop gets run one last time before closing
-        // up. It's a little bit of a hack, but it works! Just make sure you
-        // get rid of the token later.
-        $tokens_of_children[] = false;
-        
-        // only one of these elements is allowed in a table
-        $caption = false;
-        $thead   = false;
-        $tfoot   = false;
-        
-        // as many of these as you want
-        $cols    = array();
-        $content = array();
-        
-        $nesting = 0; // current depth so we can determine nodes
-        $is_collecting = false; // are we globbing together tokens to package
-                                // into one of the collectors?
-        $collection = array(); // collected nodes
-        $tag_index = 0; // the first node might be whitespace,
-                            // so this tells us where the start tag is
-        
-        foreach ($tokens_of_children as $token) {
-            $is_child = ($nesting == 0);
-            
-            if ($token === false) {
-                // terminating sequence started
-            } elseif ($token->type == 'start') {
-                $nesting++;
-            } elseif ($token->type == 'end') {
-                $nesting--;
-            }
-            
-            // handle node collection
-            if ($is_collecting) {
-                if ($is_child) {
-                    // okay, let's stash the tokens away
-                    // first token tells us the type of the collection
-                    switch ($collection[$tag_index]->name) {
-                        case 'tr':
-                        case 'tbody':
-                            $content[] = $collection;
-                            break;
-                        case 'caption':
-                            if ($caption !== false) break;
-                            $caption = $collection;
-                            break;
-                        case 'thead':
-                        case 'tfoot':
-                            // access the appropriate variable, $thead or $tfoot
-                            $var = $collection[$tag_index]->name;
-                            if ($$var === false) {
-                                $$var = $collection;
-                            } else {
-                                // transmutate the first and less entries into
-                                // tbody tags, and then put into content
-                                $collection[$tag_index]->name = 'tbody';
-                                $collection[count($collection)-1]->name = 'tbody';
-                                $content[] = $collection;
-                            }
-                            break;
-                         case 'colgroup':
-                            $cols[] = $collection;
-                            break;
-                    }
-                    $collection = array();
-                    $is_collecting = false;
-                    $tag_index = 0;
-                } else {
-                    // add the node to the collection
-                    $collection[] = $token;
-                }
-            }
-            
-            // terminate
-            if ($token === false) break;
-            
-            if ($is_child) {
-                // determine what we're dealing with
-                if ($token->name == 'col') {
-                    // the only empty tag in the possie, we can handle it
-                    // immediately
-                    $cols[] = array_merge($collection, array($token));
-                    $collection = array();
-                    $tag_index = 0;
-                    continue;
-                }
-                switch($token->name) {
-                    case 'caption':
-                    case 'colgroup':
-                    case 'thead':
-                    case 'tfoot':
-                    case 'tbody':
-                    case 'tr':
-                        $is_collecting = true;
-                        $collection[] = $token;
-                        continue;
-                    default:
-                        if ($token->type == 'text' && $token->is_whitespace) {
-                            $collection[] = $token;
-                            $tag_index++;
-                        }
-                        continue;
-                }
-            }
-        }
-        
-        if (empty($content)) return false;
-        
-        $ret = array();
-        if ($caption !== false) $ret = array_merge($ret, $caption);
-        if ($cols !== false)    foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
-        if ($thead !== false)   $ret = array_merge($ret, $thead);
-        if ($tfoot !== false)   $ret = array_merge($ret, $tfoot);
-        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
-        if (!empty($collection) && $is_collecting == false){
-            // grab the trailing space
-            $ret = array_merge($ret, $collection);
-        }
-        
-        array_pop($tokens_of_children); // remove phantom token
-        
-        return ($ret === $tokens_of_children) ? true : $ret;
-        
-    }
-}
-
 ?>
--- a/library/HTMLPurifier/ChildDef/Chameleon.php
+++ b/library/HTMLPurifier/ChildDef/Chameleon.php
@ -0,0 +1,55 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+// The constructor below instantiates HTMLPurifier_ChildDef_Optional, so load
+// it here instead of relying on the including script to have done so.
+require_once 'HTMLPurifier/ChildDef/Optional.php';
+
+/**
+ * Definition that uses different definitions depending on context.
+ * 
+ * The del and ins tags are notable because they allow different types of
+ * elements depending on whether or not they're in a block or inline context.
+ * Chameleon allows this behavior to happen by using two different
+ * definitions depending on context.  While this is somewhat generalized,
+ * it is specifically intended for those two tags.
+ */
+class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
+{
+    
+    /**
+     * Instance of the definition object to use when inline. Usually stricter.
+     */
+    var $inline;
+    /**
+     * Instance of the definition object to use when block.
+     */
+    var $block;
+    
+    /**
+     * Wraps each element list in its own HTMLPurifier_ChildDef_Optional.
+     * @param $inline List of elements to allow when inline.
+     * @param $block List of elements to allow when block.
+     */
+    function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
+        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
+        $this->block  = new HTMLPurifier_ChildDef_Optional($block);
+    }
+    
+    /**
+     * Delegates validation to the inline or block definition, chosen by the
+     * 'ParentType' value read from $context.
+     * 
+     * 'unknown' is handled like 'inline'.  Any other value besides 'block'
+     * triggers an E_USER_ERROR ('Invalid context') and yields false.
+     * 
+     * @param $tokens_of_children Tokens to validate, passed through unchanged
+     * @param $config Configuration object, passed through unchanged
+     * @param $context Context object that must provide 'ParentType'
+     * @return Whatever the selected definition's validateChildren() returns,
+     *         or false for an unrecognized parent type
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        $parent_type = $context->get('ParentType');
+        switch ($parent_type) {
+            case 'unknown':
+            case 'inline':
+                $result = $this->inline->validateChildren(
+                    $tokens_of_children, $config, $context);
+                break;
+            case 'block':
+                $result = $this->block->validateChildren(
+                    $tokens_of_children, $config, $context);
+                break;
+            default:
+                trigger_error('Invalid context', E_USER_ERROR);
+                return false;
+        }
+        return $result;
+    }
+}
+
+?>
--- a/library/HTMLPurifier/ChildDef/Custom.php
+++ b/library/HTMLPurifier/ChildDef/Custom.php
@ -0,0 +1,75 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Custom validation class, accepts DTD child definitions
+ * 
+ * @warning Currently this class is an all or nothing proposition, that is,
+ *          it will only give a bool return value.
+ * @note This class is currently not used by any code, although it is unit
+ *       tested.
+ */
+class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
+{
+    var $type = 'custom';
+    var $allow_empty = false;
+    /**
+     * Allowed child pattern as defined by the DTD
+     */
+    var $dtd_regex;
+    /**
+     * PCRE regex derived from $dtd_regex.  It is matched against a string
+     * in which every direct child's name is followed by a comma.
+     * @private
+     */
+    var $_pcre_regex;
+    /**
+     * @param $dtd_regex Allowed child pattern from the DTD
+     */
+    function HTMLPurifier_ChildDef_Custom($dtd_regex) {
+        $this->dtd_regex = $dtd_regex;
+        $this->_compileRegex();
+    }
+    /**
+     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
+     * 
+     * Each element name becomes a group that consumes the name plus one
+     * trailing comma, e.g. "(a,b?)" compiles to "((a,)(b,)?)".  Requiring
+     * the comma keeps adjacent names from running together: without it the
+     * pattern "(a,b)" would also accept a single child named "ab".
+     */
+    function _compileRegex() {
+        $raw = str_replace(' ', '', $this->dtd_regex);
+        // substr() instead of a string offset: safe on an empty pattern and
+        // independent of the {}-offset syntax
+        if (substr($raw, 0, 1) != '(') {
+            $raw = "($raw)";
+        }
+        // every name swallows its own terminating comma
+        $reg = preg_replace('/[#a-zA-Z0-9_.-]+/', '(\\0,)', $raw);
+        // the DTD's own sequence commas now always follow a closing paren or
+        // a quantifier; they are redundant, so strip them
+        $reg = preg_replace('/([)?*+]),/', '\\1', $reg);
+        $this->_pcre_regex = $reg;
+    }
+    /**
+     * Checks the names of the direct children against the compiled pattern.
+     * 
+     * Tokens flagged as whitespace are ignored.  Only tokens encountered at
+     * nesting depth zero contribute their name; anything nested deeper is
+     * not inspected.
+     * 
+     * @param $tokens_of_children Tokens to validate
+     * @param $config Unused by this definition
+     * @param $context Unused by this definition
+     * @return bool true if the sequence of child names matches, else false
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        $list_of_children = '';
+        $nesting = 0; // depth into the nest
+        foreach ($tokens_of_children as $token) {
+            if (!empty($token->is_whitespace)) continue;
+            
+            // decided before the depth is adjusted, so a start tag at depth
+            // zero counts as a child while its matching end tag does not
+            $is_child = ($nesting == 0); // direct
+            
+            if ($token->type == 'start') {
+                $nesting++;
+            } elseif ($token->type == 'end') {
+                $nesting--;
+            }
+            
+            if ($is_child) {
+                // the trailing comma is kept on purpose: the compiled regex
+                // expects one after every name, including the last
+                $list_of_children .= $token->name . ',';
+            }
+        }
+        
+        $okay =
+            preg_match(
+                '/^'.$this->_pcre_regex.'$/',
+                $list_of_children
+            );
+        
+        return (bool) $okay;
+    }
+}
+
+?>
--- a/library/HTMLPurifier/ChildDef/Empty.php
+++ b/library/HTMLPurifier/ChildDef/Empty.php
@ -0,0 +1,22 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Child definition for elements that may not contain anything.
+ * @warning The original author notes that validateChildren() here is never
+ *          actually called: empty elements are corrected in
+ *          HTMLPurifier_Strategy_MakeWellFormed before child definitions
+ *          are processed by HTMLPurifier_Strategy_FixNesting.
+ */
+class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
+{
+    var $allow_empty = true;
+    var $type = 'empty';
+    
+    /**
+     * Nothing to configure.
+     */
+    function HTMLPurifier_ChildDef_Empty()
+    {
+    }
+    
+    /**
+     * Discards every child unconditionally.
+     * @return array Always an empty array, whatever tokens are passed in
+     */
+    function validateChildren($tokens_of_children, $config, &$context)
+    {
+        $no_children = array();
+        return $no_children;
+    }
+}
+
+?>
--- a/library/HTMLPurifier/ChildDef/Optional.php
+++ b/library/HTMLPurifier/ChildDef/Optional.php
@ -0,0 +1,23 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef/Required.php';
+
+/**
+ * Same element whitelist as HTMLPurifier_ChildDef_Required, except that
+ * ending up with no children is acceptable.
+ * @note The inheritance is a code-reuse hack; conceptually neither class
+ *       should derive from the other.  The only behavioral difference is
+ *       that a false result from the parent is turned into an empty array,
+ *       so this definition never returns false.
+ */
+class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
+{
+    var $allow_empty = true;
+    var $type = 'optional';
+    
+    /**
+     * Runs the parent's validation and maps its false to an empty array.
+     * @return true or an array of tokens, exactly as the parent returned
+     *         them, or array() where the parent returned false
+     */
+    function validateChildren($tokens_of_children, $config, &$context)
+    {
+        $outcome = parent::validateChildren($tokens_of_children, $config, $context);
+        return ($outcome === false) ? array() : $outcome;
+    }
+}
+
+?>
--- a/library/HTMLPurifier/ChildDef/Required.php
+++ b/library/HTMLPurifier/ChildDef/Required.php
@ -0,0 +1,100 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Definition that allows a set of elements, but disallows empty children.
+ */
+class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
+{
+    /**
+     * Lookup table of allowed elements.
+     */
+    var $elements = array();
+    /**
+     * HTMLPurifier_Generator instance used to turn rejected tokens back
+     * into text when invalid children are escaped rather than dropped.
+     */
+    var $gen;
+    /**
+     * @param $elements List of allowed element names (lowercase).  Either
+     *        an array of names or a string of names separated by '|';
+     *        spaces in the string form are ignored.
+     */
+    function HTMLPurifier_ChildDef_Required($elements) {
+        if (is_string($elements)) {
+            $elements = str_replace(' ', '', $elements);
+            $elements = explode('|', $elements);
+        }
+        // turn the list into a name => true lookup table
+        $elements = array_flip($elements);
+        foreach ($elements as $i => $x) $elements[$i] = true;
+        $this->elements = $elements;
+        $this->gen = new HTMLPurifier_Generator();
+    }
+    var $allow_empty = false;
+    var $type = 'required';
+    /**
+     * Filters the children down to the allowed elements.
+     * 
+     * A direct child whose name is not in $elements is removed together
+     * with everything nested inside it.  If '#PCDATA' is allowed, text
+     * tokens inside a removed subtree are kept, and when the
+     * Core.EscapeInvalidChildren directive is on, the removed tags are
+     * re-emitted as text tokens instead of being dropped.
+     * 
+     * @param $tokens_of_children Tokens to validate
+     * @param $config Configuration object (Core.EscapeInvalidChildren is read)
+     * @param $context Unused by this definition
+     * @return false if there are no tokens, nothing survives, or every
+     *         token is whitespace; true if nothing had to change; otherwise
+     *         the filtered array of tokens
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        // if there are no tokens, delete parent node
+        if (empty($tokens_of_children)) return false;
+        
+        // the new set of children
+        $result = array();
+        
+        // current depth into the nest
+        $nesting = 0;
+        
+        // whether or not we're deleting a node
+        $is_deleting = false;
+        
+        // whether or not parsed character data is allowed
+        // this controls whether or not we silently drop a tag
+        // or generate escaped HTML from it
+        $pcdata_allowed = isset($this->elements['#PCDATA']);
+        
+        // a little sanity check to make sure it's not ALL whitespace
+        $all_whitespace = true;
+        
+        // some configuration
+        $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
+        
+        foreach ($tokens_of_children as $token) {
+            if (!empty($token->is_whitespace)) {
+                $result[] = $token;
+                continue;
+            }
+            $all_whitespace = false; // phew, we're not talking about whitespace
+            
+            // decided before the depth is adjusted, so a start tag at depth
+            // zero is a child while its matching end tag is not
+            $is_child = ($nesting == 0);
+            
+            if ($token->type == 'start') {
+                $nesting++;
+            } elseif ($token->type == 'end') {
+                $nesting--;
+            }
+            
+            if ($is_child) {
+                // a direct child decides the fate of its entire subtree:
+                // the flag stays set until the next direct child shows up
+                $is_deleting = !isset($this->elements[$token->name]);
+            }
+            
+            if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
+                $result[] = $token;
+            } elseif ($pcdata_allowed && $escape_invalid_children) {
+                $result[] = new HTMLPurifier_Token_Text(
+                    $this->gen->generateFromToken($token, $config)
+                );
+            }
+            // anything else is dropped silently
+        }
+        if (empty($result)) return false;
+        if ($all_whitespace) return false;
+        if ($tokens_of_children == $result) return true;
+        return $result;
+    }
+}
+
+?>
--- a/library/HTMLPurifier/ChildDef/Table.php
+++ b/library/HTMLPurifier/ChildDef/Table.php
@ -0,0 +1,142 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Definition for tables
+ * 
+ * Regroups the direct children of a table into the order caption,
+ * col/colgroup, thead, tfoot, tr/tbody.
+ */
+class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
+{
+    var $allow_empty = false;
+    var $type = 'table';
+    function HTMLPurifier_ChildDef_Table() {}
+    /**
+     * Sorts the table's children into a fixed order.
+     * 
+     * Only the first caption is kept.  A second thead or tfoot is renamed
+     * to tbody and treated as regular content.  Direct children that are
+     * neither one of the recognized table elements nor whitespace text are
+     * not carried over into the result.
+     * 
+     * @param $tokens_of_children Tokens to validate
+     * @param $config Unused by this definition
+     * @param $context Unused by this definition
+     * @return false if there are no tokens or no tr/tbody content was
+     *         found; true if the regrouped tokens are identical to the
+     *         input; otherwise the regrouped array of tokens
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        if (empty($tokens_of_children)) return false;
+        
+        // this ensures that the loop gets run one last time before closing
+        // up. It's a little bit of a hack, but it works! Just make sure you
+        // get rid of the token later.
+        $tokens_of_children[] = false;
+        
+        // only one of these elements is allowed in a table
+        $caption = false;
+        $thead   = false;
+        $tfoot   = false;
+        
+        // as many of these as you want
+        $cols    = array();
+        $content = array();
+        
+        $nesting = 0; // current depth so we can determine nodes
+        $is_collecting = false; // are we globbing together tokens to package
+                                // into one of the collectors?
+        $collection = array(); // collected nodes
+        $tag_index = 0; // the first node might be whitespace,
+                            // so this tells us where the start tag is
+        
+        foreach ($tokens_of_children as $token) {
+            $is_child = ($nesting == 0);
+            
+            if ($token === false) {
+                // terminating sequence started
+            } elseif ($token->type == 'start') {
+                $nesting++;
+            } elseif ($token->type == 'end') {
+                $nesting--;
+            }
+            
+            // handle node collection
+            if ($is_collecting) {
+                if ($is_child) {
+                    // we are back at the top level, so the node being
+                    // collected is complete: stash the tokens away.
+                    // the token at $tag_index tells us the type of the
+                    // collection
+                    switch ($collection[$tag_index]->name) {
+                        case 'tr':
+                        case 'tbody':
+                            $content[] = $collection;
+                            break;
+                        case 'caption':
+                            // any caption after the first is discarded
+                            if ($caption !== false) break;
+                            $caption = $collection;
+                            break;
+                        case 'thead':
+                        case 'tfoot':
+                            // access the appropriate variable, $thead or $tfoot
+                            $var = $collection[$tag_index]->name;
+                            if ($$var === false) {
+                                $$var = $collection;
+                            } else {
+                                // transmute the first and last entries into
+                                // tbody tags, and then put into content
+                                $collection[$tag_index]->name = 'tbody';
+                                $collection[count($collection)-1]->name = 'tbody';
+                                $content[] = $collection;
+                            }
+                            break;
+                        case 'colgroup':
+                            $cols[] = $collection;
+                            break;
+                    }
+                    $collection = array();
+                    $is_collecting = false;
+                    $tag_index = 0;
+                } else {
+                    // add the node to the collection
+                    $collection[] = $token;
+                }
+            }
+            
+            // terminate
+            if ($token === false) break;
+            
+            if ($is_child) {
+                // determine what we're dealing with
+                if ($token->name == 'col') {
+                    // the only empty tag in the posse, we can handle it
+                    // immediately
+                    $cols[] = array_merge($collection, array($token));
+                    $collection = array();
+                    $tag_index = 0;
+                    continue;
+                }
+                // the switch is the last statement of the loop body, so
+                // leaving it with break moves straight on to the next token
+                switch($token->name) {
+                    case 'caption':
+                    case 'colgroup':
+                    case 'thead':
+                    case 'tfoot':
+                    case 'tbody':
+                    case 'tr':
+                        $is_collecting = true;
+                        $collection[] = $token;
+                        break;
+                    default:
+                        if ($token->type == 'text' && $token->is_whitespace) {
+                            // leading whitespace travels with the node that
+                            // follows it; remember how far the tag is offset
+                            $collection[] = $token;
+                            $tag_index++;
+                        }
+                        break;
+                }
+            }
+        }
+        
+        if (empty($content)) return false;
+        
+        $ret = array();
+        if ($caption !== false) $ret = array_merge($ret, $caption);
+        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
+        if ($thead !== false)   $ret = array_merge($ret, $thead);
+        if ($tfoot !== false)   $ret = array_merge($ret, $tfoot);
+        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
+        if (!empty($collection) && $is_collecting == false){
+            // grab the trailing space
+            $ret = array_merge($ret, $collection);
+        }
+        
+        array_pop($tokens_of_children); // remove phantom token
+        
+        return ($ret === $tokens_of_children) ? true : $ret;
+        
+    }
+}
+
+?>
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@ -18,6 +18,11 @@ require_once 'HTMLPurifier/AttrTransform.php';
    require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
    require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
 require_once 'HTMLPurifier/ChildDef.php';
+    require_once 'HTMLPurifier/ChildDef/Chameleon.php';
+    require_once 'HTMLPurifier/ChildDef/Empty.php';
+    require_once 'HTMLPurifier/ChildDef/Required.php';
+    require_once 'HTMLPurifier/ChildDef/Optional.php';
+    require_once 'HTMLPurifier/ChildDef/Table.php';
 require_once 'HTMLPurifier/Generator.php';
 require_once 'HTMLPurifier/Token.php';
 require_once 'HTMLPurifier/TagTransform.php';
--- a/tests/HTMLPurifier/ChildDef/ChameleonTest.php
+++ b/tests/HTMLPurifier/ChildDef/ChameleonTest.php
@ -0,0 +1,35 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDefHarness.php';
+require_once 'HTMLPurifier/ChildDef/Chameleon.php';
+
+class HTMLPurifier_ChildDef_ChameleonTest extends HTMLPurifier_ChildDefHarness
+{
+    
+    /**
+     * The same markup is judged differently depending on the 'ParentType'
+     * entry supplied as the fourth assertResult() argument.
+     */
+    function test() {
+        
+        $inline_elements = 'b | i';       // allowed only when in inline context
+        $block_elements  = 'b | i | div'; // allowed only when in block context
+        $this->obj = new HTMLPurifier_ChildDef_Chameleon(
+            $inline_elements, $block_elements
+        );
+        
+        $no_config = array();
+        $in_inline = array('ParentType' => 'inline');
+        $in_block  = array('ParentType' => 'block');
+        
+        // b is fine inline
+        $this->assertResult('<b>Allowed.</b>', true, $no_config, $in_inline);
+        
+        // div is rejected inline ...
+        $this->assertResult('<div>Not allowed.</div>', '', $no_config, $in_inline);
+        
+        // ... but accepted in a block parent
+        $this->assertResult('<div>Allowed.</div>', true, $no_config, $in_block);
+        
+    }
+    
+}
+
+?>
--- a/tests/HTMLPurifier/ChildDef/CustomTest.php
+++ b/tests/HTMLPurifier/ChildDef/CustomTest.php
@ -0,0 +1,24 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDefHarness.php';
+require_once 'HTMLPurifier/ChildDef/Custom.php';
+
+class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
+{
+    // Validates child sequences against the DTD-style content model '(a,b?,c*,d+,(a,b)*)'.
+    function test() {
+        // DTD reading: a, optional b, any number of c, one or more d, then zero or more (a,b) pairs
+        $this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
+        // expected result false: no children at all, and a sequence with no d element
+        $this->assertResult('', false);
+        $this->assertResult('<a /><a />', false);
+        // no expected value passed: presumably asserts the input validates unchanged - confirm in Harness
+        $this->assertResult('<a /><b /><c /><d /><a /><b />');
+        $this->assertResult('<a /><d>Dob</d><a /><b>foo</b>'.
+          '<a href="moo" /><b>foo</b>');
+        
+    }
+    
+}
+
+?>
--- a/tests/HTMLPurifier/ChildDef/OptionalTest.php
+++ b/tests/HTMLPurifier/ChildDef/OptionalTest.php
@ -0,0 +1,20 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDefHarness.php';
+require_once 'HTMLPurifier/ChildDef/Optional.php';
+
+class HTMLPurifier_ChildDef_OptionalTest extends HTMLPurifier_ChildDefHarness
+{
+    // Exercises HTMLPurifier_ChildDef_Optional with 'b | i' as the allowed children.
+    function test() {
+        
+        $this->obj = new HTMLPurifier_ChildDef_Optional('b | i');
+        // <img /> and bare text are not on the list; the text-only case expects '' rather than false
+        $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
+        $this->assertResult('Not allowed text', '');
+        
+    }
+    
+}
+
+?>
--- a/tests/HTMLPurifier/ChildDef/RequiredTest.php
+++ b/tests/HTMLPurifier/ChildDef/RequiredTest.php
@ -0,0 +1,69 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDefHarness.php';
+require_once 'HTMLPurifier/ChildDef/Required.php';
+
+class HTMLPurifier_ChildDef_RequiredTest extends HTMLPurifier_ChildDefHarness
+{
+    // The constructor is expected to expose its allowed names as a name => true map in $def->elements.
+    function testParsing() {
+        // string form; note the uneven spacing around the '|' separators in the input
+        $def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
+        $this->assertEqual($def->elements,
+          array(
+            'foobar' => true
+           ,'bang'   => true
+           ,'gizmo'  => true
+          ));
+        // array form: a plain list of names is expected to give the same kind of map
+        $def = new HTMLPurifier_ChildDef_Required(array('href', 'src'));
+        $this->assertEqual($def->elements,
+          array(
+            'href' => true
+           ,'src'  => true
+          ));
+        
+    }
+    // With 'dt | dd' (no #PCDATA): stray text and unlisted tags are expected to be removed.
+    function testPCDATAForbidden() {
+        
+        $this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');
+        // false is the expected result when no listed element remains (empty, text-only, whitespace-only)
+        $this->assertResult('', false);
+        $this->assertResult(
+          '<dt>Term</dt>Text in an illegal location'.
+             '<dd>Definition</dd><b>Illegal tag</b>',
+          '<dt>Term</dt><dd>Definition</dd>');
+        $this->assertResult('How do you do!', false);
+        
+        // whitespace shouldn't trigger it
+        $this->assertResult("\n<dd>Definition</dd>       ");
+        // the unlisted <b></b> is dropped while the whitespace on either side of it is expected to stay
+        $this->assertResult(
+          '<dd>Definition</dd>       <b></b>       ',
+          '<dd>Definition</dd>              '
+        );
+        $this->assertResult("\t      ", false);
+        
+    }
+    // With '#PCDATA | b': an unlisted <img /> is removed by default and escaped to text when configured.
+    function testPCDATAAllowed() {
+        
+        $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
+        // no config passed: the unlisted <img /> is expected to vanish from the output
+        $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
+        
+        // with child escaping on
+        $this->assertResult(
+            '<b>Bold text</b><img />',
+            '<b>Bold text</b>&lt;img /&gt;',
+            array(
+              'Core.EscapeInvalidChildren' => true
+            )
+        );
+        
+    }
+    
+}
+
+?>
--- a/tests/HTMLPurifier/ChildDef/TableTest.php
+++ b/tests/HTMLPurifier/ChildDef/TableTest.php
@ -0,0 +1,51 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDefHarness.php';
+require_once 'HTMLPurifier/ChildDef/Table.php';
+
+class HTMLPurifier_ChildDef_TableTest extends HTMLPurifier_ChildDefHarness
+{
+    // Exercises HTMLPurifier_ChildDef_Table: rejection, reordering, duplicate handling and whitespace.
+    function test() {
+        
+        $this->obj = new HTMLPurifier_ChildDef_Table();
+        // a table with no children at all is expected to give false
+        $this->assertResult('', false);
+        
+        // we're using empty tags to compact the tests: under real circumstances
+        // there would be contents in them
+        // calls without an expected value presumably assert the input validates unchanged - confirm in Harness
+        $this->assertResult('<tr />');
+        $this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
+            '<tr><td>asdf</td></tr></tbody>');
+        $this->assertResult('<col /><col /><col /><tr />');
+        
+        // mixed up order
+        $this->assertResult(
+          '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
+          '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');
+        
+        // duplicates of singles
+        // - first caption serves
+        // - trailing tfoots/theads get turned into tbodys
+        $this->assertResult(
+          '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
+          '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
+        );
+        
+        // errant text dropped (until bubbling is implemented)
+        $this->assertResult('foo', false);
+        
+        // whitespace sticks to the previous element, last whitespace is
+        // stationary
+        $this->assertResult("\n   <tr />\n  <tr />\n ");
+        $this->assertResult(
+          "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
+          "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
+        );
+        
+    }
+    
+}
+
+?>
--- a/tests/HTMLPurifier/ChildDefHarness.php
+++ b/tests/HTMLPurifier/ChildDefHarness.php
@ -0,0 +1,18 @@
+<?php
+
+require_once 'HTMLPurifier/Harness.php';
+require_once 'HTMLPurifier/ChildDef.php';
+
+class HTMLPurifier_ChildDefHarness extends HTMLPurifier_Harness
+{
+    // Shared fixture for the ChildDef tests: each test assigns $this->obj itself, then calls assertResult().
+    function setUp() {
+        $this->obj       = null; // reset so every test has to supply its own definition object
+        $this->func      = 'validateChildren'; // method name, presumably invoked on $this->obj - confirm in Harness
+        $this->to_tokens = true; // presumably: tokenize the HTML input before the call - confirm in Harness
+        $this->to_html   = true; // presumably: render the returned tokens back to HTML - confirm in Harness
+    }
+    
+}
+
+?>
--- a/tests/HTMLPurifier/ChildDefTest.php
+++ b/tests/HTMLPurifier/ChildDefTest.php
@ -1,168 +0,0 @@
-<?php
-
-require_once 'HTMLPurifier/Harness.php';
-
-require_once 'HTMLPurifier/ChildDef.php';
-require_once 'HTMLPurifier/Lexer/DirectLex.php';
-require_once 'HTMLPurifier/Generator.php';
-
-class HTMLPurifier_ChildDefTest extends HTMLPurifier_Harness
-{
-    
-    function setUp() {
-        $this->obj       = null;
-        $this->func      = 'validateChildren';
-        $this->to_tokens = true;
-        $this->to_html   = true;
-    }
-    
-    function test_custom() {
-        
-        $this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
-        
-        $this->assertResult('', false);
-        $this->assertResult('<a /><a />', false);
-        
-        $this->assertResult('<a /><b /><c /><d /><a /><b />');
-        $this->assertResult('<a /><d>Dob</d><a /><b>foo</b>'.
-          '<a href="moo" /><b>foo</b>');
-        
-    }
-    
-    function test_table() {
-        
-        // the table definition
-        $this->obj = new HTMLPurifier_ChildDef_Table();
-        
-        $inputs = $expect = $config = array();
-        
-        $this->assertResult('', false);
-        
-        // we're using empty tags to compact the tests: under real circumstances
-        // there would be contents in them
-        
-        $this->assertResult('<tr />');
-        $this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
-            '<tr><td>asdf</td></tr></tbody>');
-        $this->assertResult('<col /><col /><col /><tr />');
-        
-        // mixed up order
-        $this->assertResult(
-          '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
-          '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');
-        
-        // duplicates of singles
-        // - first caption serves
-        // - trailing tfoots/theads get turned into tbodys
-        $this->assertResult(
-          '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
-          '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
-        );
-        
-        // errant text dropped (until bubbling is implemented)
-        $this->assertResult('foo', false);
-        
-        // whitespace sticks to the previous element, last whitespace is
-        // stationary
-        $this->assertResult("\n   <tr />\n  <tr />\n ");
-        $this->assertResult(
-          "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
-          "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
-        );
-        
-    }
-    
-    function testParsing() {
-        
-        $def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
-        $this->assertEqual($def->elements,
-          array(
-            'foobar' => true
-           ,'bang'   => true
-           ,'gizmo'  => true
-          ));
-        
-        $def = new HTMLPurifier_ChildDef_Required(array('href', 'src'));
-        $this->assertEqual($def->elements,
-          array(
-            'href' => true
-           ,'src'  => true
-          ));
-        
-    }
-    
-    function test_required_pcdata_forbidden() {
-        
-        $this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');
-        
-        $this->assertResult('', false);
-        $this->assertResult(
-          '<dt>Term</dt>Text in an illegal location'.
-             '<dd>Definition</dd><b>Illegal tag</b>',
-          '<dt>Term</dt><dd>Definition</dd>');
-        $this->assertResult('How do you do!', false);
-        
-        // whitespace shouldn't trigger it
-        $this->assertResult("\n<dd>Definition</dd>       ");
-        
-        $this->assertResult(
-          '<dd>Definition</dd>       <b></b>       ',
-          '<dd>Definition</dd>              '
-        );
-        $this->assertResult("\t      ", false);
-        
-    }
-    
-    function test_required_pcdata_allowed() {
-        
-        $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
-        
-        $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
-        
-        // with child escaping on
-        $this->assertResult(
-            '<b>Bold text</b><img />',
-            '<b>Bold text</b>&lt;img /&gt;',
-            array(
-              'Core.EscapeInvalidChildren' => true
-            )
-        );
-        
-    }
-    
-    function test_optional() {
-        
-        $this->obj = new HTMLPurifier_ChildDef_Optional('b | i');
-        
-        $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
-        $this->assertResult('Not allowed text', '');
-        
-    }
-    
-    function test_chameleon() {
-        
-        $this->obj = new HTMLPurifier_ChildDef_Chameleon(
-            'b | i',      // allowed only when in inline context
-            'b | i | div' // allowed only when in block context
-        );
-        
-        $this->assertResult(
-            '<b>Allowed.</b>', true,
-            array(), array('ParentType' => 'inline')
-        );
-        
-        $this->assertResult(
-            '<div>Not allowed.</div>', '',
-            array(), array('ParentType' => 'inline')
-        );
-        
-        $this->assertResult(
-            '<div>Allowed.</div>', true,
-            array(), array('ParentType' => 'block')
-        );
-        
-    }
-    
-}
-
-?>
--- a/tests/index.php
+++ b/tests/index.php
@ -44,7 +44,11 @@ $test_files[] = 'ConfigSchemaTest.php';
 $test_files[] = 'LexerTest.php';
 $test_files[] = 'Lexer/DirectLexTest.php';
 $test_files[] = 'TokenTest.php';
-$test_files[] = 'ChildDefTest.php';
+$test_files[] = 'ChildDef/RequiredTest.php';
+$test_files[] = 'ChildDef/OptionalTest.php';
+$test_files[] = 'ChildDef/ChameleonTest.php';
+$test_files[] = 'ChildDef/CustomTest.php';
+$test_files[] = 'ChildDef/TableTest.php';
 $test_files[] = 'GeneratorTest.php';
 $test_files[] = 'EntityLookupTest.php';
 $test_files[] = 'Strategy/RemoveForeignElementsTest.php';