dtd_regex = $dtd_regex; $this->_compileRegex(); } /** * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) */ function _compileRegex() { $raw = str_replace(' ', '', $this->dtd_regex); if ($raw{0} != '(') { $raw = "($raw)"; } $reg = str_replace(',', ',?', $raw); $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg); $this->_pcre_regex = $reg; } function validateChildren($tokens_of_children, $config, &$context) { $list_of_children = ''; $nesting = 0; // depth into the nest foreach ($tokens_of_children as $token) { if (!empty($token->is_whitespace)) continue; $is_child = ($nesting == 0); // direct if ($token->type == 'start') { $nesting++; } elseif ($token->type == 'end') { $nesting--; } if ($is_child) { $list_of_children .= $token->name . ','; } } $list_of_children = rtrim($list_of_children, ','); $okay = preg_match( '/^'.$this->_pcre_regex.'$/', $list_of_children ); return (bool) $okay; } } /** * Definition that allows a set of elements, but disallows empty children. */ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef { /** * Lookup table of allowed elements. */ var $elements = array(); /** * @param $elements List of allowed element names (lowercase). */ function HTMLPurifier_ChildDef_Required($elements) { if (is_string($elements)) { $elements = str_replace(' ', '', $elements); $elements = explode('|', $elements); } $elements = array_flip($elements); foreach ($elements as $i => $x) $elements[$i] = true; $this->elements = $elements; $this->gen = new HTMLPurifier_Generator(); } var $allow_empty = false; var $type = 'required'; function validateChildren($tokens_of_children, $config, &$context) { // if there are no tokens, delete parent node if (empty($tokens_of_children)) return false; // the new set of children $result = array(); // current depth into the nest $nesting = 0; // whether or not we're deleting a node $is_deleting = false; // whether or not parsed character data is allowed // this controls whether or not we silently drop a tag // or generate escaped HTML from it $pcdata_allowed = isset($this->elements['#PCDATA']); // a little sanity check to make sure it's not ALL whitespace $all_whitespace = true; // some configuration $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren'); foreach ($tokens_of_children as $token) { if (!empty($token->is_whitespace)) { $result[] = $token; continue; } $all_whitespace = false; // phew, we're not talking about whitespace $is_child = ($nesting == 0); if ($token->type == 'start') { $nesting++; } elseif ($token->type == 'end') { $nesting--; } if ($is_child) { $is_deleting = false; if (!isset($this->elements[$token->name])) { $is_deleting = true; if ($pcdata_allowed && $token->type == 'text') { $result[] = $token; } elseif ($pcdata_allowed && $escape_invalid_children) { $result[] = new HTMLPurifier_Token_Text( $this->gen->generateFromToken($token, $config) ); } continue; } } if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) { $result[] = $token; } elseif ($pcdata_allowed && $escape_invalid_children) { $result[] = new HTMLPurifier_Token_Text( $this->gen->generateFromToken( $token, $config ) ); } else { // drop silently } } if (empty($result)) return false; if ($all_whitespace) return false; if ($tokens_of_children == $result) return true; return $result; } } /** * Definition that allows a set of elements, and allows no children. * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required, * really, one shouldn't inherit from the other. Only altered behavior * is to overload a returned false with an array. Thus, it will never * return false. */ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required { var $allow_empty = true; var $type = 'optional'; function validateChildren($tokens_of_children, $config, &$context) { $result = parent::validateChildren($tokens_of_children, $config, $context); if ($result === false) return array(); return $result; } } /** * Definition that disallows all elements. * @warning validateChildren() in this class is actually never called, because * empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed * before child definitions are parsed in earnest by * HTMLPurifier_Strategy_FixNesting. */ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef { var $allow_empty = true; var $type = 'empty'; function HTMLPurifier_ChildDef_Empty() {} function validateChildren($tokens_of_children, $config, &$context) { return array(); } } /** * Definition that uses different definitions depending on context. * * The del and ins tags are notable because they allow different types of * elements depending on whether or not they're in a block or inline context. * Chameleon allows this behavior to happen by using two different * definitions depending on context. While this somewhat generalized, * it is specifically intended for those two tags. */ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef { /** * Instance of the definition object to use when inline. Usually stricter. */ var $inline; /** * Instance of the definition object to use when block. */ var $block; /** * @param $inline List of elements to allow when inline. * @param $block List of elements to allow when block. */ function HTMLPurifier_ChildDef_Chameleon($inline, $block) { $this->inline = new HTMLPurifier_ChildDef_Optional($inline); $this->block = new HTMLPurifier_ChildDef_Optional($block); } function validateChildren($tokens_of_children, $config, &$context) { $parent_type = $context->get('ParentType'); switch ($parent_type) { case 'unknown': case 'inline': $result = $this->inline->validateChildren( $tokens_of_children, $config, $context); break; case 'block': $result = $this->block->validateChildren( $tokens_of_children, $config, $context); break; default: trigger_error('Invalid context', E_USER_ERROR); return false; } return $result; } } /** * Definition for tables */ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef { var $allow_empty = false; var $type = 'table'; function HTMLPurifier_ChildDef_Table() {} function validateChildren($tokens_of_children, $config, &$context) { if (empty($tokens_of_children)) return false; // this ensures that the loop gets run one last time before closing // up. It's a little bit of a hack, but it works! Just make sure you // get rid of the token later. $tokens_of_children[] = false; // only one of these elements is allowed in a table $caption = false; $thead = false; $tfoot = false; // as many of these as you want $cols = array(); $content = array(); $nesting = 0; // current depth so we can determine nodes $is_collecting = false; // are we globbing together tokens to package // into one of the collectors? $collection = array(); // collected nodes $tag_index = 0; // the first node might be whitespace, // so this tells us where the start tag is foreach ($tokens_of_children as $token) { $is_child = ($nesting == 0); if ($token === false) { // terminating sequence started } elseif ($token->type == 'start') { $nesting++; } elseif ($token->type == 'end') { $nesting--; } // handle node collection if ($is_collecting) { if ($is_child) { // okay, let's stash the tokens away // first token tells us the type of the collection switch ($collection[$tag_index]->name) { case 'tr': case 'tbody': $content[] = $collection; break; case 'caption': if ($caption !== false) break; $caption = $collection; break; case 'thead': case 'tfoot': // access the appropriate variable, $thead or $tfoot $var = $collection[$tag_index]->name; if ($$var === false) { $$var = $collection; } else { // transmutate the first and less entries into // tbody tags, and then put into content $collection[$tag_index]->name = 'tbody'; $collection[count($collection)-1]->name = 'tbody'; $content[] = $collection; } break; case 'colgroup': $cols[] = $collection; break; } $collection = array(); $is_collecting = false; $tag_index = 0; } else { // add the node to the collection $collection[] = $token; } } // terminate if ($token === false) break; if ($is_child) { // determine what we're dealing with if ($token->name == 'col') { // the only empty tag in the possie, we can handle it // immediately $cols[] = array_merge($collection, array($token)); $collection = array(); $tag_index = 0; continue; } switch($token->name) { case 'caption': case 'colgroup': case 'thead': case 'tfoot': case 'tbody': case 'tr': $is_collecting = true; $collection[] = $token; continue; default: if ($token->type == 'text' && $token->is_whitespace) { $collection[] = $token; $tag_index++; } continue; } } } if (empty($content)) return false; $ret = array(); if ($caption !== false) $ret = array_merge($ret, $caption); if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); if ($thead !== false) $ret = array_merge($ret, $thead); if ($tfoot !== false) $ret = array_merge($ret, $tfoot); foreach ($content as $token_array) $ret = array_merge($ret, $token_array); if (!empty($collection) && $is_collecting == false){ // grab the trailing space $ret = array_merge($ret, $collection); } array_pop($tokens_of_children); // remove phantom token return ($ret === $tokens_of_children) ? true : $ret; } } ?>