mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-06 22:41:54 +00:00
0767bbc12d
This mega-patch rips out the FixNesting implementation and the related ChildDef components. The primary algorithmic change is to convert from use of tokens to tree nodes, which are far more amenable to the style of processing that FixNesting uses. Additionally, FixNesting has been changed to go bottom-up rather than top-down, in order to avoid needing to implement backtracking. This patch simplifies a good deal of the relevant logic, since we no longer need to continually recalculate the nesting structure when processing things. However, the conversion to the alternate format incurs some overhead, so for small inputs these changes are not a win. One possibility to greatly reduce the constant factors here is to switch to entirely using libxml's representation, and never serializing tokens; this would require one to rewrite injectors, however. The iterative post-order traversal in FixNesting is a bit subtle, but we have essentially reified the stack and continuations. We've removed support for %Core.EscapeInvalidChildren. Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
111 lines
2.8 KiB
PHP
111 lines
2.8 KiB
PHP
<?php
|
|
|
|
/**
|
|
* Takes the contents of blockquote when in strict and reformats for validation.
|
|
*/
|
|
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
|
|
{
|
|
/**
|
|
* @type array
|
|
*/
|
|
protected $real_elements;
|
|
|
|
/**
|
|
* @type array
|
|
*/
|
|
protected $fake_elements;
|
|
|
|
/**
|
|
* @type bool
|
|
*/
|
|
public $allow_empty = true;
|
|
|
|
/**
|
|
* @type string
|
|
*/
|
|
public $type = 'strictblockquote';
|
|
|
|
/**
|
|
* @type bool
|
|
*/
|
|
protected $init = false;
|
|
|
|
/**
|
|
* @param HTMLPurifier_Config $config
|
|
* @return array
|
|
* @note We don't want MakeWellFormed to auto-close inline elements since
|
|
* they might be allowed.
|
|
*/
|
|
public function getAllowedElements($config)
|
|
{
|
|
$this->init($config);
|
|
return $this->fake_elements;
|
|
}
|
|
|
|
/**
|
|
* @param array $children
|
|
* @param HTMLPurifier_Config $config
|
|
* @param HTMLPurifier_Context $context
|
|
* @return array
|
|
*/
|
|
public function validateChildren($children, $config, $context)
|
|
{
|
|
$this->init($config);
|
|
|
|
// trick the parent class into thinking it allows more
|
|
$this->elements = $this->fake_elements;
|
|
$result = parent::validateChildren($children, $config, $context);
|
|
$this->elements = $this->real_elements;
|
|
|
|
if ($result === false) {
|
|
return array();
|
|
}
|
|
if ($result === true) {
|
|
$result = $children;
|
|
}
|
|
|
|
$def = $config->getHTMLDefinition();
|
|
$block_wrap_name = $def->info_block_wrapper;
|
|
$block_wrap = false;
|
|
$ret = array();
|
|
|
|
foreach ($result as $node) {
|
|
if ($block_wrap === false) {
|
|
if (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) ||
|
|
($node instanceof HTMLPurifier_Node_Element && !isset($this->elements[$node->name]))) {
|
|
$block_wrap = new HTMLPurifier_Node_Element($def->info_block_wrapper);
|
|
$ret[] = $block_wrap;
|
|
}
|
|
} else {
|
|
if ($node instanceof HTMLPurifier_Node_Element && isset($this->elements[$node->name])) {
|
|
$block_wrap = false;
|
|
|
|
}
|
|
}
|
|
if ($block_wrap) {
|
|
$block_wrap->children[] = $node;
|
|
} else {
|
|
$ret[] = $node;
|
|
}
|
|
}
|
|
return $ret;
|
|
}
|
|
|
|
/**
|
|
* @param HTMLPurifier_Config $config
|
|
*/
|
|
private function init($config)
|
|
{
|
|
if (!$this->init) {
|
|
$def = $config->getHTMLDefinition();
|
|
// allow all inline elements
|
|
$this->real_elements = $this->elements;
|
|
$this->fake_elements = $def->info_content_sets['Flow'];
|
|
$this->fake_elements['#PCDATA'] = true;
|
|
$this->init = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// vim: et sw=4 sts=4
|