0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-03-15 19:07:05 +00:00

[1.5.0] Implement TransformToStrict proprietary module

- Factored out strictblockquote from the common definition
- Text module now follows "strict" rules by default
- attr_transform_* now are indexed with string keys, to allow overloading
- Implement ElementDef merging, and add standalone class variable to ElementDef to prevent half-baked element definitions from masquerading as full ones
- Implement merging global attributes from modules, namely info_attr_transform_post, info_attr_transform_pre and info_tag_transform
- Rename setupInfo() to processModules()
- Fix typo in HTMLModule/Bdo.php

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@731 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-02-10 23:35:21 +00:00
parent bd544ad038
commit 54a68a1713
8 changed files with 156 additions and 56 deletions

View File

@ -5,7 +5,6 @@ require_once 'HTMLPurifier/ChildDef.php';
require_once 'HTMLPurifier/ChildDef/Empty.php';
require_once 'HTMLPurifier/ChildDef/Required.php';
require_once 'HTMLPurifier/ChildDef/Optional.php';
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php'; // transform
class HTMLPurifier_ContentSets
{
@ -111,8 +110,6 @@ class HTMLPurifier_ContentSets
return new HTMLPurifier_ChildDef_Optional($value);
case 'empty':
return new HTMLPurifier_ChildDef_Empty();
case 'strictblockquote':
return new HTMLPurifier_ChildDef_StrictBlockquote($value);
case 'custom':
return new HTMLPurifier_ChildDef_Custom($value);
}

View File

@ -7,6 +7,12 @@
class HTMLPurifier_ElementDef
{
/**
* Does the definition work by itself, or is it created solely
* for the purpose of merging into another definition?
*/
var $standalone = true;
/**
* Associative array of attribute name to HTMLPurifier_AttrDef
* @note Before being processed by HTMLPurifier_AttrCollections
@ -22,13 +28,13 @@ class HTMLPurifier_ElementDef
var $attr = array();
/**
* List of tag's HTMLPurifier_AttrTransform to be done before validation
* Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
* @public
*/
var $attr_transform_pre = array();
/**
* List of tag's HTMLPurifier_AttrTransform to be done after validation
* Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
* @public
*/
var $attr_transform_post = array();
@ -79,6 +85,27 @@ class HTMLPurifier_ElementDef
*/
var $excludes = array();
/**
* Merges the values of another element definition into this one.
* Values from the new element def take precedence if a value is
* not mergeable.
*/
function mergeIn($def) {
    // Keyed collections are copied over entry by entry: an entry in $def
    // replaces the entry stored under the same key here, and every other
    // existing entry is left untouched (later keys take precedence).
    foreach ($def->attr as $k => $v) $this->attr[$k] = $v;
    foreach ($def->attr_transform_pre as $k => $v) $this->attr_transform_pre[$k] = $v;
    foreach ($def->attr_transform_post as $k => $v) $this->attr_transform_post[$k] = $v;
    foreach ($def->auto_close as $k => $v) $this->auto_close[$k] = $v;
    foreach ($def->excludes as $k => $v) $this->excludes[$k] = $v;

    // A non-null child definition in $def replaces the current one.
    if (!is_null($def->child)) $this->child = $def->child;

    // Content models are combined as alternatives. If this definition has
    // no content model yet, take the incoming one verbatim so the result
    // does not start with a dangling ' | ' separator.
    if (!empty($def->content_model)) {
        if (empty($this->content_model)) {
            $this->content_model = $def->content_model;
        } else {
            $this->content_model .= ' | ' . $def->content_model;
        }
    }

    // Non-mergeable scalars: the value from $def wins whenever it is set.
    if (!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
    if (!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
}
}
?>

View File

@ -34,6 +34,9 @@ require_once 'HTMLPurifier/HTMLModule/Tables.php';
require_once 'HTMLPurifier/HTMLModule/Image.php';
require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
// compat modules
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
HTMLPurifier_ConfigSchema::define(
'HTML', 'EnableAttrID', false, 'bool',
'Allows the ID attribute in HTML. This is disabled by default '.
@ -168,19 +171,19 @@ class HTMLPurifier_HTMLDefinition
/**
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
* @public
*/ // use + operator
*/
var $info_tag_transform = array();
/**
* List of HTMLPurifier_AttrTransform to be performed before validation.
* Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
* @public
*/ // use array_merge or a foreach loop
*/
var $info_attr_transform_pre = array();
/**
* List of HTMLPurifier_AttrTransform to be performed after validation.
* Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
* @public
*/ // use array_merge or a foreach loop
*/
var $info_attr_transform_post = array();
/**
@ -241,6 +244,7 @@ class HTMLPurifier_HTMLDefinition
// this will eventually influence module loading
$this->strict = $config->get('HTML', 'Strict');
// order is important!
$this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
$this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext();
$this->modules['List'] = new HTMLPurifier_HTMLModule_List();
@ -251,6 +255,8 @@ class HTMLPurifier_HTMLDefinition
$this->modules['Image'] = new HTMLPurifier_HTMLModule_Image();
$this->modules['StyleAttribute']= new HTMLPurifier_HTMLModule_StyleAttribute();
$this->modules['TransformToStrict'] = new HTMLPurifier_HTMLModule_TransformToStrict($config);
$this->attr_types = new HTMLPurifier_AttrTypes();
$this->attr_collections = new HTMLPurifier_AttrCollections();
$this->content_sets = new HTMLPurifier_ContentSets();
@ -279,7 +285,7 @@ class HTMLPurifier_HTMLDefinition
// would be nice if we could put each of these in their
// own object, would make this hookable too!
$this->setupInfo($config);
$this->processModules($config);
$this->setupAttrTransform($config);
$this->setupBlockWrapper($config);
$this->setupParent($config);
@ -288,24 +294,39 @@ class HTMLPurifier_HTMLDefinition
}
/**
* Sets up the info array.
* Processes the modules, setting up related info variables
* @param $config Instance of HTMLPurifier_Config
*/
function setupInfo($config) {
function processModules($config) {
$this->attr_collections->setup($this->attr_types, $this->modules);
$this->content_sets->setup($this->modules);
$this->info_content_sets = $this->content_sets->lookup;
foreach ($this->modules as $module_i => $module) {
// process element-wise definitions
foreach ($module->info as $name => $def) {
$def =& $this->modules[$module_i]->info[$name];
// setup info
if (!isset($this->info[$name])) {
if ($def->standalone) {
$this->info[$name] = $this->modules[$module_i]->info[$name];
} else {
// attempting to merge into an element that doesn't
// exist, ignore it
continue;
}
} else {
$this->info[$name]->mergeIn($this->modules[$module_i]->info[$name]);
}
// process info
$def = $this->info[$name];
// attribute value expansions
$this->attr_collections->performInclusions($def->attr);
$this->attr_collections->expandIdentifiers(
$def->attr, $this->attr_types);
// chameleon data, set descendants_are_inline
// descendants_are_inline, for ChildDef_Chameleon
if (is_string($def->content_model) &&
strpos($def->content_model, 'Inline') !== false) {
if ($name != 'del' && $name != 'ins') {
@ -317,13 +338,16 @@ class HTMLPurifier_HTMLDefinition
// set child def from content model
$this->content_sets->generateChildDef($def, $module);
// setup info
$this->info[$name] = $def;
if ($this->info_parent == $name) {
$this->info_parent_def = $this->info[$name];
}
}
// merge in global info variables from module
foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v;
foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v;
foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v;
}
}
/**
@ -369,17 +393,13 @@ class HTMLPurifier_HTMLDefinition
*/
function setupCompat($config) {
// convenience for compat
$e_Inline = new HTMLPurifier_ChildDef_Optional(
$this->info_content_sets['Inline'] +
array('#PCDATA' => true));
// blockquote changes, implement in TransformStrict and Legacy
if ($this->strict) {
$this->info['blockquote']->child =
new HTMLPurifier_ChildDef_StrictBlockquote(
$this->info_content_sets['Block'] +
array('#PCDATA' => true));
} else {
// blockquote alt child def, implement in Legacy
if (!$this->strict) {
$this->info['blockquote']->child =
new HTMLPurifier_ChildDef_Optional(
$this->info_content_sets['Flow'] +
@ -409,8 +429,7 @@ class HTMLPurifier_HTMLDefinition
array('#PCDATA' => true, 'p' => true));
}
// custom, not sure where to implement, because it's not
// just /one/ module
// deprecated config setting, implement in DisableURI module
if ($config->get('Attr', 'DisableURI')) {
$this->info['a']->attr['href'] =
$this->info['img']->attr['longdesc'] =
@ -427,28 +446,7 @@ class HTMLPurifier_HTMLDefinition
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
}
// deprecated elements transforms, implement in TransformToStrict
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
$this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
$this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
// deprecated attribute transforms, implement in TransformToStrict
$this->info['h1']->attr_transform_pre[] =
$this->info['h2']->attr_transform_pre[] =
$this->info['h3']->attr_transform_pre[] =
$this->info['h4']->attr_transform_pre[] =
$this->info['h5']->attr_transform_pre[] =
$this->info['h6']->attr_transform_pre[] =
$this->info['p'] ->attr_transform_pre[] =
new HTMLPurifier_AttrTransform_TextAlign();
// xml:lang <=> lang mirroring, implement in TransformToStrict?
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
$this->info_global_attr['lang'] = new HTMLPurifier_AttrDef_Lang();
// setup allowed elements, obsoleted by Modules? (does offer
// different functionality)
// setup allowed elements, SubtractiveWhitelist module
$allowed_elements = $config->get('HTML', 'AllowedElements');
if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {

View File

@ -53,6 +53,24 @@ class HTMLPurifier_HTMLModule
*/
var $attr_collections = array();
/**
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
* @public
*/
var $info_tag_transform = array();
/**
* Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
* @public
*/
var $info_attr_transform_pre = array();
/**
* Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
* @public
*/
var $info_attr_transform_post = array();
/**
* Boolean flag that indicates whether or not getChildDef is implemented.
* For optimization reasons: may save a call to a function. Be sure
@ -72,6 +90,7 @@ class HTMLPurifier_HTMLModule
* @public
*/
function getChildDef($def) {
    // base implementation: the argument is ignored and false is returned
    return false;
}
}
?>

View File

@ -13,13 +13,13 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
var $elements = array('bdo');
var $info = array();
var $content_sets = array('Inline' => 'bdo');
var $attr_collections_info = array(
var $attr_collections = array(
'I18N' => array('dir' => false)
);
function HTMLPurifier_HTMLModule_Bdo() {
$dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
$this->attr_collections_info['I18N']['dir'] = $dir;
$this->attr_collections['I18N']['dir'] = $dir;
$this->info['bdo'] = new HTMLPurifier_ElementDef();
$this->info['bdo']->attr = array(
0 => array('Core', 'Lang'),
@ -33,7 +33,7 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
$this->info['bdo']->content_model = '#PCDATA | Inline';
$this->info['bdo']->content_model_type = 'optional';
// provides fallback behavior if dir's missing (dir is required)
$this->info['bdo']->attr_transform_post[] =
$this->info['bdo']->attr_transform_post['required-dir'] =
new HTMLPurifier_AttrTransform_BdoDir();
}

View File

@ -45,7 +45,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
$this->info[$element]->content_model_type = 'empty';
} elseif ($element == 'blockquote') {
$this->info[$element]->content_model = 'Heading | Block | List';
$this->info[$element]->content_model_type = 'strictblockquote';
$this->info[$element]->content_model_type = 'optional';
} elseif ($element == 'div') {
$this->info[$element]->content_model = '#PCDATA | Flow';
$this->info[$element]->content_model_type = 'optional';

View File

@ -1,5 +1,7 @@
<?php
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
/**
* Proprietary module that transforms deprecated elements into Strict
* HTML (see HTML 4.01 and XHTML 1.0) when possible.
@ -8,7 +10,62 @@
class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
{

    /**
     * Names of the elements this module touches. The module does not define
     * them; the constructor only creates non-standalone definitions for them.
     */
    var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote');

    /**
     * Deprecated tag name to tag transform. The false values are
     * placeholders that the constructor overwrites with real objects.
     */
    var $info_tag_transform = array(
        'font'   => false,
        'menu'   => false,
        'dir'    => false,
        'center' => false
    );

    /**
     * Attribute collection entries supplied by this module. The false value
     * is a placeholder that the constructor overwrites.
     */
    var $attr_collections = array(
        'Lang' => array(
            'lang' => false
        )
    );

    /**
     * Fills in the placeholder properties and builds the partial element
     * definitions.
     * @param $config Configuration object; only HTML.Strict is read from it
     */
    function HTMLPurifier_HTMLModule_TransformToStrict($config) {

        // tag transforms for deprecated elements
        $this->info_tag_transform['font']   = new HTMLPurifier_TagTransform_Font();
        $this->info_tag_transform['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
        $this->info_tag_transform['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
        $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();

        // one partial (non-standalone) definition per listed element
        foreach ($this->elements as $element) {
            $partial = new HTMLPurifier_ElementDef();
            $partial->standalone = false;
            $this->info[$element] = $partial;
        }

        // headings and paragraphs get the deprecated align transform,
        // registered under the 'align' key
        $text_align = new HTMLPurifier_AttrTransform_TextAlign();
        foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p') as $element) {
            $this->info[$element]->attr_transform_pre['align'] = $text_align;
        }

        // xml:lang <=> lang mirroring; this is overridden in TransformToXHTML11
        $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
        $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();

        // in strict mode blockquote switches to the custom content model type
        // and its child is reset to false so that it is recalculated
        if ($config->get('HTML', 'Strict')) {
            $this->info['blockquote']->content_model_type = 'strictblockquote';
            $this->info['blockquote']->child = false;
        }

    }

    var $defines_child_def = true;

    /**
     * Builds the child definition for the 'strictblockquote' content model type.
     * @param $def Element definition whose content_model_type is inspected
     * @return HTMLPurifier_ChildDef_StrictBlockquote built from the
     *         definition's content model, or false for any other type
     */
    function getChildDef($def) {
        if ($def->content_model_type == 'strictblockquote') {
            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
        }
        return false;
    }

}

View File

@ -130,7 +130,9 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$elements = array();
$attr = array();
if (isset($def->elements)) {
if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context);
if ($def->type == 'strictblockquote') {
$def->validateChildren(array(), $this->config, $context);
}
$elements = $def->elements;
} elseif ($def->type == 'chameleon') {
$attr['rowspan'] = 2;