0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-23 00:41:52 +00:00

[1.7.0] Implement and hook-in Tidy module setup.

- CommonAttributes factored into XMLCommonAttributes and NonXMLCommonAttributes
- Tidy abstract module was completely refactored in the interest of usability
- Add friendly error message if module does not have name

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1070 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-05-19 21:00:12 +00:00
parent 83a50465dc
commit 4d38c02932
13 changed files with 552 additions and 297 deletions

2
NEWS
View File

@ -12,6 +12,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
1.7.0, unknown release date 1.7.0, unknown release date
# Completely refactored HTMLModuleManager, decentralizing safety # Completely refactored HTMLModuleManager, decentralizing safety
information information
# Transform modules changed to Tidy modules, which offer more flexibility
and better modularization
. Unit test for ElementDef created, ElementDef behavior modified to . Unit test for ElementDef created, ElementDef behavior modified to
be more flexible be more flexible
. Added convenience functions for HTMLModule constructors . Added convenience functions for HTMLModule constructors

5
TODO
View File

@ -10,10 +10,9 @@ TODO List
1.7 release [Advanced API] 1.7 release [Advanced API]
# Complete advanced API, and fully document it # Complete advanced API, and fully document it
- Add framework for unsafe attributes - Add framework for unsafe attributes
- Wire in modes (configuration, module and manager wise) - Document Tidy modules
- Reorganize configuration directives - Reorganize configuration directives
- Determine handling for complex/cascading configuration directives - Set up doctype object inside configuration object
- Reorganize transformation modules
- Set up anonymous module management by HTMLDefinition - Set up anonymous module management by HTMLDefinition
# Implement HTMLDefinition caching using serialize # Implement HTMLDefinition caching using serialize
# Implement all deprecated tags and attributes # Implement all deprecated tags and attributes

View File

@ -1,5 +1,7 @@
<?php <?php
require_once 'HTMLPurifier/HTMLModule.php';
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{ {
var $name = 'CommonAttributes'; var $name = 'CommonAttributes';
@ -12,9 +14,7 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
'id' => 'ID', 'id' => 'ID',
'title' => 'CDATA', 'title' => 'CDATA',
), ),
'Lang' => array( 'Lang' => array(),
'xml:lang' => 'LanguageCode',
),
'I18N' => array( 'I18N' => array(
0 => array('Lang'), // proprietary, for xml:lang/lang 0 => array('Lang'), // proprietary, for xml:lang/lang
), ),

View File

@ -0,0 +1,16 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
/**
 * Module that contributes the plain (non-namespaced) "lang" attribute
 * to the 'Lang' attribute collection.
 */
class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule
{

    /** Name of this module. */
    var $name = 'NonXMLCommonAttributes';

    /** Attribute collections this module adds to, keyed by collection name. */
    var $attr_collections = array(
        'Lang' => array('lang' => 'LanguageCode'),
    );

}
?>

View File

@ -25,13 +25,13 @@ HTMLPurifier_ConfigSchema::defineAllowedValues(
); );
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'TidyAdd', array(), 'list', ' 'HTML', 'TidyAdd', array(), 'lookup', '
Fixes to add to the default set of Tidy fixes as per your level. This Fixes to add to the default set of Tidy fixes as per your level. This
directive has been available since 1.7.0. directive has been available since 1.7.0.
' ); ' );
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'TidyRemove', array(), 'list', ' 'HTML', 'TidyRemove', array(), 'lookup', '
Fixes to remove from the default set of Tidy fixes as per your level. This Fixes to remove from the default set of Tidy fixes as per your level. This
directive has been available since 1.7.0. directive has been available since 1.7.0.
' ); ' );
@ -49,6 +49,11 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
*/ */
var $levels = array(0 => 'none', 'light', 'medium', 'heavy'); var $levels = array(0 => 'none', 'light', 'medium', 'heavy');
/**
* Default level to place all fixes in. Disabled by default
*/
var $defaultLevel = null;
/** /**
* Lists of fixes used by getFixesForLevel(). Format is: * Lists of fixes used by getFixesForLevel(). Format is:
* HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2'); * HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
@ -66,20 +71,32 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
* subtracted fix has no effect. * subtracted fix has no effect.
*/ */
function construct($config) { function construct($config) {
// create fixes, initialize fixesForLevel
$fixes = $this->makeFixes();
$this->makeFixesForLevel($fixes);
// figure out which fixes to use
$level = $config->get('HTML', 'TidyLevel'); $level = $config->get('HTML', 'TidyLevel');
$fixes = $this->getFixesForLevel($level); $fixes_lookup = $this->getFixesForLevel($level);
// get custom fix declarations: these need namespace processing
$add_fixes = $config->get('HTML', 'TidyAdd'); $add_fixes = $config->get('HTML', 'TidyAdd');
foreach ($add_fixes as $fix) {
$fixes[$fix] = true;
}
$remove_fixes = $config->get('HTML', 'TidyRemove'); $remove_fixes = $config->get('HTML', 'TidyRemove');
foreach ($remove_fixes as $fix) {
unset($fixes[$fix]); foreach ($fixes as $name => $fix) {
// needs to be refactored a little to implement globbing
if (
isset($remove_fixes[$name]) ||
(!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))
) {
unset($fixes[$name]);
}
} }
// populate this module with necessary fixes
$this->populate($fixes); $this->populate($fixes);
} }
/** /**
@ -113,13 +130,111 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
return $ret; return $ret;
} }
/**
 * Fills in $fixesForLevel for the module's $defaultLevel with the names
 * of every fix in the array passed in. Does nothing when no default
 * level is set; subclasses may also override it, or bypass it by
 * filling $fixesForLevel themselves.
 * @param $fixes Associative array of fix name to fix implementation
 */
function makeFixesForLevel($fixes) {
    if (!isset($this->defaultLevel)) {
        return;
    }
    $level = $this->defaultLevel;
    if (isset($this->fixesForLevel[$level])) {
        // only the names matter here; implementations are used later
        $this->fixesForLevel[$level] = array_keys($fixes);
        return;
    }
    // the default level must be one the module already knows about
    trigger_error(
        'Default level ' . $level . ' does not exist',
        E_USER_ERROR
    );
}
/** /**
* Populates the module with transforms and other special-case code * Populates the module with transforms and other special-case code
* based on a list of fixes passed to it * based on a list of fixes passed to it
* @abstract
* @param $lookup Lookup table of fixes to activate * @param $lookup Lookup table of fixes to activate
*/ */
function populate($lookup) {} function populate($fixes) {
// $fixes maps a fix name (parsed by getFixType()) to the value that
// gets installed for it
foreach ($fixes as $name => $fix) {
// determine what the fix is for
list($type, $params) = $this->getFixType($name);
switch ($type) {
// attribute transforms: stored under the attribute name, either on
// one element's definition or on the module as a whole
case 'attr_transform_pre':
case 'attr_transform_post':
$attr = $params['attr'];
if (isset($params['element'])) {
$element = $params['element'];
// reuse the element definition if the module already has one,
// otherwise start from a blank one
if (empty($this->info[$element])) {
$e =& $this->addBlankElement($element);
} else {
$e =& $this->info[$element];
}
} else {
// no element given: target the module-level
// info_attr_transform_pre / info_attr_transform_post array
$type = "info_$type";
$e =& $this;
}
// bind by reference so the write below lands in the property
// named by $type rather than in a copy of it
$f =& $e->$type;
$f[$attr] = $fix;
break;
// tag transforms are stored on the module, keyed by element name
case 'tag_transform':
$this->info_tag_transform[$params['element']] = $fix;
break;
// these two are assigned directly as a property of the element
// definition
case 'child':
case 'content_model_type':
$element = $params['element'];
if (empty($this->info[$element])) {
$e =& $this->addBlankElement($element);
} else {
$e =& $this->info[$element];
}
$e->$type = $fix;
break;
// any other "#property" suffix is not understood
default:
trigger_error("Fix type $type not supported", E_USER_ERROR);
break;
}
}
}
/**
 * Splits a fix name of the form "element@attr#property" into the kind
 * of fix it denotes and the parameters needed to apply it. The "@attr"
 * and "#property" parts are optional, and the element part may be
 * empty (as in "@lang").
 * @param $name String name of fix
 * @return array(string $fix_type, array $fix_parameters)
 * @note $fix_parameters is type dependent, see populate() for usage
 *       of these parameters
 */
function getFixType($name) {
    $property = null;
    $attr     = null;

    // peel "#property" off first, then "@attr"; what remains of $name
    // is the element, which may be the empty string
    if (strpos($name, '#') !== false) {
        $pieces   = explode('#', $name);
        $name     = $pieces[0];
        $property = $pieces[1];
    }
    if (strpos($name, '@') !== false) {
        $pieces = explode('@', $name);
        $name   = $pieces[0];
        $attr   = $pieces[1];
    }

    $params = array();
    if ($name !== '') {
        $params['element'] = $name;
    }

    if ($attr === null) {
        // no attribute: a bare element is a tag transform, otherwise
        // the property itself names the fix type
        $type = ($property === null) ? 'tag_transform' : $property;
        return array($type, $params);
    }

    // attribute transform; the property selects pre/post, pre by default
    $params['attr'] = $attr;
    $stage = ($property === null) ? 'pre' : $property;
    return array('attr_transform_' . $stage, $params);
}
/**
 * Defines all fixes the module will perform in a compact
 * associative array of fix name to fix implementation.
 *
 * Subclasses are expected to override this. The base implementation
 * returns an empty array rather than nothing, so that code which
 * iterates the result or takes its keys works on a module that
 * defines no fixes.
 * @abstract
 * @return Associative array of fix name to fix implementation
 */
function makeFixes() {
    return array();
}
} }

View File

@ -0,0 +1,20 @@
<?php
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
/**
 * Tidy module holding the single XHTML-specific fix: a transform
 * applied to the "lang" attribute on any element.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTML extends
    HTMLPurifier_HTMLModule_Tidy
{

    /** Name of this module. */
    var $name = 'Tidy_XHTML';

    /** Level that all of this module's fixes are placed in. */
    var $defaultLevel = 'medium';

    /**
     * @return Associative array of fix name to fix implementation
     */
    function makeFixes() {
        // "@lang" names no element, so the fix is not tied to one tag
        return array(
            '@lang' => new HTMLPurifier_AttrTransform_Lang(),
        );
    }

}
?>

View File

@ -0,0 +1,179 @@
<?php
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
/**
 * Base class for Tidy modules that declares the tag and attribute
 * transforms for deprecated elements and attributes. It sets neither
 * a name nor a default level; subclasses provide those.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
    HTMLPurifier_HTMLModule_Tidy
{

    /**
     * Builds the table of fixes. Keys are fix names: a bare element
     * name for a tag transform, "element@attr" for an attribute
     * transform on that element.
     * @return Associative array of fix name to fix implementation
     */
    function makeFixes() {

        $r = array();

        // == deprecated tag transforms ===================================

        $r['font']   = new HTMLPurifier_TagTransform_Font();
        $r['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
        $r['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
        $r['center'] = new HTMLPurifier_TagTransform_Center();

        // == deprecated attribute transforms =============================

        // @align for caption ---------------------------------------------
        $r['caption@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                // we're following IE's behavior, not Firefox's, due
                // to the fact that no one supports caption-side:right,
                // W3C included (with CSS 2.1). This is a slightly
                // unreasonable attribute!
                'left'   => 'text-align:left;',
                'right'  => 'text-align:right;',
                'top'    => 'caption-side:top;',
                'bottom' => 'caption-side:bottom;' // not supported by IE
            ));

        // @align for img -------------------------------------------------
        $r['img@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                'left'   => 'float:left;',
                'right'  => 'float:right;',
                'top'    => 'vertical-align:top;',
                'middle' => 'vertical-align:middle;',
                'bottom' => 'vertical-align:baseline;',
            ));

        // @align for table -----------------------------------------------
        $r['table@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                'left'   => 'float:left;',
                'center' => 'margin-left:auto;margin-right:auto;',
                'right'  => 'float:right;'
            ));

        // @align for hr --------------------------------------------------
        $r['hr@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                // we use both text-align and margin because these work
                // for different browsers (IE and Firefox, respectively)
                // and the melange makes for a pretty cross-compatible
                // solution
                'left'   => 'margin-left:0;margin-right:auto;text-align:left;',
                'center' => 'margin-left:auto;margin-right:auto;text-align:center;',
                'right'  => 'margin-left:auto;margin-right:0;text-align:right;'
            ));

        // @align for h1, h2, h3, h4, h5, h6, p, div ----------------------
        // {{{
            $align_lookup = array();
            $align_values = array('left', 'right', 'center', 'justify');
            foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;";
        // }}}
        $r['h1@align'] =
        $r['h2@align'] =
        $r['h3@align'] =
        $r['h4@align'] =
        $r['h5@align'] =
        $r['h6@align'] =
        $r['p@align']  =
        $r['div@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup);

        // @bgcolor for table, tr, td, th ---------------------------------
        // tr is part of this group (see heading) and must be registered
        // like the other three, otherwise bgcolor on rows is not
        // transformed
        $r['table@bgcolor'] =
        $r['tr@bgcolor'] =
        $r['td@bgcolor'] =
        $r['th@bgcolor'] =
            new HTMLPurifier_AttrTransform_BgColor();

        // @border for img ------------------------------------------------
        $r['img@border'] = new HTMLPurifier_AttrTransform_Border();

        // @clear for br --------------------------------------------------
        $r['br@clear'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('clear', array(
                'left'  => 'clear:left;',
                'right' => 'clear:right;',
                'all'   => 'clear:both;',
                'none'  => 'clear:none;',
            ));

        // @height for td, th ---------------------------------------------
        $r['td@height'] =
        $r['th@height'] =
            new HTMLPurifier_AttrTransform_Length('height');

        // @hspace for img ------------------------------------------------
        $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');

        // @name for img, a -----------------------------------------------
        $r['img@name'] =
        $r['a@name'] = new HTMLPurifier_AttrTransform_Name();

        // @noshade for hr ------------------------------------------------
        // this transformation is not precise but often good enough.
        // different browsers use different styles to designate noshade
        $r['hr@noshade'] =
            new HTMLPurifier_AttrTransform_BoolToCSS(
                'noshade',
                'color:#808080;background-color:#808080;border:0;'
            );

        // @nowrap for td, th ---------------------------------------------
        $r['td@nowrap'] =
        $r['th@nowrap'] =
            new HTMLPurifier_AttrTransform_BoolToCSS(
                'nowrap',
                'white-space:nowrap;'
            );

        // @size for hr ---------------------------------------------------
        $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height');

        // @type for li, ol, ul -------------------------------------------
        // {{{
            $ul_types = array(
                'disc'   => 'list-style-type:disc;',
                'square' => 'list-style-type:square;',
                'circle' => 'list-style-type:circle;'
            );
            $ol_types = array(
                '1'   => 'list-style-type:decimal;',
                'i'   => 'list-style-type:lower-roman;',
                'I'   => 'list-style-type:upper-roman;',
                'a'   => 'list-style-type:lower-alpha;',
                'A'   => 'list-style-type:upper-alpha;'
            );
            // li accepts the union of the ul and ol values
            $li_types = $ul_types + $ol_types;
        // }}}

        $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types);
        $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true);
        $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true);

        // @vspace for img ------------------------------------------------
        $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace');

        // @width for hr, td, th ------------------------------------------
        $r['td@width'] =
        $r['th@width'] =
        $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width');

        return $r;

    }

}
/**
 * Named variant of the XHTMLAndHTML4 fix set; it adds no fixes of its
 * own and only supplies a module name and a default level.
 */
class HTMLPurifier_HTMLModule_Tidy_Transitional extends
    HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{

    /** Name of this module. */
    var $name = 'Tidy_Transitional';

    /**
     * Level that all inherited fixes are placed in.
     * switch this to heavy once we implement legacy fully
     */
    var $defaultLevel = 'light';

}
/**
 * Named variant of the XHTMLAndHTML4 fix set; it adds no fixes of its
 * own and only supplies a module name and a default level.
 */
class HTMLPurifier_HTMLModule_Tidy_Strict extends
    HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{

    /** Name of this module. */
    var $name = 'Tidy_Strict';

    /** Level that all inherited fixes are placed in. */
    var $defaultLevel = 'light';

}
?>

View File

@ -0,0 +1,27 @@
<?php
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
/**
 * Tidy module that switches blockquote over to the 'strictblockquote'
 * content model and supplies the child definition for that model.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTMLStrict extends
    HTMLPurifier_HTMLModule_Tidy
{

    /** Name of this module. */
    var $name = 'Tidy_XHTMLStrict';

    /** Level that all of this module's fixes are placed in. */
    var $defaultLevel = 'light';

    /** Flags that this module implements getChildDef(). */
    var $defines_child_def = true;

    /**
     * @return Associative array of fix name to fix implementation
     */
    function makeFixes() {
        return array(
            'blockquote#child'              => false,
            'blockquote#content_model_type' => 'strictblockquote',
        );
    }

    /**
     * Supplies the child definition for the 'strictblockquote' content
     * model type.
     * @param $def Element definition being resolved
     * @return HTMLPurifier_ChildDef_StrictBlockquote, or false when
     *         $def uses any other content model type
     */
    function getChildDef($def) {
        if ($def->content_model_type == 'strictblockquote') {
            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
        }
        return false;
    }

}
?>

View File

@ -1,216 +0,0 @@
<?php
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
require_once 'HTMLPurifier/TagTransform/Simple.php';
require_once 'HTMLPurifier/TagTransform/Center.php';
require_once 'HTMLPurifier/TagTransform/Font.php';
require_once 'HTMLPurifier/AttrTransform/Lang.php';
require_once 'HTMLPurifier/AttrTransform/BgColor.php';
require_once 'HTMLPurifier/AttrTransform/BoolToCSS.php';
require_once 'HTMLPurifier/AttrTransform/Border.php';
require_once 'HTMLPurifier/AttrTransform/Name.php';
require_once 'HTMLPurifier/AttrTransform/Length.php';
require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
/**
* Proprietary module that transforms deprecated elements into Strict
* HTML (see HTML 4.01 and XHTML 1.0) when possible.
*/
class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
{
// name of this module
var $name = 'TransformToStrict';
// the 'Lang' collection starts out empty; the constructor adds 'lang'
var $attr_collections = array(
'Lang' => array()
);
/**
 * Constructor. Registers the tag transforms, creates blank element
 * definitions for every element touched below, and attaches the
 * attribute transforms to them.
 */
function HTMLPurifier_HTMLModule_TransformToStrict() {
// behavior with transformations when there's another CSS property
// working on it is interesting: the CSS will *always* override
// the deprecated attribute, whereas an inline CSS declaration will
// override the corresponding declaration in, say, an external
// stylesheet. This behavior won't affect most people, but it
// does represent an operational difference we CANNOT fix.
// == deprecated tag transforms ===================================
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
$this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
$this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
// == miscellaneous transforms ====================================
// initialize blank element definitions
$elements = array( // alphabetically sorted
'a', 'blockquote', 'br', 'caption', 'h1', 'h2', 'h3', 'h4',
'h5', 'h6', 'hr', 'img', 'li', 'ol', 'p', 'table', 'td',
'th', 'tr', 'ul' );
foreach ($elements as $name) $this->addBlankElement($name);
// this should not be applied to XHTML 1.0 Transitional, ONLY
// XHTML 1.0 Strict. We may need to put this in another class.
$this->info['blockquote']->content_model_type = 'strictblockquote';
$this->info['blockquote']->child = false;
// == deprecated attribute transforms =============================
// this segment will probably need to modularized in some fashion
// in order to allow for different "levels" of transformation
// @align for caption ---------------------------------------------
$this->info['caption']->attr_transform_pre['align'] =
new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
// we're following IE's behavior, not Firefox's, due
// to the fact that no one supports caption-side:right,
// W3C included (with CSS 2.1). This is a slightly
// unreasonable attribute!
'left' => 'text-align:left;',
'right' => 'text-align:right;',
'top' => 'caption-side:top;',
'bottom' => 'caption-side:bottom;' // not supported by IE
));
// @align for img -------------------------------------------------
$this->info['img']->attr_transform_pre['align'] =
new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
'left' => 'float:left;',
'right' => 'float:right;',
'top' => 'vertical-align:top;',
'middle' => 'vertical-align:middle;',
'bottom' => 'vertical-align:baseline;',
));
// @align for table -----------------------------------------------
$this->info['table']->attr_transform_pre['align'] =
new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
'left' => 'float:left;',
'center' => 'margin-left:auto;margin-right:auto;',
'right' => 'float:right;'
));
// @align for hr -----------------------------------------------
$this->info['hr']->attr_transform_pre['align'] =
new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
// we use both text-align and margin because these work
// for different browsers (IE and Firefox, respectively)
// and the melange makes for a pretty cross-compatible
// solution
'left' => 'margin-left:0;margin-right:auto;text-align:left;',
'center' => 'margin-left:auto;margin-right:auto;text-align:center;',
'right' => 'margin-left:auto;margin-right:0;text-align:right;'
));
// @align for h1, h2, h3, h4, h5, h6, p ---------------------------
// build the value => CSS lookup once; it is identical for all of them
$align_lookup = array();
$align_values = array('left', 'right', 'center', 'justify');
foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;";
$this->info['h1']->attr_transform_pre['align'] =
$this->info['h2']->attr_transform_pre['align'] =
$this->info['h3']->attr_transform_pre['align'] =
$this->info['h4']->attr_transform_pre['align'] =
$this->info['h5']->attr_transform_pre['align'] =
$this->info['h6']->attr_transform_pre['align'] =
$this->info['p'] ->attr_transform_pre['align'] =
new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup);
// @bgcolor for table, tr, td, th ---------------------------------
$this->info['table']->attr_transform_pre['bgcolor'] =
$this->info['tr']->attr_transform_pre['bgcolor'] =
$this->info['td']->attr_transform_pre['bgcolor'] =
$this->info['th']->attr_transform_pre['bgcolor'] =
new HTMLPurifier_AttrTransform_BgColor();
// @border for img ------------------------------------------------
$this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border();
// @clear for br --------------------------------------------------
$this->info['br']->attr_transform_pre['clear'] =
new HTMLPurifier_AttrTransform_EnumToCSS('clear', array(
'left' => 'clear:left;',
'right' => 'clear:right;',
'all' => 'clear:both;',
'none' => 'clear:none;',
));
// @height for td, th ---------------------------------------------
$this->info['td']->attr_transform_pre['height'] =
$this->info['th']->attr_transform_pre['height'] =
new HTMLPurifier_AttrTransform_Length('height');
// @hspace for img ------------------------------------------------
$this->info['img']->attr_transform_pre['hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');
// @lang ----------------------------------------------------------
// this is overridden in TransformToXHTML11
$this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
$this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
// @name for img, a -----------------------------------------------
$this->info['img']->attr_transform_pre['name'] =
$this->info['a']->attr_transform_pre['name'] = new HTMLPurifier_AttrTransform_Name();
// @noshade for hr ------------------------------------------------
// this transformation is not precise but often good enough.
// different browsers use different styles to designate noshade
$this->info['hr']->attr_transform_pre['noshade'] =
new HTMLPurifier_AttrTransform_BoolToCSS(
'noshade',
'color:#808080;background-color:#808080;border:0;'
);
// @nowrap for td, th ---------------------------------------------
$this->info['td']->attr_transform_pre['nowrap'] =
$this->info['th']->attr_transform_pre['nowrap'] =
new HTMLPurifier_AttrTransform_BoolToCSS(
'nowrap',
'white-space:nowrap;'
);
// @size for hr --------------------------------------------------
$this->info['hr']->attr_transform_pre['size'] = new HTMLPurifier_AttrTransform_Length('size', 'height');
// @type for li, ol, ul -------------------------------------------
$ul_types = array(
'disc' => 'list-style-type:disc;',
'square' => 'list-style-type:square;',
'circle' => 'list-style-type:circle;'
);
$ol_types = array(
'1' => 'list-style-type:decimal;',
'i' => 'list-style-type:lower-roman;',
'I' => 'list-style-type:upper-roman;',
'a' => 'list-style-type:lower-alpha;',
'A' => 'list-style-type:upper-alpha;'
);
// li accepts the union of the ul and ol values
$li_types = $ul_types + $ol_types;
$this->info['ul']->attr_transform_pre['type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types);
$this->info['ol']->attr_transform_pre['type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true);
$this->info['li']->attr_transform_pre['type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true);
// @vspace for img ------------------------------------------------
$this->info['img']->attr_transform_pre['vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace');
// @width for hr, td, th ------------------------------------------
$this->info['td']->attr_transform_pre['width'] =
$this->info['th']->attr_transform_pre['width'] =
$this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width');
}
// flags that this module implements getChildDef()
var $defines_child_def = true;
/**
 * Supplies the child definition for the 'strictblockquote' content
 * model type set on blockquote in the constructor.
 * @param $def Element definition being resolved
 * @return HTMLPurifier_ChildDef_StrictBlockquote, or false when $def
 *         uses any other content model type
 */
function getChildDef($def) {
if ($def->content_model_type != 'strictblockquote') return false;
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
}
}
?>

View File

@ -1,36 +0,0 @@
<?php
require_once 'HTMLPurifier/AttrTransform/Lang.php';
/**
* Proprietary module that transforms XHTML 1.0 deprecated aspects into
* XHTML 1.1 compliant ones, when possible. For maximum effectiveness,
* HTMLPurifier_HTMLModule_TransformToStrict must also be loaded
* (otherwise, elements that were deprecated from Transitional to Strict
* will not be transformed).
*
* XHTML 1.1 compliant document are automatically XHTML 1.0 compliant too,
* although they may not be as friendly to legacy browsers.
*/
class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule
{

    /** Name of this module. */
    var $name = 'TransformToXHTML11';

    /** Sets 'lang' in the 'Lang' collection to false. */
    var $attr_collections = array(
        'Lang' => array('lang' => false), // remove it
    );

    /** Sets the post-validation 'lang' transform to false. */
    var $info_attr_transform_post = array('lang' => false); // remove it

    /**
     * Constructor. Installs the lang transform as a pre-validation
     * transform.
     */
    function HTMLPurifier_HTMLModule_TransformToXHTML11() {
        $lang_transform = new HTMLPurifier_AttrTransform_Lang();
        $this->info_attr_transform_pre['lang'] = $lang_transform;
    }

}
?>

View File

@ -0,0 +1,16 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
/**
 * Module that contributes the "xml:lang" attribute to the 'Lang'
 * attribute collection.
 */
class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule
{

    /** Name of this module. */
    var $name = 'XMLCommonAttributes';

    /** Attribute collections this module adds to, keyed by collection name. */
    var $attr_collections = array(
        'Lang' => array('xml:lang' => 'LanguageCode'),
    );

}
?>

View File

@ -26,10 +26,14 @@ require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once 'HTMLPurifier/HTMLModule/Legacy.php'; require_once 'HTMLPurifier/HTMLModule/Legacy.php';
require_once 'HTMLPurifier/HTMLModule/Target.php'; require_once 'HTMLPurifier/HTMLModule/Target.php';
require_once 'HTMLPurifier/HTMLModule/Scripting.php'; require_once 'HTMLPurifier/HTMLModule/Scripting.php';
require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
// proprietary modules // tidy modules
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php'; require_once 'HTMLPurifier/HTMLModule/Tidy.php';
require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'Doctype', null, 'string/null', 'HTML', 'Doctype', null, 'string/null',
@ -113,40 +117,37 @@ class HTMLPurifier_HTMLModuleManager
'StyleAttribute', 'Scripting' 'StyleAttribute', 'Scripting'
); );
$transitional = array('Legacy', 'Target'); $transitional = array('Legacy', 'Target');
$xml = array('XMLCommonAttributes');
$non_xml = array('NonXMLCommonAttributes');
$this->doctypes->register( $this->doctypes->register(
'HTML 4.01 Transitional', false, 'HTML 4.01 Transitional', false,
array_merge($common, $transitional), array_merge($common, $transitional, $non_xml),
array('TransformToStrict') array('Tidy_Transitional')
// Tidy: Transitional
); );
$this->doctypes->register( $this->doctypes->register(
'HTML 4.01 Strict', false, 'HTML 4.01 Strict', false,
array_merge($common), array_merge($common, $non_xml),
array('TransformToStrict') array('Tidy_Strict')
// Tidy: Strict
); );
$this->doctypes->register( $this->doctypes->register(
'XHTML 1.0 Transitional', true, 'XHTML 1.0 Transitional', true,
array_merge($common, $transitional), array_merge($common, $transitional, $xml, $non_xml),
array('TransformToStrict') array('Tidy_Transitional', 'Tidy_XHTML')
// Tidy: Transitional, XHTML
); );
$this->doctypes->register( $this->doctypes->register(
'XHTML 1.0 Strict', true, 'XHTML 1.0 Strict', true,
array_merge($common), array_merge($common, $xml, $non_xml),
array('TransformToStrict') array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict')
// Tidy: Strict, XHTML
); );
$this->doctypes->register( $this->doctypes->register(
'XHTML 1.1', true, 'XHTML 1.1', true,
array_merge($common), array_merge($common, $xml),
array('TransformToStrict', 'TransformToXHTML11') array('Tidy_Strict', 'Tidy_XHTML') // Tidy_XHTML1_1
// Tidy: Strict, XHTML1_1
); );
} }
@ -194,6 +195,10 @@ class HTMLPurifier_HTMLModuleManager
} }
$module = new $module(); $module = new $module();
} }
if (empty($module->name)) {
trigger_error('Module instance of ' . get_class($module) . ' must have name');
return;
}
$this->registeredModules[$module->name] = $module; $this->registeredModules[$module->name] = $module;
} }
@ -257,7 +262,9 @@ class HTMLPurifier_HTMLModuleManager
foreach ($doctype->tidyModules as $module) { foreach ($doctype->tidyModules as $module) {
$this->processModule($module); $this->processModule($module);
// FIXME!!! initialize the tidy modules here if (method_exists($this->modules[$module], 'construct')) {
$this->modules[$module]->construct($config);
}
} }
// setup lookup table based on all valid modules // setup lookup table based on all valid modules

View File

@ -5,7 +5,7 @@ require_once 'HTMLPurifier/HTMLModule/Tidy.php';
Mock::generatePartial( Mock::generatePartial(
'HTMLPurifier_HTMLModule_Tidy', 'HTMLPurifier_HTMLModule_Tidy',
'HTMLPurifier_HTMLModule_Tidy_TestForConstruct', 'HTMLPurifier_HTMLModule_Tidy_TestForConstruct',
array('populate') array('makeFixes', 'makeFixesForLevel', 'populate')
); );
class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase
@ -49,7 +49,17 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase
$module->fixesForLevel['light'] = array('light-fix-1', 'light-fix-2'); $module->fixesForLevel['light'] = array('light-fix-1', 'light-fix-2');
$module->fixesForLevel['medium'] = array('medium-fix-1', 'medium-fix-2'); $module->fixesForLevel['medium'] = array('medium-fix-1', 'medium-fix-2');
$module->fixesForLevel['heavy'] = array('heavy-fix-1', 'heavy-fix-2'); $module->fixesForLevel['heavy'] = array('heavy-fix-1', 'heavy-fix-2');
// $module->HTMLPurifier_HTMLModule_Tidy(); // constructor
$j = 0;
$fixes = array(
'light-fix-1' => $lf1 = $j++,
'light-fix-2' => $lf2 = $j++,
'medium-fix-1' => $mf1 = $j++,
'medium-fix-2' => $mf2 = $j++,
'heavy-fix-1' => $hf1 = $j++,
'heavy-fix-2' => $hf2 = $j++
);
$module->setReturnValue('makeFixes', $fixes);
$config = HTMLPurifier_Config::create(array( $config = HTMLPurifier_Config::create(array(
'HTML.TidyLevel' => 'none' 'HTML.TidyLevel' => 'none'
@ -62,13 +72,23 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase
$config = HTMLPurifier_Config::create(array( $config = HTMLPurifier_Config::create(array(
'HTML.TidyLevel' => 'light' 'HTML.TidyLevel' => 'light'
)); ));
$module->expectAt($i++, 'populate', array($module->getFixesForLevel('light'))); $module->expectAt($i++, 'populate', array(array(
'light-fix-1' => $lf1,
'light-fix-2' => $lf2
)));
$module->construct($config); $module->construct($config);
$config = HTMLPurifier_Config::create(array( $config = HTMLPurifier_Config::create(array(
'HTML.TidyLevel' => 'heavy' 'HTML.TidyLevel' => 'heavy'
)); ));
$module->expectAt($i++, 'populate', array($module->getFixesForLevel('heavy'))); $module->expectAt($i++, 'populate', array(array(
'light-fix-1' => $lf1,
'light-fix-2' => $lf2,
'medium-fix-1' => $mf1,
'medium-fix-2' => $mf2,
'heavy-fix-1' => $hf1,
'heavy-fix-2' => $hf2
)));
$module->construct($config); $module->construct($config);
// fine grained tuning // fine grained tuning
@ -78,8 +98,8 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase
'HTML.TidyAdd' => array('light-fix-1', 'medium-fix-1') 'HTML.TidyAdd' => array('light-fix-1', 'medium-fix-1')
)); ));
$module->expectAt($i++, 'populate', array(array( $module->expectAt($i++, 'populate', array(array(
'light-fix-1' => true, 'light-fix-1' => $lf1,
'medium-fix-1' => true 'medium-fix-1' => $mf1
))); )));
$module->construct($config); $module->construct($config);
@ -88,8 +108,8 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase
'HTML.TidyRemove' => array('light-fix-1', 'medium-fix-1') 'HTML.TidyRemove' => array('light-fix-1', 'medium-fix-1')
)); ));
$module->expectAt($i++, 'populate', array(array( $module->expectAt($i++, 'populate', array(array(
'light-fix-2' => true, 'light-fix-2' => $lf2,
'medium-fix-2' => true 'medium-fix-2' => $mf2
))); )));
$module->construct($config); $module->construct($config);
@ -99,6 +119,112 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase
} }
/**
 * With a valid default level, every fix name is filed under that level
 * and the other levels stay empty.
 */
function test_makeFixesForLevel() {
    $fixes = array('fix-1' => 0, 'fix-2' => 1, 'fix-3' => 2);

    $module = new HTMLPurifier_HTMLModule_Tidy();
    $module->defaultLevel = 'heavy';
    $module->makeFixesForLevel($fixes);

    $by_level = $module->fixesForLevel;
    $this->assertIdentical($by_level['heavy'], array('fix-1', 'fix-2', 'fix-3'));
    $this->assertIdentical($by_level['medium'], array());
    $this->assertIdentical($by_level['light'], array());
}
/**
 * A default level the module does not know about raises an error.
 */
function test_makeFixesForLevel_undefinedLevel() {
    $fixes = array('fix-1' => 0);

    $module = new HTMLPurifier_HTMLModule_Tidy();
    $module->defaultLevel = 'bananas';

    // the expectation has to be registered before the call that errors
    $this->expectError('Default level bananas does not exist');
    $module->makeFixesForLevel($fixes);
}
/**
 * Checks how getFixType() parses each supported form of fix name.
 */
function test_getFixType() {
    // syntax needs documenting
    $module = new HTMLPurifier_HTMLModule_Tidy();

    // "#pre" is the default stage, so both spellings parse the same
    $pre_href = array('attr_transform_pre', array('element' => 'a', 'attr' => 'href'));

    // fix name => expected array(fix type, parameters)
    $expectations = array(
        'a'                => array('tag_transform', array('element' => 'a')),
        'a@href'           => $pre_href,
        'a@href#pre'       => $pre_href,
        'a@href#post'      => array('attr_transform_post', array('element' => 'a', 'attr' => 'href')),
        'xml:foo@xml:bar'  => array('attr_transform_pre', array('element' => 'xml:foo', 'attr' => 'xml:bar')),
        'blockquote#child' => array('child', array('element' => 'blockquote')),
        '@lang'            => array('attr_transform_pre', array('attr' => 'lang')),
        '@lang#post'       => array('attr_transform_post', array('attr' => 'lang')),
    );

    foreach ($expectations as $fix_name => $expected) {
        $this->assertIdentical($module->getFixType($fix_name), $expected);
    }
}
/**
 * populate() must leave a module in the same state as wiring up each
 * kind of fix by hand.
 */
function test_populate() {
    // distinct integers stand in for real fix implementations
    $element            = 0;
    $attr               = 1;
    $attr_post          = 2;
    $child              = 3;
    $content_model_type = 4;
    $global_attr        = 5;
    $global_attr_post   = 6;

    $module = new HTMLPurifier_HTMLModule_Tidy();
    $module->populate(array(
        'element'                    => $element,
        'element@attr'               => $attr,
        'element@attr#post'          => $attr_post,
        'element#child'              => $child,
        'element#content_model_type' => $content_model_type,
        '@attr'                      => $global_attr,
        '@attr#post'                 => $global_attr_post
    ));

    // build the expected module manually
    $expected = new HTMLPurifier_HTMLModule_Tidy();
    $def =& $expected->addBlankElement('element');
    $def->attr_transform_pre['attr']  = $attr;
    $def->attr_transform_post['attr'] = $attr_post;
    $def->child                       = $child;
    $def->content_model_type          = $content_model_type;
    $expected->info_tag_transform['element']    = $element;
    $expected->info_attr_transform_pre['attr']  = $global_attr;
    $expected->info_attr_transform_post['attr'] = $global_attr_post;

    $this->assertEqual($module, $expected);
}
} }
?> ?>