diff --git a/library/HTMLPurifier/HTMLModule/Tidy.php b/library/HTMLPurifier/HTMLModule/Tidy.php new file mode 100644 index 00000000..1b29f826 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy.php @@ -0,0 +1,126 @@ +General level of cleanliness the Tidy module should enforce. +There are four allowed values:

+
+
none
+
No extra tidying should be done
+
light
+
Only fix elements that would be discarded otherwise due to + lack of support in doctype
+
medium
+
Enforce best practices
+
heavy
+
Transform all deprecated elements and attributes to standards + compliant equivalents
+
+

This directive has been available since 1.7.0

+' ); +HTMLPurifier_ConfigSchema::defineAllowedValues( + 'HTML', 'TidyLevel', array('none', 'light', 'medium', 'heavy') +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'TidyAdd', array(), 'list', ' +Fixes to add to the default set of Tidy fixes as per your level. This +directive has been available since 1.7.0. +' ); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'TidyRemove', array(), 'list', ' +Fixes to remove from the default set of Tidy fixes as per your level. This +directive has been available since 1.7.0. +' ); + +/** + * Abstract class for a set of proprietary modules that clean up (tidy) + * poorly written HTML. + * + * construct() reads the HTML.TidyLevel, HTML.TidyAdd and HTML.TidyRemove + * directives, builds a lookup table of fix names and hands it to + * populate(), which is an empty stub in this class. + */ +class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule +{ + + /** + * List of supported levels, ordered from lightest to heaviest; + * getFixesForLevel() relies on this order to accumulate fixes. Index + * zero is a special case "no fixes" level. + */ + var $levels = array(0 => 'none', 'light', 'medium', 'heavy'); + + /** + * Lists of fixes used by getFixesForLevel(), keyed by level name. + * There is no entry for the index-zero "no fixes" level. Format is: + * HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2'); + */ + var $fixesForLevel = array( + 'light' => array(), + 'medium' => array(), + 'heavy' => array() + ); + + /** + * Lazy load constructs the module by determining the necessary + * fixes to create and then delegating to the populate() function. + * Fixes named in HTML.TidyAdd are added to the set for the configured + * level first; fixes named in HTML.TidyRemove are then removed, so a + * fix listed in both ends up removed. + * @param $config Configuration object queried via get() for the + * HTML.TidyLevel, HTML.TidyAdd and HTML.TidyRemove directives + * @todo Wildcard matching and error reporting when an added or + * subtracted fix has no effect. + */ + function construct($config) { + $level = $config->get('HTML', 'TidyLevel'); + $fixes = $this->getFixesForLevel($level); + + $add_fixes = $config->get('HTML', 'TidyAdd'); + foreach ($add_fixes as $fix) { + $fixes[$fix] = true; + } + + $remove_fixes = $config->get('HTML', 'TidyRemove'); + foreach ($remove_fixes as $fix) { + unset($fixes[$fix]); + } + + $this->populate($fixes); + } + + /** + * Retrieves all fixes per a level, returning fixes for that specific + * level as well as all levels below it. 
+ * The index-zero level yields an empty array. A level that is not in + * $levels raises an E_USER_WARNING and also yields an empty array. + * @param $level String level identifier, see $levels for valid values + * @return Lookup table of fixes, in the form array('fix-name' => true) + */ + function getFixesForLevel($level) { + if ($level == $this->levels[0]) { + return array(); + } + $activated_levels = array(); + for ($i = 1, $c = count($this->levels); $i < $c; $i++) { + $activated_levels[] = $this->levels[$i]; + if ($this->levels[$i] == $level) break; + } + if ($i == $c) { + trigger_error( + 'Tidy level ' . htmlspecialchars($level) . ' not recognized', + E_USER_WARNING + ); + return array(); + } + $ret = array(); + foreach ($activated_levels as $level) { + foreach ($this->fixesForLevel[$level] as $fix) { + $ret[$fix] = true; + } + } + return $ret; + } + + /** + * Populates the module with transforms and other special-case code + * based on a list of fixes passed to it. Empty here; intended to be + * overridden (see @abstract). + * @abstract + * @param $lookup Lookup table of fixes to activate, as assembled by + * construct() (fix name => true) + */ + function populate($lookup) {} + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/HTMLModule/TidyTest.php b/tests/HTMLPurifier/HTMLModule/TidyTest.php new file mode 100644 index 00000000..0b9bfd11 --- /dev/null +++ b/tests/HTMLPurifier/HTMLModule/TidyTest.php @@ -0,0 +1,104 @@ +fixesForLevel['light'][] = 'light-fix'; + $module->fixesForLevel['medium'][] = 'medium-fix'; + $module->fixesForLevel['heavy'][] = 'heavy-fix'; + + $this->assertIdentical( + array(), + $module->getFixesForLevel('none') + ); + $this->assertIdentical( + array('light-fix' => true), + $module->getFixesForLevel('light') + ); + $this->assertIdentical( + array('light-fix' => true, 'medium-fix' => true), + $module->getFixesForLevel('medium') + ); + $this->assertIdentical( + array('light-fix' => true, 'medium-fix' => true, 'heavy-fix' => true), + $module->getFixesForLevel('heavy') + ); + + $this->expectError('Tidy level turbo not recognized'); + $module->getFixesForLevel('turbo'); + + } + + function test_construct() { + + $i = 0; // counter, helps us isolate expectations + + // initialize partial mock + $module = 
new HTMLPurifier_HTMLModule_Tidy_TestForConstruct($this); + $module->fixesForLevel['light'] = array('light-fix-1', 'light-fix-2'); + $module->fixesForLevel['medium'] = array('medium-fix-1', 'medium-fix-2'); + $module->fixesForLevel['heavy'] = array('heavy-fix-1', 'heavy-fix-2'); + // $module->HTMLPurifier_HTMLModule_Tidy(); // constructor + + $config = HTMLPurifier_Config::create(array( + 'HTML.TidyLevel' => 'none' + )); + $module->expectAt($i++, 'populate', array(array())); + $module->construct($config); + + // basic levels + + $config = HTMLPurifier_Config::create(array( + 'HTML.TidyLevel' => 'light' + )); + $module->expectAt($i++, 'populate', array($module->getFixesForLevel('light'))); + $module->construct($config); + + $config = HTMLPurifier_Config::create(array( + 'HTML.TidyLevel' => 'heavy' + )); + $module->expectAt($i++, 'populate', array($module->getFixesForLevel('heavy'))); + $module->construct($config); + + // fine grained tuning + + $config = HTMLPurifier_Config::create(array( + 'HTML.TidyLevel' => 'none', + 'HTML.TidyAdd' => array('light-fix-1', 'medium-fix-1') + )); + $module->expectAt($i++, 'populate', array(array( + 'light-fix-1' => true, + 'medium-fix-1' => true + ))); + $module->construct($config); + + $config = HTMLPurifier_Config::create(array( + 'HTML.TidyLevel' => 'medium', + 'HTML.TidyRemove' => array('light-fix-1', 'medium-fix-1') + )); + $module->expectAt($i++, 'populate', array(array( + 'light-fix-2' => true, + 'medium-fix-2' => true + ))); + $module->construct($config); + + // done + + $module->tally(); + + } + +} + +?> \ No newline at end of file diff --git a/tests/test_files.php b/tests/test_files.php index 81df7851..c9cab78a 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -69,6 +69,7 @@ $test_files[] = 'HTMLModule/HypertextTest.php'; $test_files[] = 'HTMLModule/ImageTest.php'; $test_files[] = 'HTMLModule/LegacyTest.php'; $test_files[] = 'HTMLModule/ScriptingTest.php'; +$test_files[] = 'HTMLModule/TidyTest.php'; 
$test_files[] = 'IDAccumulatorTest.php'; $test_files[] = 'LanguageFactoryTest.php'; $test_files[] = 'LanguageTest.php';