diff --git a/NEWS b/NEWS index 4a494326..2bd97555 100644 --- a/NEWS +++ b/NEWS @@ -11,12 +11,22 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 1.4.0, unknown release date ! Implemented list-style-image, URIs now allowed in list-style -! Implemented background-image, background-repeat and background-attachment - CSS properties. background shorthand property HAS NOT been extended - to allow these, and background-position IS NOT implemented yet. +! Implemented background-image, background-repeat, background-attachment + and background-position CSS properties. Shorthand property background + supports all of these properties. ! Configuration documentation looks nicer -! Added smoketest 'all.php', which loads all other smoketests via frames +! Added %Core.EscapeNonASCIICharacters to workaround loss of Unicode + characters while %Core.Encoding is set to a non-UTF-8 encoding. +! Support for configuration directive aliases added +! Config object can now be instantiated from ini files +! YouTube preservation code added to the core, with two lines of code + you can add it as a filter to your code. See smoketests/preserveYouTube.php + for sample code. +- Replaced version check with functionality check for DOM (thanks Stephen + Khoo) +. Added smoketest 'all.php', which loads all other smoketests via frames . Implemented AttrDef_CSSURI for url(http://google.com) style declarations +. Added convenient single test selector form on test runner 1.3.3, unknown release date, likely to be dropped ! Moved SLOW to docs/enduser-slow.html and added code examples diff --git a/TODO b/TODO index 6a3bc84b..aa625d3a 100644 --- a/TODO +++ b/TODO @@ -7,19 +7,14 @@ TODO List ? At-risk ========================== -1.4 release - # More extensive URI filtering schemes (see docs/proposal-new-directives.txt) - # Allow for background-image and list-style-image (intrinsically tied to above) - # Add hooks for custom behavior (for instance, YouTube preservation) - - Aggressive caching - ? 
Rich set* methods and config file loaders for HTMLPurifier_Config - ? Configuration profiles: sets of directives that get set with one func call - ? ConfigSchema directive aliases (so we can rename some of them) - ? URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX) - 1.5 release + # Implement all non-essential attribute transforms + # URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX) + # Advanced URI filtering schemes (see docs/proposal-new-directives.txt) # Error logging for filtering/cleanup procedures - Requires I18N facilities to be created first (COMPLEX) + ? Configuration profiles: sets of directives that get set with one func call + - XSS-attempt detection 1.6 release # Add pre-packaged "levels" of cleaning (custom behavior already done) @@ -28,14 +23,30 @@ TODO List specification of elements that, when detected as foreign, trigger removal of children, although unbalanced tags could wreck havoc (or at least delete the rest of the document)). + - Allow specifying global attributes on a tag-by-tag basis in + %HTML.AllowAttributes + ? More user-friendly warnings when %HTML.Allow* attempts to specify a + tag or attribute that is not supported + - Parse TinyMCE whitelist into our %HTML.Allow* whitelists 1.7 release # Additional support for poorly written HTML - - Implement all non-essential attribute transforms (BIG!) - Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!) - Friendly strict handling of
(block ->
) + - Remove redundant tags, ex. Underlined. Implementation notes: + 1. Analyzing which tags to remove duplicants + 2. Ensure attributes are merged into the parent tag + 3. Extend the tag exclusion system to specify whether or not the + contents should be dropped or not (currently, there's code that could do + something like this if it didn't drop the inner text too.) + - Remove tags that don't do anything (no attributes) + - Remove empty inline tags + - Append something to duplicate IDs so they're still usable (impl. note: the + dupe detector would also need to detect the suffix as well) 2.0 release + # Legit token based CSS parsing (will require revamping almost every + AttrDef class) # Formatters for plaintext (COMPLEX) - Auto-paragraphing (be sure to leverage fact that we know when things shouldn't be paragraphed, such as lists and tables). @@ -48,48 +59,32 @@ TODO List - Hooks for adding custom processors to custom namespaced tags and attributes, offer default implementation - Lots of documentation and samples + - Allow tags to be "armored", an internal flag that protects them + from validation and passes them out unharmed - XHTML 1.1 support Ongoing - Lots of profiling, make it faster! - Plugins for major CMSes (COMPLEX) - - Drupal - WordPress - eFiction - more! (look for ones that use WYSIWYGs) Unknown release (on a scratch-an-itch basis) + - Upgrade SimpleTest testing code to newest versions - Fixes for Firefox's inability to handle COL alignment props (Bug 915) - Automatically add non-breaking spaces to empty table cells when empty-cells:show is applied to have compatibility with Internet Explorer - Convert RTL/LTR override characters to tags, or vice versa on demand. Also, enable disabling of directionality - - Append something to duplicate IDs so they're still usable (impl. 
note: the - dupe detector would also need to detect the suffix as well) - Have 'lang' attribute be checked against official lists - -Encoding workarounds - - Non-lossy dumb alternate character encoding transformations, achieved by - numerically encoding all non-ASCII characters - - Semi-lossy dumb alternate character encoding transformations, achieved by + ? Semi-lossy dumb alternate character encoding transformations, achieved by encoding all characters that have string entity equivalents Requested - - Native content compression, whitespace stripping (don't rely on Tidy, make + ? Native content compression, whitespace stripping (don't rely on Tidy, make sure we don't remove from
 or related tags)
- - Win32 Phalanger C# binaries (?)
- - Remove redundant tags, ex. Underlined. Implementation notes:
-    1. Analyzing which tags to remove duplicants
-    2. Ensure attributes are merged into the parent tag
-    3. Extend the tag exclusion system to specify whether or not the
-    contents should be dropped or not (currently, there's code that could do
-    something like this if it didn't drop the inner text too.)
- - More user-friendly warnings when %HTML.Allow* attempts to specify a
-   tag or attribute that is not supported
- - Allow specifying global attributes on a tag-by-tag basis in
-   %HTML.AllowAttributes
- - Parse TinyMCE whitelist into our %HTML.Allow* whitelists
- - XSS-attempt detection
+ ? Win32 Phalanger C# binaries
 
 Wontfix
  - Non-lossy smart alternate character encoding transformations (unless
diff --git a/configdoc/generate.php b/configdoc/generate.php
index 93328356..14335e98 100644
--- a/configdoc/generate.php
+++ b/configdoc/generate.php
@@ -99,6 +99,8 @@ foreach($schema->info as $namespace_name => $namespace_info) {
     
     foreach ($namespace_info as $name => $info) {
         
+        if ($info->class == 'alias') continue;
+        
         $dom_directive = $dom_document->createElement('directive');
         $dom_namespace->appendChild($dom_directive);
         
diff --git a/docs/dev-progress.html b/docs/dev-progress.html
index 78156e6e..be35a9b6 100644
--- a/docs/dev-progress.html
+++ b/docs/dev-progress.html
@@ -60,7 +60,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
 
 Standard
 background-colorCOMPOSITE(<color>, transparent)
-backgroundSHORTHAND
+backgroundSHORTHAND, currently alias for background-color
 borderSHORTHAND, MULTIPLE
 border-colorMULTIPLE
 border-styleMULTIPLE
@@ -145,13 +145,13 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
 background-imageDangerous, target milestone 1.3
 background-attachmentENUM(scroll, fixed),
     Depends on background-image
-background-positionDepends on background-image
+background-positionDepends on background-image
 cursorDangerous but fluffy
 displayENUM(...), Dangerous but interesting;
     will not implement list-item, run-in (Opera only) or table (no IE);
     inline-block has incomplete IE6 support and requires -moz-inline-box
     for Mozilla. Unknown target milestone.
-heightInteresting, why use it? Unknown target milestone.
+heightInteresting, why use it? Unknown target milestone.
 list-style-imageDangerous?
 max-heightNo IE 5/6
 min-height
@@ -231,7 +231,7 @@ Mozilla on inside and needs -moz-outline, no IE support.
 
 
 CSS
-styleAllNot all properties may be implemented, parser is good though.
+styleAllParser is reasonably functional. Status here doesn't count individual properties.
 
 
 
@@ -266,13 +266,13 @@ Mozilla on inside and needs -moz-outline, no IE support.
 alignCAPTIONNear-equiv style 'caption-side', drop left and right
     IMGMargin-left and margin-right = auto or parent div
     TABLE
-    HREquivalent style 'text-align' (IE tested)
+    HRNear-equivalent style 'text-align' (Works for IE and Opera, but not Firefox). Also try margin-right:auto; margin-left:0; for left or margin-right:0; margin-left:auto; for right (optionally replacing 0 with the original margin for that side)
     H1, H2, H3, H4, H5, H6, PEquivalent style 'text-align'
 altIMGRequired, insert image filename if src is present or default invalid image text
-bgcolorTABLEEquivalent style 'background-color' (IE tested)
-    TREquivalent style 'background-color' (IE tested)
+bgcolorTABLEEquivalent style 'background-color'
+    TREquivalent style 'background-color'
     TD, THEquivalent style 'background-color'
-borderIMGEquivalent style 'border-width', only applies when link present
+borderIMGNear-equivalent style 'border-width', as it only applies when link present
 clearBRNear-equiv style 'clear', transform 'all' into 'both'
 compactDL, OL, ULBoolean, needs custom CSS class; rarely used anyway
 dirBDORequired, insert ltr (or configuration value) if none
diff --git a/docs/enduser-security.txt b/docs/enduser-security.txt
index 695853d5..e7c9a8ce 100644
--- a/docs/enduser-security.txt
+++ b/docs/enduser-security.txt
@@ -7,6 +7,7 @@ and it's up to you to provide it the proper information and proper context
 to be effective. Things to remember:
 
 1. Character Encoding: UTF-8.
+    This segment will soon be obsoleted by enduser-utf8.html
 Currently, the parser runs under the assumption that it is dealing
 with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
 character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
@@ -27,6 +28,7 @@ this may be configurable in the future.  Do you want standards compliance?
 The doctype is a good place to start.
 
 3. IDs
+    This segment is obsoleted by enduser-id.html
 They need to be unique, but without some knowledge of the
 rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
 needs to be set: we may want to consider disallowing IDs by default to
diff --git a/docs/enduser-youtube.html b/docs/enduser-youtube.html
index c70d7b44..0cfd3587 100644
--- a/docs/enduser-youtube.html
+++ b/docs/enduser-youtube.html
@@ -172,9 +172,10 @@ introduced after it has finished.

Future plans

-

It would probably be a good idea if this code was added to the core -library. Look out for the inclusion of this into the core as a decorator -or the like.

+

This functionality is part of the core library, using the +HTMLPurifier_Filter class to achieve the desired effect. Our implementation +is slightly different, and this page will be updated to reflect that +once 1.4.0 is released.

\ No newline at end of file diff --git a/docs/index.html b/docs/index.html index e5d9d662..ea498147 100644 --- a/docs/index.html +++ b/docs/index.html @@ -31,6 +31,9 @@ information for casual developers using HTML Purifier.

Speeding up HTML Purifier
Explains how to speed up HTML Purifier through caching or inbound filtering.
+
UTF-8
+
Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.
+

Development

diff --git a/docs/proposal-config.txt b/docs/proposal-config.txt index 0ac54c67..d291a3fb 100644 --- a/docs/proposal-config.txt +++ b/docs/proposal-config.txt @@ -14,15 +14,15 @@ Since configuration is dependant on context, internal classes require a configuration object to be passed as a parameter. (They also require a Context object). -In relation to HTMLDefinition and CSSDefinition, there is a special class +In relation to HTMLDefinition and CSSDefinition, there could be a special class of directives that influence the *construction* of the Definition object. -A standard call pattern would look like: +A theoretical call pattern would look like: 1. Client calls Config->getHTMLDefinition() 2. Config calls HTMLDefinition->createNew(this) 3. HTMLDefinition constructs itself with base configuration -4. HTMLDefinition calls Config->get('HTMLDefinition') -5. Config returns array of directives that later construction +4. HTMLDefinition calls Config->get('HTML') +5. Config returns array of directives 6. HTMLDefinition performs operations and changes specified by directives 7. HTMLPurifier returns constructed definition 8. Config caches definition so it doesn't have to be generated again @@ -33,3 +33,7 @@ custom copy, which OVERRIDES all directives. Only the base, vanilla copy is the Singleton, the object actually interfaced with is a operated-upon clone of that object. Also, if an update to the directives would update the definition, you'd have to force reconstruction. + +In practice, the pulling directives from the config object are +solely need-based, and the flex points are littered throughout the +setup() function. Some sort of refactoring is likely in order. diff --git a/docs/proposal-filter-levels.txt b/docs/proposal-filter-levels.txt index 83b3fced..a8306152 100644 --- a/docs/proposal-filter-levels.txt +++ b/docs/proposal-filter-levels.txt @@ -15,7 +15,10 @@ and properties to allow. HTMLDefinition makes a big part of what HTMLPurifier is. 
The idea, then, is to setup fundamentally different set of definitions, which -can further be customized using simpler configuration options. +can further be customized using simpler configuration options. Alternatively, +they could be implemented as configuration profiles, which simply load +a set of recommended directives to achieve a desired effect (no simpler +config options though). Here are some fuzzy levels you could set: diff --git a/docs/proposal-new-directives.txt b/docs/proposal-new-directives.txt index 75c963e6..2c08ddbb 100644 --- a/docs/proposal-new-directives.txt +++ b/docs/proposal-new-directives.txt @@ -4,8 +4,6 @@ Configuration Ideas Here are some theoretical configuration ideas that we could implement some time. Note the naming convention: %Namespace.Directive -%Attr.IDPrefix - prefix all ids with this - %Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently rewrite the URLs we parse too. However, we can only do it when it's a pure anchor link, so it's not foolproof diff --git a/docs/ref-strictness.txt b/docs/ref-strictness.txt index e383a29b..81907c1e 100644 --- a/docs/ref-strictness.txt +++ b/docs/ref-strictness.txt @@ -2,8 +2,8 @@ Is HTML Purifier Strict or Transitional? A little bit of helpful guidance -Despite the fact that HTML Purifier professes only to support transitional -HTML, it rejects a lot of attributes and elements that are actually, indeed, +Despite the fact that HTML Purifier professes to support both transitional and +strict HTML, it rejects a lot of attributes and elements that are actually, indeed, valid. You can investigate progress.html to find out precisely what we are doing to these *deprecated* attributes. @@ -11,8 +11,8 @@ However, users have found that Strict HTML imposes some quite unreasonable restrictions on certain things. The start and value attributes in ol and li (respectively) perhaps are the most contested. 
There's is currently no widely supported browser method short of JavaScript that can replace these -two deprecated elements. HTML Purifier does not currently support them, but -it might behoove us to do so while our output is still transitional. +two deprecated elements. It behooves us to allow these deprecated +attributes when the output is transitional. Fortunantely, that's the only real bugger case. The others have near-perfect CSS equivalents, and were presentational anyway. However, the other question @@ -32,5 +32,6 @@ these loose-only constructs in loose mode: The changed child definitions as well as the ul.start li.value are the most compelling reasons why loose should be used. We may want offer disabling , - and by themselves. + and by themselves. We may also want to offer no pre-emptive +deprecated conversions. This all must be unified. diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php index 3bd32887..0e1b54ea 100644 --- a/library/HTMLPurifier.php +++ b/library/HTMLPurifier.php @@ -67,6 +67,7 @@ class HTMLPurifier var $version = '1.3.2'; var $config; + var $filters; var $lexer, $strategy, $generator; @@ -94,6 +95,14 @@ class HTMLPurifier } + /** + * Adds a filter to process the output. First come first serve + * @param $filter HTMLPurifier_Filter object + */ + function addFilter($filter) { + $this->filters[] = $filter; + } + /** * Filters an HTML snippet/document to be XSS-free and standards-compliant. 
* @@ -111,6 +120,10 @@ class HTMLPurifier $context = new HTMLPurifier_Context(); $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); + for ($i = 0, $size = count($this->filters); $i < $size; $i++) { + $html = $this->filters[$i]->preFilter($html, $config, $context); + } + // purified HTML $html = $this->generator->generateFromTokens( @@ -126,6 +139,10 @@ class HTMLPurifier $config, $context ); + for ($i = $size - 1; $i >= 0; $i--) { + $html = $this->filters[$i]->postFilter($html, $config, $context); + } + $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); $this->context =& $context; return $html; diff --git a/library/HTMLPurifier/AttrDef/Background.php b/library/HTMLPurifier/AttrDef/Background.php new file mode 100644 index 00000000..1db3f88d --- /dev/null +++ b/library/HTMLPurifier/AttrDef/Background.php @@ -0,0 +1,87 @@ +getCSSDefinition(); + $this->info['background-color'] = $def->info['background-color']; + $this->info['background-image'] = $def->info['background-image']; + $this->info['background-repeat'] = $def->info['background-repeat']; + $this->info['background-attachment'] = $def->info['background-attachment']; + $this->info['background-position'] = $def->info['background-position']; + } + + function validate($string, $config, &$context) { + + // regular pre-processing + $string = $this->parseCDATA($string); + if ($string === '') return false; + + // assumes URI doesn't have spaces in it + $bits = explode(' ', strtolower($string)); // bits to process + + $caught = array(); + $caught['color'] = false; + $caught['image'] = false; + $caught['repeat'] = false; + $caught['attachment'] = false; + $caught['position'] = false; + + $i = 0; // number of catches + $none = false; + + foreach ($bits as $bit) { + if ($bit === '') continue; + foreach ($caught as $key => $status) { + if ($key != 'position') { + if ($status !== false) continue; + $r = $this->info['background-' . 
$key]->validate($bit, $config, $context); + } else { + $r = $bit; + } + if ($r === false) continue; + if ($key == 'position') { + if ($caught[$key] === false) $caught[$key] = ''; + $caught[$key] .= $r . ' '; + } else { + $caught[$key] = $r; + } + $i++; + break; + } + } + + if (!$i) return false; + if ($caught['position'] !== false) { + $caught['position'] = $this->info['background-position']-> + validate($caught['position'], $config, $context); + } + + $ret = array(); + foreach ($caught as $value) { + if ($value === false) continue; + $ret[] = $value; + } + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/AttrDef/BackgroundPosition.php b/library/HTMLPurifier/AttrDef/BackgroundPosition.php new file mode 100644 index 00000000..0c620b39 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/BackgroundPosition.php @@ -0,0 +1,130 @@ + | | left | center | right + ] + [ + | | top | center | bottom + ]? + ] | + [ // this signifies that the vertical and horizontal adjectives + // can be arbitrarily ordered, however, there can only be two, + // one of each, or none at all + [ + left | center | right + ] || + [ + top | center | bottom + ] + ] + top, left = 0% + center, (none) = 50% + bottom, right = 100% +*/ + +/* QuirksMode says: + keyword + length/percentage must be ordered correctly, as per W3C + + Internet Explorer and Opera, however, support arbitrary ordering. We + should fix it up. + + Minor issue though, not strictly necessary. +*/ + +// control freaks may appreciate the ability to convert these to +// percentages or something, but it's not necessary + +/** + * Validates the value of background-position. 
+ */ +class HTMLPurifier_AttrDef_BackgroundPosition extends HTMLPurifier_AttrDef +{ + + var $length; + var $percentage; + + function HTMLPurifier_AttrDef_BackgroundPosition() { + $this->length = new HTMLPurifier_AttrDef_CSSLength(); + $this->percentage = new HTMLPurifier_AttrDef_Percentage(); + } + + function validate($string, $config, &$context) { + $string = $this->parseCDATA($string); + $bits = explode(' ', $string); + + $keywords = array(); + $keywords['h'] = false; // left, right + $keywords['v'] = false; // top, bottom + $keywords['c'] = false; // center + $measures = array(); + + $i = 0; + + $lookup = array( + 'top' => 'v', + 'bottom' => 'v', + 'left' => 'h', + 'right' => 'h', + 'center' => 'c' + ); + + foreach ($bits as $bit) { + if ($bit === '') continue; + + // test for keyword + $lbit = ctype_lower($bit) ? $bit : strtolower($bit); + if (isset($lookup[$lbit])) { + $status = $lookup[$lbit]; + $keywords[$status] = $lbit; + $i++; + } + + // test for length + $r = $this->length->validate($bit, $config, &$context); + if ($r !== false) { + $measures[] = $r; + $i++; + } + + // test for percentage + $r = $this->percentage->validate($bit, $config, &$context); + if ($r !== false) { + $measures[] = $r; + $i++; + } + + } + + if (!$i) return false; // no valid values were caught + + + $ret = array(); + + // first keyword + if ($keywords['h']) $ret[] = $keywords['h']; + elseif (count($measures)) $ret[] = array_shift($measures); + elseif ($keywords['c']) { + $ret[] = $keywords['c']; + $keywords['c'] = false; // prevent re-use: center = center center + } + + if ($keywords['v']) $ret[] = $keywords['v']; + elseif (count($measures)) $ret[] = array_shift($measures); + elseif ($keywords['c']) $ret[] = $keywords['c']; + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/AttrDef/CSSLength.php b/library/HTMLPurifier/AttrDef/CSSLength.php index b279eabf..50613a39 100644 --- 
a/library/HTMLPurifier/AttrDef/CSSLength.php +++ b/library/HTMLPurifier/AttrDef/CSSLength.php @@ -40,6 +40,7 @@ class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef // we assume all units are two characters $unit = substr($length, $strlen - 2); + if (!ctype_lower($unit)) $unit = strtolower($unit); $number = substr($length, 0, $strlen - 2); if (!isset($this->units[$unit])) return false; diff --git a/library/HTMLPurifier/AttrDef/ListStyle.php b/library/HTMLPurifier/AttrDef/ListStyle.php index b866798c..b09ee354 100644 --- a/library/HTMLPurifier/AttrDef/ListStyle.php +++ b/library/HTMLPurifier/AttrDef/ListStyle.php @@ -53,6 +53,7 @@ class HTMLPurifier_AttrDef_ListStyle extends HTMLPurifier_AttrDef } $caught[$key] = $r; $i++; + break; } } diff --git a/library/HTMLPurifier/AttrDef/Percentage.php b/library/HTMLPurifier/AttrDef/Percentage.php index 35fb5ab0..fcab2868 100644 --- a/library/HTMLPurifier/AttrDef/Percentage.php +++ b/library/HTMLPurifier/AttrDef/Percentage.php @@ -4,14 +4,13 @@ require_once 'HTMLPurifier/AttrDef.php'; require_once 'HTMLPurifier/AttrDef/Number.php'; /** - * Validates a Percentage as defined by the HTML spec. - * @note This also allows integer pixel values. + * Validates a Percentage as defined by the CSS spec. 
*/ class HTMLPurifier_AttrDef_Percentage extends HTMLPurifier_AttrDef { /** - * Instance of HTMLPurifier_AttrDef_Number to defer pixel validation + * Instance of HTMLPurifier_AttrDef_Number to defer number validation */ var $number_def; diff --git a/library/HTMLPurifier/CSSDefinition.php b/library/HTMLPurifier/CSSDefinition.php index d2227e03..0bbe8af5 100644 --- a/library/HTMLPurifier/CSSDefinition.php +++ b/library/HTMLPurifier/CSSDefinition.php @@ -12,6 +12,8 @@ require_once 'HTMLPurifier/AttrDef/Font.php'; require_once 'HTMLPurifier/AttrDef/Border.php'; require_once 'HTMLPurifier/AttrDef/ListStyle.php'; require_once 'HTMLPurifier/AttrDef/CSSURI.php'; +require_once 'HTMLPurifier/AttrDef/BackgroundPosition.php'; +require_once 'HTMLPurifier/AttrDef/Background.php'; /** * Defines allowed CSS attributes and what their values are. @@ -79,9 +81,7 @@ class HTMLPurifier_CSSDefinition $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum( array('scroll', 'fixed') ); - - // pending its own validator as a shorthand - $this->info['background'] = + $this->info['background-position'] = new HTMLPurifier_AttrDef_BackgroundPosition(); $border_color = $this->info['border-top-color'] = @@ -93,6 +93,8 @@ class HTMLPurifier_CSSDefinition new HTMLPurifier_AttrDef_Color() )); + $this->info['background'] = new HTMLPurifier_AttrDef_Background($config); + $this->info['border-color'] = new HTMLPurifier_AttrDef_Multiple($border_color); $border_width = diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 5def0aa3..252fef5a 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -48,14 +48,16 @@ class HTMLPurifier_Config * Convenience constructor that creates a config object based on a mixed var * @static * @param mixed $config Variable that defines the state of the config - * object. Can be: a HTMLPurifier_Config() object or - * an array of directives based on loadArray(). + * object. 
Can be: a HTMLPurifier_Config() object, + * an array of directives based on loadArray(), + * or a string filename of an ini file. * @return Configured HTMLPurifier_Config object */ static function create($config) { if ($config instanceof HTMLPurifier_Config) return $config; $ret = HTMLPurifier_Config::createDefault(); - if (is_array($config)) $ret->loadArray($config); + if (is_string($config)) $ret->loadIni($config); + elseif (is_array($config)) $ret->loadArray($config); return $ret; } @@ -75,12 +77,17 @@ class HTMLPurifier_Config * @param $namespace String namespace * @param $key String key */ - function get($namespace, $key) { + function get($namespace, $key, $from_alias = false) { if (!isset($this->def->info[$namespace][$key])) { trigger_error('Cannot retrieve value of undefined directive', E_USER_WARNING); return; } + if ($this->def->info[$namespace][$key]->class == 'alias') { + trigger_error('Cannot get value from aliased directive, use real name', + E_USER_ERROR); + return; + } return $this->conf[$namespace][$key]; } @@ -103,12 +110,22 @@ class HTMLPurifier_Config * @param $key String key * @param $value Mixed value */ - function set($namespace, $key, $value) { + function set($namespace, $key, $value, $from_alias = false) { if (!isset($this->def->info[$namespace][$key])) { trigger_error('Cannot set undefined directive to value', E_USER_WARNING); return; } + if ($this->def->info[$namespace][$key]->class == 'alias') { + if ($from_alias) { + trigger_error('Double-aliases not allowed, please fix '. 
+ 'ConfigSchema bug'); + } + $this->set($this->def->info[$namespace][$key]->namespace, + $this->def->info[$namespace][$key]->name, + $value, true); + return; + } $value = $this->def->validate( $value, $this->def->info[$namespace][$key]->type, @@ -178,6 +195,15 @@ class HTMLPurifier_Config } } + /** + * Loads configuration values from an ini file + * @param $filename Name of ini file + */ + function loadIni($filename) { + $array = parse_ini_file($filename, true); + $this->loadArray($array); + } + } ?> diff --git a/library/HTMLPurifier/ConfigSchema.php b/library/HTMLPurifier/ConfigSchema.php index a1a25dd2..301f2d1b 100644 --- a/library/HTMLPurifier/ConfigSchema.php +++ b/library/HTMLPurifier/ConfigSchema.php @@ -106,6 +106,11 @@ class HTMLPurifier_ConfigSchema { E_USER_ERROR); return; } + if (empty($description)) { + trigger_error('Description must be non-empty', + E_USER_ERROR); + return; + } if (isset($def->info[$namespace][$name])) { if ( $def->info[$namespace][$name]->type !== $type || @@ -161,6 +166,11 @@ class HTMLPurifier_ConfigSchema { E_USER_ERROR); return; } + if (empty($description)) { + trigger_error('Description must be non-empty', + E_USER_ERROR); + return; + } $def->info[$namespace] = array(); $def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace(); $def->info_namespace[$namespace]->description = $description; @@ -216,12 +226,66 @@ class HTMLPurifier_ConfigSchema { E_USER_ERROR); return; } - if ($def->info[$namespace][$name]->allowed === true) { - $def->info[$namespace][$name]->allowed = array(); + $directive =& $def->info[$namespace][$name]; + $type = $directive->type; + if ($type != 'string' && $type != 'istring') { + trigger_error('Cannot define allowed values for directive whose type is not string', + E_USER_ERROR); + return; + } + if ($directive->allowed === true) { + $directive->allowed = array(); } foreach ($allowed_values as $value) { - $def->info[$namespace][$name]->allowed[$value] = true; + $directive->allowed[$value] = 
true; } + if ($def->defaults[$namespace][$name] !== null && + !isset($directive->allowed[$def->defaults[$namespace][$name]])) { + trigger_error('Default value must be in allowed range of variables', + E_USER_ERROR); + $directive->allowed = true; // undo undo! + return; + } + } + + /** + * Defines a directive alias for backwards compatibility + * @static + * @param $namespace + * @param $name Directive that will be aliased + * @param $new_namespace + * @param $new_name Directive that the alias will be to + */ + static function defineAlias($namespace, $name, $new_namespace, $new_name) { + $def =& HTMLPurifier_ConfigSchema::instance(); + if (!isset($def->info[$namespace])) { + trigger_error('Cannot define directive alias in undefined namespace', + E_USER_ERROR); + return; + } + if (!ctype_alnum($name)) { + trigger_error('Directive name must be alphanumeric', + E_USER_ERROR); + return; + } + if (isset($def->info[$namespace][$name])) { + trigger_error('Cannot define alias over directive', + E_USER_ERROR); + return; + } + if (!isset($def->info[$new_namespace][$new_name])) { + trigger_error('Cannot define alias to undefined directive', + E_USER_ERROR); + return; + } + if ($def->info[$new_namespace][$new_name]->class == 'alias') { + trigger_error('Cannot define alias to alias', + E_USER_ERROR); + return; + } + $def->info[$namespace][$name] = + new HTMLPurifier_ConfigEntity_DirectiveAlias( + $new_namespace, $new_name); } /** @@ -318,13 +382,21 @@ class HTMLPurifier_ConfigSchema { /** * Base class for configuration entity */ -class HTMLPurifier_ConfigEntity {} +class HTMLPurifier_ConfigEntity { + var $class = false; +} /** * Structure object describing of a namespace */ class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity { + function HTMLPurifier_ConfigEntity_Namespace($description = null) { + $this->description = $description; + } + + var $class = 'namespace'; + /** * String description of what kinds of directives go in this namespace. 
*/ @@ -339,15 +411,21 @@ class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity { class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity { - /** - * Hash of value aliases, i.e. values that are equivalent. - */ - var $aliases = array(); + var $class = 'directive'; - /** - * Lookup table of allowed values of the element, bool true if all allowed. - */ - var $allowed = true; + function HTMLPurifier_ConfigEntity_Directive( + $type = null, + $descriptions = null, + $allow_null = null, + $allowed = null, + $aliases = null + ) { + if ( $type !== null) $this->type = $type; + if ($descriptions !== null) $this->descriptions = $descriptions; + if ( $allow_null !== null) $this->allow_null = $allow_null; + if ( $allowed !== null) $this->allowed = $allowed; + if ( $aliases !== null) $this->aliases = $aliases; + } /** * Allowed type of the directive. Values are: @@ -364,16 +442,26 @@ class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity var $type = 'mixed'; /** - * Is null allowed? Has no affect for mixed type. + * Plaintext descriptions of the configuration entity is. Organized by + * file and line number, so multiple descriptions are allowed. + */ + var $descriptions = array(); + + /** + * Is null allowed? Has no effect for mixed type. * @bool */ var $allow_null = false; /** - * Plaintext descriptions of the configuration entity is. Organized by - * file and line number, so multiple descriptions are allowed. + * Lookup table of allowed values of the element, bool true if all allowed. */ - var $descriptions = array(); + var $allowed = true; + + /** + * Hash of value aliases, i.e. values that are equivalent. 
+ */ + var $aliases = array(); /** * Adds a description to the array @@ -385,4 +473,26 @@ class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity } +/** + * Structure object describing a directive alias + */ +class HTMLPurifier_ConfigEntity_DirectiveAlias extends HTMLPurifier_ConfigEntity +{ + var $class = 'alias'; + + /** + * Namespace being aliased to + */ + var $namespace; + /** + * Directive being aliased to + */ + var $name; + + function HTMLPurifier_ConfigEntity_DirectiveAlias($namespace, $name) { + $this->namespace = $namespace; + $this->name = $name; + } +} + ?> diff --git a/library/HTMLPurifier/Encoder.php b/library/HTMLPurifier/Encoder.php index 31aeaaa6..84785d74 100644 --- a/library/HTMLPurifier/Encoder.php +++ b/library/HTMLPurifier/Encoder.php @@ -6,15 +6,29 @@ HTMLPurifier_ConfigSchema::define( 'Core', 'Encoding', 'utf-8', 'istring', 'If for some reason you are unable to convert all webpages to UTF-8, '. 'you can use this directive as a stop-gap compatibility change to '. - 'let HTMLPurifier deal with non UTF-8 input. This technique has '. + 'let HTML Purifier deal with non UTF-8 input. This technique has '. 'notable deficiencies: absolutely no characters outside of the selected '. 'character encoding will be preserved, not even the ones that have '. 'been ampersand escaped (this is due to a UTF-8 specific feature '. 'that automatically resolves all entities), making it pretty useless '. - 'for anything except the most I18N-blind applications. This directive '. + 'for anything except the most I18N-blind applications, although '. + '%Core.EscapeNonASCIICharacters offers fixes this trouble with '. + 'another tradeoff. This directive '. 'only accepts ISO-8859-1 if iconv is not enabled.' ); +HTMLPurifier_ConfigSchema::define( + 'Core', 'EscapeNonASCIICharacters', false, 'bool', + 'This directive overcomes a deficiency in %Core.Encoding by blindly '. + 'converting all non-ASCII characters into decimal numeric entities before '. 
+ 'converting it to its native encoding. This means that even '. + 'characters that can be expressed in the non-UTF-8 encoding will '. + 'be entity-ized, which can be a real downer for encodings like Big5. '. + 'It also assumes that the ASCII repetoire is available, although '. + 'this is the case for almost all encodings. Anyway, use UTF-8! This '. + 'directive has been available since 1.4.0.' +); + if ( !function_exists('iconv') ) { // only encodings with native PHP support HTMLPurifier_ConfigSchema::defineAllowedValues( @@ -310,6 +324,7 @@ class HTMLPurifier_Encoder } elseif ($encoding === 'iso-8859-1') { return @utf8_encode($str); } + trigger_error('Encoding not supported', E_USER_ERROR); } /** @@ -323,11 +338,63 @@ class HTMLPurifier_Encoder if ($iconv === null) $iconv = function_exists('iconv'); $encoding = $config->get('Core', 'Encoding'); if ($encoding === 'utf-8') return $str; + if ($config->get('Core', 'EscapeNonASCIICharacters')) { + $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str); + } if ($iconv && !$config->get('Test', 'ForceNoIconv')) { return @iconv('utf-8', $encoding . '//IGNORE', $str); } elseif ($encoding === 'iso-8859-1') { return @utf8_decode($str); } + trigger_error('Encoding not supported', E_USER_ERROR); + } + + /** + * Lossless (character-wise) conversion of HTML to ASCII + * @static + * @param $str UTF-8 string to be converted to ASCII + * @returns ASCII encoded string with non-ASCII character entity-ized + * @warning Adapted from MediaWiki, claiming fair use: this is a common + * algorithm. If you disagree with this license fudgery, + * implement it yourself. + * @note Uses decimal numeric entities since they are best supported. + * @note This is a DUMB function: it has no concept of keeping + * character entities that the projected character encoding + * can allow. We could possibly implement a smart version + * but that would require it to also know which Unicode + * codepoints the charset supported (not an easy task). 
+ * @note Sort of with cleanUTF8() but it assumes that $str is + * well-formed UTF-8 + */ + static function convertToASCIIDumbLossless($str) { + $bytesleft = 0; + $result = ''; + $working = 0; + $len = strlen($str); + for( $i = 0; $i < $len; $i++ ) { + $bytevalue = ord( $str[$i] ); + if( $bytevalue <= 0x7F ) { //0xxx xxxx + $result .= chr( $bytevalue ); + $bytesleft = 0; + } elseif( $bytevalue <= 0xBF ) { //10xx xxxx + $working = $working << 6; + $working += ($bytevalue & 0x3F); + $bytesleft--; + if( $bytesleft <= 0 ) { + $result .= "&#" . $working . ";"; + } + } elseif( $bytevalue <= 0xDF ) { //110x xxxx + $working = $bytevalue & 0x1F; + $bytesleft = 1; + } elseif( $bytevalue <= 0xEF ) { //1110 xxxx + $working = $bytevalue & 0x0F; + $bytesleft = 2; + } else { //1111 0xxx + $working = $bytevalue & 0x07; + $bytesleft = 3; + } + } + return $result; } diff --git a/library/HTMLPurifier/Filter.php b/library/HTMLPurifier/Filter.php new file mode 100644 index 00000000..94c5ae7b --- /dev/null +++ b/library/HTMLPurifier/Filter.php @@ -0,0 +1,39 @@ +preFilter, + * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter, + * 1->postFilter. + */ + +class HTMLPurifier_Filter +{ + + /** + * Name of the filter for identification purposes + */ + var $name; + + /** + * Pre-processor function, handles HTML before HTML Purifier + */ + function preFilter($html, $config, &$context) {} + + /** + * Post-processor function, handles HTML after HTML Purifier + */ + function postFilter($html, $config, &$context) {} + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Filter/YouTube.php b/library/HTMLPurifier/Filter/YouTube.php new file mode 100644 index 00000000..8abbb693 --- /dev/null +++ b/library/HTMLPurifier/Filter/YouTube.php @@ -0,0 +1,34 @@ +]+>.+?'. 
+ 'http://www.youtube.com/v/([A-Za-z0-9]+).+?#'; + $pre_replace = '\1'; + return preg_replace($pre_regex, $pre_replace, $html); + } + + function postFilter($html, $config, &$context) { + $post_regex = '#([A-Za-z0-9]+)#'; + $post_replace = ''. + ''. + ''. + ''. + ''; + return preg_replace($post_regex, $post_replace, $html); + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index fb2a138d..ca5a5328 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -151,7 +151,7 @@ class HTMLPurifier_Lexer $lexer = $prototype; } if (empty($lexer)) { - if (version_compare(PHP_VERSION, '5', '>=')) { + if (class_exists('DOMDocument')) { // check for DOM support require_once 'HTMLPurifier/Lexer/DOMLex.php'; $lexer = new HTMLPurifier_Lexer_DOMLex(); } else { diff --git a/smoketests/preserveYouTube.php b/smoketests/preserveYouTube.php index ef347b47..24820f8d 100644 --- a/smoketests/preserveYouTube.php +++ b/smoketests/preserveYouTube.php @@ -15,34 +15,13 @@ echo '';

HTML Purifier Preserve YouTube Smoketest

]+>.+?'. - 'http://www.youtube.com/v/([A-Za-z0-9]+).+?#'; - $pre_replace = '\1'; - $html = preg_replace($pre_regex, $pre_replace, $html); - $html = parent::purify($html, $config); - $post_regex = '#([A-Za-z0-9]+)#'; - $post_replace = ''. - ''. - ''. - ''. - ''; - $html = preg_replace($post_regex, $post_replace, $html); - return $html; - } -} - $string = ''; $regular_purifier = new HTMLPurifier(); -$youtube_purifier = new HTMLPurifierX_PreserveYouTube(); + +$youtube_purifier = new HTMLPurifier(); +require_once 'HTMLPurifier/Filter/YouTube.php'; +$youtube_purifier->addFilter(new HTMLPurifier_Filter_YouTube()); ?>

Unpurified

diff --git a/tests/HTMLPurifier/AttrDef/BackgroundPositionTest.php b/tests/HTMLPurifier/AttrDef/BackgroundPositionTest.php new file mode 100644 index 00000000..ce720841 --- /dev/null +++ b/tests/HTMLPurifier/AttrDef/BackgroundPositionTest.php @@ -0,0 +1,71 @@ +def = new HTMLPurifier_AttrDef_BackgroundPosition(); + + // explicitly cited in spec + $this->assertDef('0% 0%'); + $this->assertDef('100% 100%'); + $this->assertDef('14% 84%'); + $this->assertDef('2cm 1cm'); + $this->assertDef('top'); + $this->assertDef('left'); + $this->assertDef('center'); + $this->assertDef('right'); + $this->assertDef('bottom'); + $this->assertDef('left top'); + $this->assertDef('center top'); + $this->assertDef('right top'); + $this->assertDef('left center'); + $this->assertDef('right center'); + $this->assertDef('left bottom'); + $this->assertDef('center bottom'); + $this->assertDef('right bottom'); + + // reordered due to internal impl details + $this->assertDef('top left', 'left top'); + $this->assertDef('top center', 'center top'); + $this->assertDef('top right', 'right top'); + $this->assertDef('center left', 'left center'); + $this->assertDef('center center', 'center'); // two centers collide + $this->assertDef('center right', 'right center'); + $this->assertDef('bottom left', 'left bottom'); + $this->assertDef('bottom center', 'center bottom'); + $this->assertDef('bottom right', 'right bottom'); + + // more cases from the defined syntax + $this->assertDef('1.32in 4ex'); + $this->assertDef('-14% -84.65%'); + $this->assertDef('-1in -4ex'); + $this->assertDef('-1pc 2.3%'); + + // keyword mixing + $this->assertDef('3em top'); + $this->assertDef('left 50%'); + + // fixable keyword mixing + $this->assertDef('top 3em', '3em top'); + $this->assertDef('50% left', 'left 50%'); + + // whitespace collapsing + $this->assertDef('3em top', '3em top'); + $this->assertDef("left\n \t foo ", 'left'); + + // invalid uses (we're going to be strict on these) + $this->assertDef('foo bar', false); + 
$this->assertDef('left left', 'left'); + $this->assertDef('left right top bottom center left', 'left bottom'); + $this->assertDef('0fr 9%', '9%'); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/AttrDef/BackgroundTest.php b/tests/HTMLPurifier/AttrDef/BackgroundTest.php new file mode 100644 index 00000000..69b3c1ba --- /dev/null +++ b/tests/HTMLPurifier/AttrDef/BackgroundTest.php @@ -0,0 +1,21 @@ +def = new HTMLPurifier_AttrDef_Background(HTMLPurifier_Config::createDefault()); + + $valid = '#333 url(chess.png) repeat fixed 50% top'; + $this->assertDef($valid); + $this->assertDef('url("chess.png") #333 50% top repeat fixed', $valid); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/AttrDef/BorderTest.php b/tests/HTMLPurifier/AttrDef/BorderTest.php index 7a8153d5..b18bfe70 100644 --- a/tests/HTMLPurifier/AttrDef/BorderTest.php +++ b/tests/HTMLPurifier/AttrDef/BorderTest.php @@ -1,6 +1,7 @@ assertDef('3pt'); $this->assertDef('3pc'); + $this->assertDef('3PX', '3px'); + $this->assertDef('3', false); $this->assertDef('3miles', false); diff --git a/tests/HTMLPurifier/AttrDef/CSSTest.php b/tests/HTMLPurifier/AttrDef/CSSTest.php index cb5e8083..9371f788 100644 --- a/tests/HTMLPurifier/AttrDef/CSSTest.php +++ b/tests/HTMLPurifier/AttrDef/CSSTest.php @@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness $this->assertDef('text-transform:capitalize;'); $this->assertDef('background-color:rgb(0,0,255);'); $this->assertDef('background-color:transparent;'); - $this->assertDef('background:#FF9;'); + $this->assertDef('background:#333 url(chess.png) repeat fixed 50% top;'); $this->assertDef('color:#F00;'); $this->assertDef('border-top-color:#F00;'); $this->assertDef('border-color:#F00 #FF0;'); @@ -78,6 +78,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness $this->assertDef('background-image:none;'); $this->assertDef('background-repeat:repeat-y;'); 
$this->assertDef('background-attachment:fixed;'); + $this->assertDef('background-position:left 90%;'); // duplicates $this->assertDef('text-align:right;text-align:left;', diff --git a/tests/HTMLPurifier/AttrDef/ColorTest.php b/tests/HTMLPurifier/AttrDef/ColorTest.php index a38c551b..b44082c0 100644 --- a/tests/HTMLPurifier/AttrDef/ColorTest.php +++ b/tests/HTMLPurifier/AttrDef/ColorTest.php @@ -1,6 +1,7 @@ assertDef('user_story95_alas'); $this->assertDef('user_alas', 'user_story95_user_alas'); // ! + // no effect when IDPrefix isn't set $this->config->set('Attr', 'IDPrefix', ''); - $this->assertDef('amherst'); // no affect when IDPrefix isn't set - $this->assertError('%Attr.IDPrefixLocal cannot be used unless '. + $this->expectError('%Attr.IDPrefixLocal cannot be used unless '. '%Attr.IDPrefix is set'); - // SimpleTest has a bug and throws a sprintf error - // $this->assertNoErrors(); - $this->swallowErrors(); + $this->assertDef('amherst'); } diff --git a/tests/HTMLPurifier/AttrDef/IntegerTest.php b/tests/HTMLPurifier/AttrDef/IntegerTest.php index 20d71689..98cefbac 100644 --- a/tests/HTMLPurifier/AttrDef/IntegerTest.php +++ b/tests/HTMLPurifier/AttrDef/IntegerTest.php @@ -1,6 +1,7 @@ assertResult('Needs wrap', '
Needs wrap
', array('HTML.BlockWrapper' => 'div')); + $this->expectError('Cannot use non-block element as block wrapper.'); $this->assertResult('Needs wrap', '

Needs wrap

', array('HTML.BlockWrapper' => 'dav')); - $this->assertError('Cannot use non-block element as block wrapper.'); - $this->assertNoErrors(); } diff --git a/tests/HTMLPurifier/ConfigSchemaTest.php b/tests/HTMLPurifier/ConfigSchemaTest.php index f21c5ad1..075a552c 100644 --- a/tests/HTMLPurifier/ConfigSchemaTest.php +++ b/tests/HTMLPurifier/ConfigSchemaTest.php @@ -2,10 +2,26 @@ require_once 'HTMLPurifier/ConfigSchema.php'; +if (!class_exists('CS')) { + class CS extends HTMLPurifier_ConfigSchema {} +} + class HTMLPurifier_ConfigSchemaTest extends UnitTestCase { + /** + * Munged name of current file. + */ + var $file; + + /** + * Copy of the real ConfigSchema to revert to. + */ var $old_copy; + + /** + * Copy of dummy ConfigSchema for testing purposes. + */ var $our_copy; function setUp() { @@ -18,239 +34,214 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase $this->old_copy = HTMLPurifier_ConfigSchema::instance(); // put in our copy, and reassign to the REAL reference $this->our_copy =& HTMLPurifier_ConfigSchema::instance($our_copy); + + $this->file = $this->our_copy->mungeFilename(__FILE__); } function tearDown() { // testing is done, restore the old copy HTMLPurifier_ConfigSchema::instance($this->old_copy); + tally_errors(); } - function testNormal() { + function test_defineNamespace() { + CS::defineNamespace('http', $d = 'This is an internet protocol.'); - $file = $this->our_copy->mungeFilename(__FILE__); - - // define a namespace - $description = 'Configuration that is always available.'; - HTMLPurifier_ConfigSchema::defineNamespace( - 'Core', $description - ); - $this->assertIdentical($this->our_copy->defaults, array( - 'Core' => array() - )); - $this->assertIdentical($this->our_copy->info, array( - 'Core' => array() - )); - $namespace = new HTMLPurifier_ConfigEntity_Namespace(); - $namespace->description = $description; $this->assertIdentical($this->our_copy->info_namespace, array( - 'Core' => $namespace + 'http' => new 
HTMLPurifier_ConfigEntity_Namespace($d) )); + $this->expectError('Cannot redefine namespace'); + CS::defineNamespace('http', 'It is used to serve webpages.'); + $this->expectError('Namespace name must be alphanumeric'); + CS::defineNamespace('ssh+http', 'This http is tunneled through SSH.'); - // define a directive - $description = 'This is a description of the directive.'; - HTMLPurifier_ConfigSchema::define( - 'Core', 'Name', 'default value', 'string', - $description - ); $line = __LINE__; - $this->assertIdentical($this->our_copy->defaults, array( - 'Core' => array( - 'Name' => 'default value' - ) - )); - $directive = new HTMLPurifier_ConfigEntity_Directive(); - $directive->type = 'string'; - $directive->addDescription($file, $line, $description); - $this->assertIdentical($this->our_copy->info, array( - 'Core' => array( - 'Name' => $directive - ) - )); + $this->expectError('Description must be non-empty'); + CS::defineNamespace('ftp', null); + } + + function test_define() { + CS::defineNamespace('Car', 'Automobiles, those gas-guzzlers!'); + CS::define('Car', 'Seats', 5, 'int', $d = 'Standard issue.'); $l = __LINE__; - - // define a directive in an undefined namespace - HTMLPurifier_ConfigSchema::define( - 'Extension', 'Name', false, 'bool', - 'This is for an extension, but we have not defined its namespace!' 
- ); - $this->assertError('Cannot define directive for undefined namespace'); - $this->assertNoErrors(); - - - - // redefine a value in a valid manner - $description = 'Alternative configuration definition'; - HTMLPurifier_ConfigSchema::define( - 'Core', 'Name', 'default value', 'string', - $description - ); $line = __LINE__; - $this->assertNoErrors(); - $directive->addDescription($file, $line, $description); - $this->assertIdentical($this->our_copy->info, array( - 'Core' => array( - 'Name' => $directive - ) - )); - - - - // redefine a directive in an invalid manner - HTMLPurifier_ConfigSchema::define( - 'Core', 'Name', 'different default', 'string', - 'Inconsistent default or type, cannot redefine' - ); - $this->assertError('Inconsistent default or type, cannot redefine'); - $this->assertNoErrors(); - - - - // make an enumeration - HTMLPurifier_ConfigSchema::defineAllowedValues( - 'Core', 'Name', array( - 'Real Value', - 'Real Value 2' + $this->assertIdentical($this->our_copy->defaults['Car']['Seats'], 5); + $this->assertIdentical($this->our_copy->info['Car']['Seats'], + new HTMLPurifier_ConfigEntity_Directive('int', + array($this->file => array($l => $d)) ) ); - $directive->allowed = array( - 'Real Value' => true, - 'Real Value 2' => true - ); - $this->assertIdentical($this->our_copy->info, array( - 'Core' => array( - 'Name' => $directive - ) - )); + CS::define('Car', 'Age', null, 'int/null', $d = 'Not always known.'); $l = __LINE__; - - // redefinition of enumeration is cumulative - HTMLPurifier_ConfigSchema::defineAllowedValues( - 'Core', 'Name', array( - 'Real Value 3', + $this->assertIdentical($this->our_copy->defaults['Car']['Age'], null); + $this->assertIdentical($this->our_copy->info['Car']['Age'], + new HTMLPurifier_ConfigEntity_Directive('int', + array($this->file => array($l => $d)), true ) ); - $directive->allowed['Real Value 3'] = true; - $this->assertIdentical($this->our_copy->info, array( - 'Core' => array( - 'Name' => $directive - ) - )); + 
$this->expectError('Cannot define directive for undefined namespace'); + CS::define('Train', 'Cars', 10, 'int', 'Including the caboose.'); + $this->expectError('Directive name must be alphanumeric'); + CS::define('Car', 'Is it shiny?', true, 'bool', 'Indicates regular waxing.'); - // cannot define enumeration for undefined directive - HTMLPurifier_ConfigSchema::defineAllowedValues( - 'Core', 'Foobar', array( - 'Real Value 9', + $this->expectError('Invalid type for configuration directive'); + CS::define('Car', 'Efficiency', 50, 'mpg', 'The higher the better.'); + + $this->expectError('Default value does not match directive type'); + CS::define('Car', 'Producer', 'Ford', 'int', 'ID of the company that made the car.'); + + $this->expectError('Description must be non-empty'); + CS::define('Car', 'ComplexAttribute', 'lawyers', 'istring', null); + } + + function testRedefinition_define() { + CS::defineNamespace('Cat', 'Belongs to Schrodinger.'); + + CS::define('Cat', 'Dead', false, 'bool', $d1 = 'Well, is it?'); $l1 = __LINE__; + CS::define('Cat', 'Dead', false, 'bool', $d2 = 'It is difficult to say.'); $l2 = __LINE__; + + $this->assertIdentical($this->our_copy->defaults['Cat']['Dead'], false); + $this->assertIdentical($this->our_copy->info['Cat']['Dead'], + new HTMLPurifier_ConfigEntity_Directive('bool', + array($this->file => array($l1 => $d1, $l2 => $d2)) ) ); - $this->assertError('Cannot define allowed values for undefined directive'); - $this->assertNoErrors(); + $this->expectError('Inconsistent default or type, cannot redefine'); + CS::define('Cat', 'Dead', true, 'bool', 'Quantum mechanics does not know.'); + $this->expectError('Inconsistent default or type, cannot redefine'); + CS::define('Cat', 'Dead', 'maybe', 'string', 'Perhaps if we look we will know.'); + } + + function test_defineAllowedValues() { + CS::defineNamespace('QuantumNumber', 'D'); + CS::define('QuantumNumber', 'Spin', 0.5, 'float', + 'Spin of particle. 
Fourth quantum number, represented by s.'); + CS::define('QuantumNumber', 'Current', 's', 'string', + 'Currently selected quantum number.'); + CS::define('QuantumNumber', 'Difficulty', null, 'string/null', $d = 'How hard are the problems?'); $l = __LINE__; - // test defining value aliases for an enumerated value - HTMLPurifier_ConfigSchema::defineValueAliases( - 'Core', 'Name', array( - 'Aliased Value' => 'Real Value' + CS::defineAllowedValues( // okay, since default is null + 'QuantumNumber', 'Difficulty', array('easy', 'medium', 'hard') + ); + + $this->assertIdentical($this->our_copy->defaults['QuantumNumber']['Difficulty'], null); + $this->assertIdentical($this->our_copy->info['QuantumNumber']['Difficulty'], + new HTMLPurifier_ConfigEntity_Directive( + 'string', + array($this->file => array($l => $d)), + true, + array( + 'easy' => true, + 'medium' => true, + 'hard' => true + ) ) ); - $directive->aliases['Aliased Value'] = 'Real Value'; - $this->assertIdentical($this->our_copy->info, array( - 'Core' => array( - 'Name' => $directive - ) - )); + $this->expectError('Cannot define allowed values for undefined directive'); + CS::defineAllowedValues( + 'SpaceTime', 'Symmetry', array('time', 'spatial', 'projective') + ); + $this->expectError('Cannot define allowed values for directive whose type is not string'); + CS::defineAllowedValues( + 'QuantumNumber', 'Spin', array(0.5, -0.5) + ); - // redefine should be cumulative - HTMLPurifier_ConfigSchema::defineValueAliases( - 'Core', 'Name', array( - 'Aliased Value 2' => 'Real Value 2' + $this->expectError('Default value must be in allowed range of variables'); + CS::defineAllowedValues( + 'QuantumNumber', 'Current', array('n', 'l', 'm') // forgot s! 
+ ); + } + + function test_defineValueAliases() { + CS::defineNamespace('Abbrev', 'Stuff on abbreviations.'); + CS::define('Abbrev', 'HTH', 'Happy to Help', 'string', $d = 'Three-letters'); $l = __LINE__; + CS::defineAllowedValues( + 'Abbrev', 'HTH', array( + 'Happy to Help', + 'Hope that Helps', + 'HAIL THE HAND!' ) ); - $directive->aliases['Aliased Value 2'] = 'Real Value 2'; - $this->assertIdentical($this->our_copy->info, array( - 'Core' => array( - 'Name' => $directive - ) - )); - - - - // cannot create alias to not-allowed value - HTMLPurifier_ConfigSchema::defineValueAliases( - 'Core', 'Name', array( - 'Aliased Value 3' => 'Invalid Value' + CS::defineValueAliases( + 'Abbrev', 'HTH', array( + 'happy' => 'Happy to Help', + 'hope' => 'Hope that Helps' ) ); - $this->assertError('Cannot define alias to value that is not allowed'); - $this->assertNoErrors(); - - - - // cannot create alias for already allowed value - HTMLPurifier_ConfigSchema::defineValueAliases( - 'Core', 'Name', array( - 'Real Value' => 'Real Value 2' + CS::defineValueAliases( // delayed addition + 'Abbrev', 'HTH', array( + 'hail' => 'HAIL THE HAND!' ) ); - $this->assertError('Cannot define alias over allowed value'); - $this->assertNoErrors(); - - - // define a directive with an invalid type - HTMLPurifier_ConfigSchema::define( - 'Core', 'Foobar', false, 'omen', - 'Omen is not a valid type, so we reject this.' + $this->assertIdentical($this->our_copy->defaults['Abbrev']['HTH'], 'Happy to Help'); + $this->assertIdentical($this->our_copy->info['Abbrev']['HTH'], + new HTMLPurifier_ConfigEntity_Directive( + 'string', + array($this->file => array($l => $d)), + false, + array( + 'Happy to Help' => true, + 'Hope that Helps' => true, + 'HAIL THE HAND!' => true + ), + array( + 'happy' => 'Happy to Help', + 'hope' => 'Hope that Helps', + 'hail' => 'HAIL THE HAND!' 
+ ) + ) ); - $this->assertError('Invalid type for configuration directive'); - $this->assertNoErrors(); - - - - // define a directive with inconsistent type - HTMLPurifier_ConfigSchema::define( - 'Core', 'Foobaz', 10, 'string', - 'If we say string, we should mean it, not integer 10.' + $this->expectError('Cannot define alias to value that is not allowed'); + CS::defineValueAliases( + 'Abbrev', 'HTH', array( + 'head' => 'Head to Head' + ) ); - $this->assertError('Default value does not match directive type'); - $this->assertNoErrors(); - - - - // define a directive that allows null - HTMLPurifier_ConfigSchema::define( - 'Core', 'Foobaz', null, 'string/null', - 'Nulls are allowed if you add on /null, cool huh?' + $this->expectError('Cannot define alias over allowed value'); + CS::defineValueAliases( + 'Abbrev', 'HTH', array( + 'Hope that Helps' => 'Happy to Help' + ) ); - $this->assertNoErrors(); + } + + function testAlias() { + CS::defineNamespace('Home', 'Sweet home.'); + CS::define('Home', 'Rug', 3, 'int', 'ID.'); + CS::defineAlias('Home', 'Carpet', 'Home', 'Rug'); - - // define a directive with bad characters - HTMLPurifier_ConfigSchema::define( - 'Core', 'Core.Attr', 10, 'int', - 'No periods! 
>:-(' + $this->assertTrue(!isset($this->our_copy->defaults['Home']['Carpet'])); + $this->assertIdentical($this->our_copy->info['Home']['Carpet'], + new HTMLPurifier_ConfigEntity_DirectiveAlias('Home', 'Rug') ); - $this->assertError('Directive name must be alphanumeric'); - $this->assertNoErrors(); + $this->expectError('Cannot define directive alias in undefined namespace'); + CS::defineAlias('Store', 'Rug', 'Home', 'Rug'); - // define a namespace with bad characters - HTMLPurifier_ConfigSchema::defineNamespace( - 'Foobar&Gromit', $description - ); + $this->expectError('Directive name must be alphanumeric'); + CS::defineAlias('Home', 'R.g', 'Home', 'Rug'); - $this->assertError('Namespace name must be alphanumeric'); - $this->assertNoErrors(); + CS::define('Home', 'Rugger', 'Bob Max', 'string', 'Name of.'); + $this->expectError('Cannot define alias over directive'); + CS::defineAlias('Home', 'Rugger', 'Home', 'Rug'); + $this->expectError('Cannot define alias to undefined directive'); + CS::defineAlias('Home', 'Rug2', 'Home', 'Rugavan'); + + $this->expectError('Cannot define alias to alias'); + CS::defineAlias('Home', 'Rug2', 'Home', 'Carpet'); } function assertValid($var, $type, $ret = null) { @@ -270,25 +261,32 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase $this->assertValid('foobar', 'string'); $this->assertValid('FOOBAR', 'istring', 'foobar'); + $this->assertValid(34, 'int'); + $this->assertValid(3.34, 'float'); + $this->assertValid(false, 'bool'); $this->assertValid(0, 'bool', false); $this->assertValid(1, 'bool', true); - $this->assertInvalid(34, 'bool'); - $this->assertInvalid(null, 'bool'); - $this->assertValid(array('1', '2', '3'), 'list'); - $this->assertValid(array('1' => true, '2' => true), 'lookup'); - $this->assertValid(array('1', '2'), 'lookup', array('1' => true, '2' => true)); - $this->assertValid(array('foo' => 'bar'), 'hash'); - $this->assertInvalid(array(0 => 'moo'), 'hash'); - $this->assertValid(array(1 => 'moo'), 'hash'); - 
$this->assertValid(23, 'mixed'); - $this->assertValid('foo,bar, cow', 'list', array('foo', 'bar', 'cow')); - $this->assertValid('foo,bar', 'lookup', array('foo' => true, 'bar' => true)); $this->assertValid('true', 'bool', true); $this->assertValid('false', 'bool', false); $this->assertValid('1', 'bool', true); + $this->assertInvalid(34, 'bool'); + $this->assertInvalid(null, 'bool'); + + $this->assertValid(array('1', '2', '3'), 'list'); + $this->assertValid('foo,bar, cow', 'list', array('foo', 'bar', 'cow')); + + $this->assertValid(array('1' => true, '2' => true), 'lookup'); + $this->assertValid(array('1', '2'), 'lookup', array('1' => true, '2' => true)); + $this->assertValid('foo,bar', 'lookup', array('foo' => true, 'bar' => true)); + + $this->assertValid(array('foo' => 'bar'), 'hash'); + $this->assertValid(array(1 => 'moo'), 'hash'); + $this->assertInvalid(array(0 => 'moo'), 'hash'); + + $this->assertValid(23, 'mixed'); } @@ -318,12 +316,12 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase function testMungeFilename() { $this->assertMungeFilename( - 'C:\\php\\libs\\htmlpurifier\\library\\HTMLPurifier\\AttrDef.php', + 'C:\\php\\My Libraries\\htmlpurifier\\library\\HTMLPurifier\\AttrDef.php', 'HTMLPurifier/AttrDef.php' ); $this->assertMungeFilename( - 'C:\\php\\libs\\htmlpurifier\\library\\HTMLPurifier.php', + 'C:\\php\\My Libraries\\htmlpurifier\\library\\HTMLPurifier.php', 'HTMLPurifier.php' ); diff --git a/tests/HTMLPurifier/ConfigTest-create.ini b/tests/HTMLPurifier/ConfigTest-create.ini new file mode 100644 index 00000000..3441565b --- /dev/null +++ b/tests/HTMLPurifier/ConfigTest-create.ini @@ -0,0 +1,2 @@ +[Cake] +Sprinkles = 42 \ No newline at end of file diff --git a/tests/HTMLPurifier/ConfigTest-loadIni.ini b/tests/HTMLPurifier/ConfigTest-loadIni.ini new file mode 100644 index 00000000..80dba594 --- /dev/null +++ b/tests/HTMLPurifier/ConfigTest-loadIni.ini @@ -0,0 +1,4 @@ +[Shortcut] +Copy = q +Cut = t +Paste = p \ No newline at end of file diff 
--git a/tests/HTMLPurifier/ConfigTest.php b/tests/HTMLPurifier/ConfigTest.php index a7eb6115..e04ac416 100644 --- a/tests/HTMLPurifier/ConfigTest.php +++ b/tests/HTMLPurifier/ConfigTest.php @@ -2,6 +2,10 @@ require_once 'HTMLPurifier/Config.php'; +if (!class_exists('CS')) { + class CS extends HTMLPurifier_ConfigSchema {} +} + class HTMLPurifier_ConfigTest extends UnitTestCase { @@ -16,109 +20,199 @@ class HTMLPurifier_ConfigTest extends UnitTestCase function tearDown() { HTMLPurifier_ConfigSchema::instance($this->old_copy); + tally_errors(); } - function test() { + // test functionality based on ConfigSchema + + function testNormal() { + CS::defineNamespace('Element', 'Chemical substances that cannot be further decomposed'); - HTMLPurifier_ConfigSchema::defineNamespace('Core', 'Corestuff'); - HTMLPurifier_ConfigSchema::defineNamespace('Attr', 'Attributes'); - HTMLPurifier_ConfigSchema::defineNamespace('Extension', 'Extensible'); - - HTMLPurifier_ConfigSchema::define( - 'Core', 'Key', false, 'bool', 'A boolean directive.' - ); - HTMLPurifier_ConfigSchema::define( - 'Attr', 'Key', 42, 'int', 'An integer directive.' - ); - HTMLPurifier_ConfigSchema::define( - 'Extension', 'Pert', 'foo', 'string', 'A string directive.' - ); - HTMLPurifier_ConfigSchema::define( - 'Core', 'Encoding', 'utf-8', 'istring', 'Case insensitivity!' - ); - - HTMLPurifier_ConfigSchema::define( - 'Extension', 'CanBeNull', null, 'string/null', 'Null or string!' 
- ); - - HTMLPurifier_ConfigSchema::defineAllowedValues( - 'Extension', 'Pert', array('foo', 'moo') - ); - HTMLPurifier_ConfigSchema::defineValueAliases( - 'Extension', 'Pert', array('cow' => 'moo') - ); - HTMLPurifier_ConfigSchema::defineAllowedValues( - 'Core', 'Encoding', array('utf-8', 'iso-8859-1') - ); + CS::define('Element', 'Abbr', 'H', 'string', 'Abbreviation of element name.'); + CS::define('Element', 'Name', 'hydrogen', 'istring', 'Full name of atoms.'); + CS::define('Element', 'Number', 1, 'int', 'Atomic number, is identity.'); + CS::define('Element', 'Mass', 1.00794, 'float', 'Atomic mass.'); + CS::define('Element', 'Radioactive', false, 'bool', 'Does it have rapid decay?'); + CS::define('Element', 'Isotopes', array(1 => true, 2 => true, 3 => true), 'lookup', + 'What numbers of neutrons for this element have been observed?'); + CS::define('Element', 'Traits', array('nonmetallic', 'odorless', 'flammable'), 'list', + 'What are general properties of the element?'); + CS::define('Element', 'IsotopeNames', array(1 => 'protium', 2 => 'deuterium', 3 => 'tritium'), 'hash', + 'Lookup hash of neutron counts to formal names.'); + CS::define('Element', 'Object', new stdClass(), 'mixed', 'Model representation.'); $config = HTMLPurifier_Config::createDefault(); // test default value retrieval - $this->assertIdentical($config->get('Core', 'Key'), false); - $this->assertIdentical($config->get('Attr', 'Key'), 42); - $this->assertIdentical($config->get('Extension', 'Pert'), 'foo'); + $this->assertIdentical($config->get('Element', 'Abbr'), 'H'); + $this->assertIdentical($config->get('Element', 'Name'), 'hydrogen'); + $this->assertIdentical($config->get('Element', 'Number'), 1); + $this->assertIdentical($config->get('Element', 'Mass'), 1.00794); + $this->assertIdentical($config->get('Element', 'Radioactive'), false); + $this->assertIdentical($config->get('Element', 'Isotopes'), array(1 => true, 2 => true, 3 => true)); + $this->assertIdentical($config->get('Element', 
'Traits'), array('nonmetallic', 'odorless', 'flammable')); + $this->assertIdentical($config->get('Element', 'IsotopeNames'), array(1 => 'protium', 2 => 'deuterium', 3 => 'tritium')); + $this->assertIdentical($config->get('Element', 'Object'), new stdClass()); - // set some values - $config->set('Core', 'Key', true); - $this->assertIdentical($config->get('Core', 'Key'), true); + // test setting values + $config->set('Element', 'Abbr', 'Pu'); + $config->set('Element', 'Name', 'PLUTONIUM'); // test decaps + $config->set('Element', 'Number', '94'); // test parsing + $config->set('Element', 'Mass', '244.'); // test parsing + $config->set('Element', 'Radioactive', true); + $config->set('Element', 'Isotopes', array(238, 239)); // test inversion + $config->set('Element', 'Traits', 'nuclear, heavy, actinide'); // test parsing + $config->set('Element', 'IsotopeNames', array(238 => 'Plutonium-238', 239 => 'Plutonium-239')); + $config->set('Element', 'Object', false); // unmodeled - // try to retrieve undefined value - $config->get('Core', 'NotDefined'); - $this->assertError('Cannot retrieve value of undefined directive'); - $this->assertNoErrors(); + // test value retrieval + $this->assertIdentical($config->get('Element', 'Abbr'), 'Pu'); + $this->assertIdentical($config->get('Element', 'Name'), 'plutonium'); + $this->assertIdentical($config->get('Element', 'Number'), 94); + $this->assertIdentical($config->get('Element', 'Mass'), 244.); + $this->assertIdentical($config->get('Element', 'Radioactive'), true); + $this->assertIdentical($config->get('Element', 'Isotopes'), array(238 => true, 239 => true)); + $this->assertIdentical($config->get('Element', 'Traits'), array('nuclear', 'heavy', 'actinide')); + $this->assertIdentical($config->get('Element', 'IsotopeNames'), array(238 => 'Plutonium-238', 239 => 'Plutonium-239')); + $this->assertIdentical($config->get('Element', 'Object'), false); - // try to set undefined value - $config->set('Foobar', 'Key', 'foobar'); - 
$this->assertError('Cannot set undefined directive to value'); - $this->assertNoErrors(); + // errors - // try to set not allowed value - $config->set('Extension', 'Pert', 'wizard'); - $this->assertError('Value not supported'); - $this->assertNoErrors(); + $this->expectError('Cannot retrieve value of undefined directive'); + $config->get('Element', 'Metal'); - // try to set not allowed value - $config->set('Extension', 'Pert', 34); - $this->assertError('Value is of invalid type'); - $this->assertNoErrors(); + $this->expectError('Cannot set undefined directive to value'); + $config->set('Element', 'Metal', true); - // set aliased value - $config->set('Extension', 'Pert', 'cow'); - $this->assertNoErrors(); - $this->assertIdentical($config->get('Extension', 'Pert'), 'moo'); + $this->expectError('Value is of invalid type'); + $config->set('Element', 'Radioactive', 'very'); - // case-insensitive attempt to set value that is allowed - $config->set('Core', 'Encoding', 'ISO-8859-1'); - $this->assertNoErrors(); - $this->assertIdentical($config->get('Core', 'Encoding'), 'iso-8859-1'); + } + + function testEnumerated() { - // set null to directive that allows null - $config->set('Extension', 'CanBeNull', null); - $this->assertNoErrors(); - $this->assertIdentical($config->get('Extension', 'CanBeNull'), null); + CS::defineNamespace('Instrument', 'Of the musical type.'); - $config->set('Extension', 'CanBeNull', 'foobar'); - $this->assertNoErrors(); - $this->assertIdentical($config->get('Extension', 'CanBeNull'), 'foobar'); + // case sensitive + CS::define('Instrument', 'Manufacturer', 'Yamaha', 'string', 'Who made it?'); + CS::defineAllowedValues('Instrument', 'Manufacturer', array( + 'Yamaha', 'Conn-Selmer', 'Vandoren', 'Laubin', 'Buffet', 'other')); + CS::defineValueAliases('Instrument', 'Manufacturer', array( + 'Selmer' => 'Conn-Selmer')); - // set null to directive that doesn't allow null - $config->set('Extension', 'Pert', null); - $this->assertError('Value is of invalid 
type'); - $this->assertNoErrors(); + // case insensitive + CS::define('Instrument', 'Family', 'woodwind', 'istring', 'What family is it?'); + CS::defineAllowedValues('Instrument', 'Family', array( + 'brass', 'woodwind', 'percussion', 'string', 'keyboard', 'electronic')); + CS::defineValueAliases('Instrument', 'Family', array( + 'synth' => 'electronic')); + + $config = HTMLPurifier_Config::createDefault(); + + // case sensitive + + $config->set('Instrument', 'Manufacturer', 'Vandoren'); + $this->assertIdentical($config->get('Instrument', 'Manufacturer'), 'Vandoren'); + + $config->set('Instrument', 'Manufacturer', 'Selmer'); + $this->assertIdentical($config->get('Instrument', 'Manufacturer'), 'Conn-Selmer'); + + $this->expectError('Value not supported'); + $config->set('Instrument', 'Manufacturer', 'buffet'); + + // case insensitive + + $config->set('Instrument', 'Family', 'brass'); + $this->assertIdentical($config->get('Instrument', 'Family'), 'brass'); + + $config->set('Instrument', 'Family', 'PERCUSSION'); + $this->assertIdentical($config->get('Instrument', 'Family'), 'percussion'); + + $config->set('Instrument', 'Family', 'synth'); + $this->assertIdentical($config->get('Instrument', 'Family'), 'electronic'); + + $config->set('Instrument', 'Family', 'Synth'); + $this->assertIdentical($config->get('Instrument', 'Family'), 'electronic'); + + } + + function testNull() { + + CS::defineNamespace('ReportCard', 'It is for grades.'); + CS::define('ReportCard', 'English', null, 'string/null', 'Grade from English class.'); + CS::define('ReportCard', 'Absences', 0, 'int', 'How many times missing from school?'); + + $config = HTMLPurifier_Config::createDefault(); + + $config->set('ReportCard', 'English', 'B-'); + $this->assertIdentical($config->get('ReportCard', 'English'), 'B-'); + + $config->set('ReportCard', 'English', null); // not yet graded + $this->assertIdentical($config->get('ReportCard', 'English'), null); + + // error + $this->expectError('Value is of invalid 
type'); + $config->set('ReportCard', 'Absences', null); + + } + + function testAliases() { + + HTMLPurifier_ConfigSchema::defineNamespace('Home', 'Sweet home.'); + HTMLPurifier_ConfigSchema::define('Home', 'Rug', 3, 'int', 'ID.'); + HTMLPurifier_ConfigSchema::defineAlias('Home', 'Carpet', 'Home', 'Rug'); + + $config = HTMLPurifier_Config::createDefault(); + + $this->assertEqual($config->get('Home', 'Rug'), 3); + + $this->expectError('Cannot get value from aliased directive, use real name'); + $config->get('Home', 'Carpet'); + + $config->set('Home', 'Carpet', 999); + $this->assertEqual($config->get('Home', 'Rug'), 999); + + } + + // test functionality based on method + + function test_getBatch() { + + CS::defineNamespace('Variables', 'Changing quantities in equation.'); + CS::define('Variables', 'TangentialAcceleration', 'a_tan', 'string', 'In m/s^2'); + CS::define('Variables', 'AngularAcceleration', 'alpha', 'string', 'In rad/s^2'); + + $config = HTMLPurifier_Config::createDefault(); // grab a namespace - $config->set('Attr', 'Key', 0xBEEF); $this->assertIdentical( - $config->getBatch('Attr'), + $config->getBatch('Variables'), array( - 'Key' => 0xBEEF + 'TangentialAcceleration' => 'a_tan', + 'AngularAcceleration' => 'alpha' ) ); // grab a non-existant namespace - $config->getBatch('FurnishedGoods'); - $this->assertError('Cannot retrieve undefined namespace'); - $this->assertNoErrors(); + $this->expectError('Cannot retrieve undefined namespace'); + $config->getBatch('Constants'); + + } + + function test_loadIni() { + + CS::defineNamespace('Shortcut', 'Keyboard shortcuts for commands'); + CS::define('Shortcut', 'Copy', 'c', 'istring', 'Copy text'); + CS::define('Shortcut', 'Paste', 'v', 'istring', 'Paste clipboard'); + CS::define('Shortcut', 'Cut', 'x', 'istring', 'Cut text'); + + $config = HTMLPurifier_Config::createDefault(); + + $config->loadIni(dirname(__FILE__) . 
'/ConfigTest-loadIni.ini'); + + $this->assertIdentical($config->get('Shortcut', 'Copy'), 'q'); + $this->assertIdentical($config->get('Shortcut', 'Paste'), 'p'); + $this->assertIdentical($config->get('Shortcut', 'Cut'), 't'); } @@ -148,7 +242,7 @@ class HTMLPurifier_ConfigTest extends UnitTestCase 'Zoo', 'Others', array(), 'list', 'Other animals we have one of.' ); - $config_manual = HTMLPurifier_Config::createDefault(); + $config_manual = HTMLPurifier_Config::createDefault(); $config_loadabbr = HTMLPurifier_Config::createDefault(); $config_loadfull = HTMLPurifier_Config::createDefault(); @@ -197,6 +291,10 @@ class HTMLPurifier_ConfigTest extends UnitTestCase $created_config = HTMLPurifier_Config::create(array('Cake.Sprinkles' => 42)); $this->assertEqual($config, $created_config); + // test loadIni + $created_config = HTMLPurifier_Config::create(dirname(__FILE__) . '/ConfigTest-create.ini'); + $this->assertEqual($config, $created_config); + } } diff --git a/tests/HTMLPurifier/ContextTest.php b/tests/HTMLPurifier/ContextTest.php index 88c0f615..195a5030 100644 --- a/tests/HTMLPurifier/ContextTest.php +++ b/tests/HTMLPurifier/ContextTest.php @@ -29,12 +29,13 @@ class HTMLPurifier_ContextTest extends UnitTestCase $this->context->destroy('IDAccumulator'); $this->assertFalse($this->context->exists('IDAccumulator')); + + $this->expectError('Attempted to retrieve non-existent variable'); $accumulator_3 =& $this->context->get('IDAccumulator'); - $this->assertError('Attempted to retrieve non-existent variable'); $this->assertNull($accumulator_3); + $this->expectError('Attempted to destroy non-existent variable'); $this->context->destroy('IDAccumulator'); - $this->assertError('Attempted to destroy non-existent variable'); } @@ -42,15 +43,13 @@ class HTMLPurifier_ContextTest extends UnitTestCase $var = true; $this->context->register('OnceOnly', $var); - $this->assertNoErrors(); + $this->expectError('Name collision, cannot re-register'); $this->context->register('OnceOnly', 
$var); - $this->assertError('Name collision, cannot re-register'); // destroy it, now registration is okay $this->context->destroy('OnceOnly'); $this->context->register('OnceOnly', $var); - $this->assertNoErrors(); } diff --git a/tests/HTMLPurifier/EncoderTest.php b/tests/HTMLPurifier/EncoderTest.php index b8437fb2..ef14b139 100644 --- a/tests/HTMLPurifier/EncoderTest.php +++ b/tests/HTMLPurifier/EncoderTest.php @@ -5,7 +5,7 @@ require_once 'HTMLPurifier/Encoder.php'; class HTMLPurifier_EncoderTest extends UnitTestCase { - var $Encoder; + var $_entity_lookup; function setUp() { $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); @@ -60,6 +60,9 @@ class HTMLPurifier_EncoderTest extends UnitTestCase $config = HTMLPurifier_Config::createDefault(); $context = new HTMLPurifier_Context(); + // zhong-wen + $chinese = "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)"; + // UTF-8 means that we don't touch it $this->assertIdentical( HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context), @@ -74,13 +77,55 @@ class HTMLPurifier_EncoderTest extends UnitTestCase "\xF6" ); - $config->set('Test', 'ForceNoIconv', true); + if (function_exists('iconv')) { + // iconv has it's own way + $this->assertIdentical( + HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context), + " (Chinese)" + ); + } + // Plain PHP implementation has slightly different behavior + $config->set('Test', 'ForceNoIconv', true); $this->assertIdentical( HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context), "\xF6" ); + $this->assertIdentical( + HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context), + "?? (Chinese)" + ); + + // Preserve the characters! 
+ + $config->set('Core', 'EscapeNonASCIICharacters', true); + $this->assertIdentical( + HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context), + "中文 (Chinese)" + ); + + } + + function test_convertToASCIIDumbLossless() { + + // Uppercase thorn letter + $this->assertIdentical( + HTMLPurifier_Encoder::convertToASCIIDumbLossless("\xC3\x9Eorn"), + "Þorn" + ); + + $this->assertIdentical( + HTMLPurifier_Encoder::convertToASCIIDumbLossless("an"), + "an" + ); + + // test up to four bytes + $this->assertIdentical( + HTMLPurifier_Encoder::convertToASCIIDumbLossless("\xF3\xA0\x80\xA0"), + "󠀠" + ); + } } diff --git a/tests/HTMLPurifier/SimpleTest/Reporter.php b/tests/HTMLPurifier/SimpleTest/Reporter.php new file mode 100644 index 00000000..241b91f4 --- /dev/null +++ b/tests/HTMLPurifier/SimpleTest/Reporter.php @@ -0,0 +1,35 @@ + +
+ + +
+ \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php index a395cf07..38bd996b 100644 --- a/tests/HTMLPurifier/Strategy/FixNestingTest.php +++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php @@ -91,11 +91,10 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness '
Reject
', 'Reject', array('HTML.Parent' => 'span') ); + $this->expectError('Cannot use unrecognized element as parent.'); $this->assertResult( '
Accept
', true, array('HTML.Parent' => 'script') ); - $this->assertError('Cannot use unrecognized element as parent.'); - $this->assertNoErrors(); } diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php index 902d0edb..750d972c 100644 --- a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php +++ b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php @@ -154,7 +154,7 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends 'Invalid value!' ); - // comparison check for test 20 + // see above, behavior is subtly different $this->assertResult( 'Invalid value!', 'Invalid value!' @@ -176,4 +176,4 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends } -?> \ No newline at end of file +?> diff --git a/tests/generate_mock_once.func.php b/tests/generate_mock_once.func.php new file mode 100644 index 00000000..3e4d5b6d --- /dev/null +++ b/tests/generate_mock_once.func.php @@ -0,0 +1,11 @@ + \ No newline at end of file diff --git a/tests/index.php b/tests/index.php index 3f9775aa..b034d4d8 100644 --- a/tests/index.php +++ b/tests/index.php @@ -1,147 +1,82 @@ =')) { - $test_files[] = 'TokenFactoryTest.php'; -} - +require 'test_files.php'; // populates $test_files array +sort($test_files); // for the SELECT +$GLOBALS['HTMLPurifierTest']['Files'] = $test_files; // for the reporter $test_file_lookup = array_flip($test_files); -function htmlpurifier_path2class($path) { - $temp = $path; - $temp = str_replace('./', '', $temp); // remove leading './' - $temp = str_replace('.\\', '', $temp); // remove leading '.\' - $temp = str_replace('\\', '_', $temp); // normalize \ to _ - $temp = str_replace('/', '_', $temp); // normalize / to _ - while(strpos($temp, '__') !== false) $temp = str_replace('__', '_', $temp); - $temp = str_replace('.php', '', $temp); - return $temp; +// determine test file +if (isset($_GET['f']) && isset($test_file_lookup[$_GET['f']])) { + $GLOBALS['HTMLPurifierTest']['File'] = $_GET['f']; 
+} else { + $GLOBALS['HTMLPurifierTest']['File'] = false; } // we can't use addTestFile because SimpleTest chokes on E_STRICT warnings - -if (isset($_GET['file']) && isset($test_file_lookup[$_GET['file']])) { +if ($test_file = $GLOBALS['HTMLPurifierTest']['File']) { - // execute only one test - $test_file = $_GET['file']; - - $test = new GroupTest('HTML Purifier - ' . $test_file); + $test = new GroupTest($test_file . ' - HTML Purifier'); $path = 'HTMLPurifier/' . $test_file; require_once $path; - $test->addTestClass(htmlpurifier_path2class($path)); + $test->addTestClass(path2class($path)); } else { - $test = new GroupTest('HTML Purifier'); + $test = new GroupTest('All Tests - HTML Purifier'); foreach ($test_files as $test_file) { $path = 'HTMLPurifier/' . $test_file; require_once $path; - $test->addTestClass(htmlpurifier_path2class($path)); + $test->addTestClass(path2class($path)); } } if (SimpleReporter::inCli()) $reporter = new TextReporter(); -else $reporter = new HTMLReporter('UTF-8'); +else $reporter = new HTMLPurifier_SimpleTest_Reporter('UTF-8'); $test->run($reporter); diff --git a/tests/path2class.func.php b/tests/path2class.func.php new file mode 100644 index 00000000..1fad4ea6 --- /dev/null +++ b/tests/path2class.func.php @@ -0,0 +1,14 @@ + \ No newline at end of file diff --git a/tests/tally_errors.func.php b/tests/tally_errors.func.php new file mode 100644 index 00000000..84aaef04 --- /dev/null +++ b/tests/tally_errors.func.php @@ -0,0 +1,18 @@ +get('SimpleErrorQueue'); + if (!isset($queue->_expectation_queue)) return; // fut-compat + foreach ($queue->_expectation_queue as $e) { + if (count($e) != 2) return; // fut-compat + if (!isset($e[0])) return; // fut-compat + $e[0]->_dumper = new SimpleDumper(); + $this->fail('Error expectation not fulfilled: ' . 
+ $e[0]->testMessage(null)); + } + $queue->_expectation_queue = array(); +} + +?> \ No newline at end of file diff --git a/tests/test_files.php b/tests/test_files.php new file mode 100644 index 00000000..ab83ec47 --- /dev/null +++ b/tests/test_files.php @@ -0,0 +1,72 @@ +=')) { + $test_files[] = 'TokenFactoryTest.php'; +} + +?> \ No newline at end of file