From dd2fd0659195e87a90842d2945b0749736eec5c8 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang" It makes no sense to adopt a Our goals are to let the user: By default, users will use a doctype-based, permissive but secure
+whitelist. They must define a doctype, and this serves
+as the first method of determining a filterset. This identifier is based
+on the name the W3C has given to the document type and not
+the DTD identifier. This parameter is set via the configuration object: However, selecting this doctype doesn't mean much, because if we
+adhered exactly to the definition we would be letting XSS and other
+nasties through. HTML Purifier must, in its filterset, allow a subset
+of the doctype, which we shall call a filterset. By default, HTML Purifier will use the Rich
+filterset, which allows as many elements as possible with untrusted
+sources. Other possible filtersets could be: Extension-authors would be able to define custom filtersets for
+other users to use. A possible call to select a filterset would be: Within filtersets, there are various modes of operation.
+These indicate variant behaviors that, while not strictly changing the
+allowed set of elements and attributes, will definitely affect the output.
+Currently, we have two modes, which may be used together: A possible call to select modes would be: If modes have extra parameters, a hash might work well: Modes may possibly be wrapped up with the filterset declaration: Further investigation in this field is necessary. If this cookie cutter approach doesn't appeal to a user, they may
+decide to roll their own filterset by selecting modules, tags and
+attributes to allow. This would make use of the same facilities
+as a filterset author would use, except that it would go under an
+ On the highest level, a user will usually be most interested in
+directly specifying which elements and attributes are desired. For
+example: Attribute declarations could be merged into this declaration as such: ...or be kept separate: Considering that, internally speaking, as mandated by
+the XHTML 1.1 Modularization specification, we have organized our
+elements around modules, considerable gymnastics will be needed to
+get this sort of functionality working. A user may also specify a module to load a class of elements and attributes
+into their filterset: The granularity of these modules is too coarse for
+the average user (for example, the core module loads everything from
+the essential Because selecting each and every one of these configuration options
+is a chore, we may wish to offer a specialized configuration method
+for selecting a filterset. Possibility: ...which is simply a light wrapper over the individual configuration
+calls. A custom config file format or text format could also be adopted. Character encoding and character sets, in truth, are not that
-difficult to understand. But if you don't understand them, you are going
-to be caught by surprise by some of HTML Purifier's behavior, namely
-the fact that it operates UTF-8 or the limitations of the character
-encoding transformations it does. This document will walk you through
+ Character encoding and character sets are not that
+difficult to understand, but so many people blithely stumble
+through the worlds of programming without knowing what to actually
+do about it, or say "Ah, it's a job for those internationalization
+experts." No, it is not! This document will walk you through
determining the encoding of your system and how you should handle
this information. It will stay away from excessive discussion on
-the internals of character encoding, but offer the information in
-asides that can easily be skipped.
)
@@ -76,7 +77,6 @@ Ongoing
- more! (look for ones that use WYSIWYGs)
Unknown release (on a scratch-an-itch basis)
- - Upgrade SimpleTest testing code to newest versions
- Have 'lang' attribute be checked against official lists
? Semi-lossy dumb alternate character encoding transformations, achieved by
encoding all characters that have string entity equivalents
diff --git a/benchmarks/Lexer.php b/benchmarks/Lexer.php
index 9e13b54b..86df149b 100644
--- a/benchmarks/Lexer.php
+++ b/benchmarks/Lexer.php
@@ -7,6 +7,7 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
require_once 'HTMLPurifier/ConfigSchema.php';
require_once 'HTMLPurifier/Config.php';
+require_once 'HTMLPurifier/Context.php';
$LEXERS = array();
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
@@ -93,11 +94,14 @@ function print_lexers() {
function do_benchmark($name, $document) {
global $LEXERS, $RUNS;
+ $config = HTMLPurifier_Config::createDefault();
+ $context = new HTMLPurifier_Context();
+
$timer = new RowTimer($name);
$timer->start();
foreach($LEXERS as $key => $lexer) {
- for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
+ for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document, $config, $context);
$timer->setMarker($key);
}
diff --git a/benchmarks/ProfileDirectLex.php b/benchmarks/ProfileDirectLex.php
index faf9bef5..20ff0159 100644
--- a/benchmarks/ProfileDirectLex.php
+++ b/benchmarks/ProfileDirectLex.php
@@ -5,12 +5,15 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
require_once 'HTMLPurifier/ConfigSchema.php';
require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Lexer/DirectLex.php';
+require_once 'HTMLPurifier/Context.php';
$input = file_get_contents('samples/Lexer/4.html');
$lexer = new HTMLPurifier_Lexer_DirectLex();
+$config = HTMLPurifier_Config::createDefault();
+$context = new HTMLPurifier_Context();
for ($i = 0; $i < 10; $i++) {
- $tokens = $lexer->tokenizeHTML($input);
+ $tokens = $lexer->tokenizeHTML($input, $config, $context);
}
?>
\ No newline at end of file
diff --git a/configdoc/generate.php b/configdoc/generate.php
index 14335e98..d5966e2e 100644
--- a/configdoc/generate.php
+++ b/configdoc/generate.php
@@ -188,7 +188,7 @@ $xsl_processor->importStylesheet($xsl_dom_stylesheet);
$html_output = $xsl_processor->transformToXML($dom_document);
// some slight fudges to preserve backwards compatibility
-$html_output = str_replace('/>', ' />', $html_output); //
not
+$html_output = str_replace('/>', ' />', $html_output); //
not
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
if (class_exists('Tidy')) {
diff --git a/docs/dev-advanced-api.html b/docs/dev-advanced-api.html
new file mode 100644
index 00000000..731397f2
--- /dev/null
+++ b/docs/dev-advanced-api.html
@@ -0,0 +1,188 @@
+
+
+
+
+
+
+
+Advanced API
+
+one-size-fits-all
approach to
+filtersets: therefore, users must be able to define their own sets of
+allowed
elements, as well as switch in-between doctypes of HTML.
+
+
+
+
+
+
Select
+
+Selecting a Doctype
+
+$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
+
+Selecting a Filterset
+
+
+
+
+$config->set('HTML', 'Filterset', 'Rich');
+
+Selecting Mode
+
+
+
+
+center
+ tag would be turned into a div
with the CSS property
+ text-align:center;
, but in XHTML 1.0 Transitional
+ the tag would be preserved. This mode is on by default.center
tag would
+ be transformed in both cases. However, tags without a
+ reasonable standards-compliant alternative will be preserved
+ in their form. This mode is on by default. It may have
+ various levels of operation.$config->set('HTML', 'Mode', array('correctional', 'lenient'));
+
+$config->set('HTML', 'Mode', array(
+ 'correctional' => 9, // strongest level
+ 'lenient' => true // this one's just boolean
+));
+
+$config->set('HTML', 'Filterset', 'Rich: correctional, lenient');
+
+Selecting Modules / Tags / Attributes
+
+anonymous
filterset that would be auto-selected if any of the
+relevant module/tag/attribute selection configuration directives were
+non-null.$config->set('HTML', 'AllowedElements', 'a,b,em,p,blockquote,code,i');
+
+$config->set('HTML', 'Allowed', 'a[href,title],b,em,p[class],blockquote[cite],code,i');
+
+$config->set('HTML', 'AllowedAttributes', 'a.href,a.title,p.class,blockquote.cite');
+
+$config->set('HTML', 'Allowed', 'Hypertext,Core');
+
+p
tag to the not-so-safe h1
+tag). How do we make this still a viable solution?Unified selector
+
+function selectFilter($doctype, $filterset, $mode)
+
+UTF-8
+UTF-8: The Secret of Character Encoding
This document is not designed to be read in its entirety: it will +slowly introduce concepts that build on each other: you need not get to +the bottom to have learned something new. However, I strongly +recommend you read all the way to Why UTF-8?, because at least +at that point you'd have made a conscious decision not to migrate, +which can be a rewarding (but difficult) task.
+Asides@@ -43,6 +49,50 @@ asides that can easily be skipped. with a greater understanding of the underlying issues.
In the beginning, there was ASCII, and things were simple. But they @@ -275,7 +325,7 @@ your own php.ini file, ask your support for details. Use:
You may, for whatever reason, may need to set the character encoding +
You may, for whatever reason, need to set the character encoding on non-PHP files, usually plain ol' HTML files. Doing this is more of a hit-or-miss process: depending on the software being used as a webserver and the configuration of that software, certain @@ -386,8 +436,8 @@ processing instructions. They look like:
For XHTML, this processing instruction theoretically
overrides the META
tag. In reality, this happens only when the
-XHTML is actually served as legit XML and not HTML, which is almost
-always never due to Internet Explorer's lack of support for
+XHTML is actually served as legit XML and not HTML, which is almost always
+never due to Internet Explorer's lack of support for
application/xhtml+xml
(even though doing so is often
argued to be good practice).
In short, if you use XHTML and have gone through the
-trouble of adding the XML header, be sure to make sure it jives
+trouble of adding the XML header, make sure it jibes
with your META
tags and HTTP headers.
This section is not required reading, but may answer some of your questions on what's going on in all @@ -572,7 +622,7 @@ Each method has deficiencies, especially the former.
the page, you still have the trouble of what to do with characters that are outside of the character encoding's range. The behavior, once again, varies: Firefox 2.0 entity-izes them while Internet Explorer -7.0 mangles them beyond intelligibility. For serious I18N purposes, +7.0 mangles them beyond intelligibility. For serious internationalization purposes, this is not an option.The other possibility is to set Accept-Encoding to UTF-8, which @@ -604,22 +654,374 @@ hounding you about broken pages.
And finally, we get to HTML Purifier.
+And finally, we get to HTML Purifier. HTML Purifier is built to +deal with UTF-8: any indications otherwise are the result of an +encoder that converts text from your preferred encoding to UTF-8, and +back again. HTML Purifier never touches anything else, and leaves +it up to the module iconv to do the dirty work.
+ +This approach, however, is not perfect. iconv is blithely unaware +of HTML character entities. HTML Purifier, in order to +protect against sophisticated escaping schemes, normalizes all character +and numeric entities before processing the text. This leads to +one important ramification:
+ +Any character that is not supported by the target character +set, regardless of whether or not it is in the form of a character +entity or a raw character, will be silently ignored.
+ +Example of this principle at work: say you have θ
+in your HTML, but the output is in Latin-1 (which, understandably,
+does not understand Greek), the following process will occur (assuming you've
+set the encoding correctly using %Core.Encoding):
Encoder
will transform the text from ISO 8859-1 to UTF-8
+ (note that theta is preserved since it doesn't actually use
+ any non-ASCII characters): θ
EntityParser
will transform all named and numeric
+ character entities to their corresponding raw UTF-8 equivalents:
+ θ
θ
Encoder
now transforms the text back from UTF-8
+ to ISO 8859-1. Since Greek is not supported by ISO 8859-1, it
+ will be either ignored or replaced with a question mark:
+ ?
This behaviour is quite unsatisfactory. It is a deal-breaker for +international applications, and it can be mildly annoying for the provincial +soul who occasionally needs a special character. Since 1.4.0, HTML +Purifier has provided a slightly more palatable workaround using +%Core.EscapeNonASCIICharacters. The process now looks like:
+ +Encoder
transforms encoding to UTF-8: θ
EntityParser
transforms entities: θ
θ
Encoder
replaces all non-ASCII characters
+ with numeric entities: θ
Encoder
transforms encoding back to
+ original (which is strictly unnecessary for 99% of encodings
+ out there): θ
(remember, it's all ASCII!)...which means that this is only good for an occasional foray into +the land of Unicode characters, and is totally unacceptable for Chinese +or Japanese texts. The even bigger kicker is that, supposing the +input encoding was actually ISO-8859-7, which does support +theta, the character would get entity-ized anyway! (The Encoder does +not discriminate).
+ +The current functionality is about where HTML Purifier will be for +the rest of eternity. HTML Purifier could attempt to preserve the original +form of the entities so that they could be substituted back in, only the +DOM extension kills them off irreversibly. HTML Purifier could also attempt +to be smart and only convert non-ASCII characters that weren't supported +by the target encoding, but that would require reimplementing iconv +with HTML awareness, something I will not do.
So there: either it's UTF-8 or crippled international support. Your pick! (and I'm +not being sarcastic here: some people couldn't care less about other languages)
So, you've decided to bite the bullet, and want to migrate to UTF-8. +Note that this is not for the faint-hearted, and you should expect +the process to take longer than you think it will take.
+ +The general idea is that you convert all existing text to UTF-8, +and then you set all the headers and META tags we discussed earlier +to UTF-8. There are many ways going about doing this: you could +write a conversion script that runs through the database and re-encodes +everything as UTF-8 or you could do the conversion on the fly when someone +reads the page. The details depend on your system, but I will cover +some of the more subtle points of migration that may trip you up.
Most modern databases, the most prominent open-source ones being MySQL +4.1+ and PostgreSQL, support character encodings. If you're switching +to UTF-8, logically speaking, you'd want to make sure your database +knows about the change too. There are some caveats though:
+ +Standardization in terms of SQL syntax for specifying character +encodings is notoriously spotty. Refer to your respective database's +documentation on how to do this properly.
+ +For MySQL, ALTER
will magically perform the
+character encoding conversion for you. However, you have
+to make sure that the text inside the column is what it says it is:
+if you had put Shift-JIS in an ISO 8859-1 column, MySQL will irreversibly mangle
+the text when you try to convert it to UTF-8. You'll have to convert
+it to a binary field, convert it to a Shift-JIS field (the real encoding),
+and then finally to UTF-8. Many a website had pages irreversibly mangled
+because they didn't realize that they'd been deluding themselves about
+the character encoding all along, don't become the next victim.
For PostgreSQL, there appears to be no direct way to change the +encoding of a database (as of 8.2). You will have to dump the data, and then reimport +it into a new table. Make sure that your client encoding is set properly: +this is how PostgreSQL knows to perform an encoding conversion.
+ +Many times, you will be also asked about the "collation" of +the new column. Collation is how a DBMS sorts text, like ordering +B, C and A into A, B and C (the problem gets surprisingly complicated +when you get to languages like Thai and Japanese). If in doubt, +going with the default setting is usually a safe bet.
+ +Once the conversion is all said and done, you still have to remember
+to set the client encoding (your encoding) properly on each database
+connection using SET NAMES
(which is standard SQL and is
+usually supported).
Due to the abovementioned compatibility issues, a more interoperable
+way of storing UTF-8 text is to stuff it in a binary datatype.
+CHAR
becomes BINARY
, VARCHAR
becomes
+VARBINARY
and TEXT
becomes BLOB
.
+Doing so can save you some huge headaches:
MediaWiki, a very prominent international application, uses binary fields +for storing their data because of point three.
+ +There are drawbacks, of course:
+ +Choose based on your circumstances.
+ +For more flat-file oriented systems, you will often be tasked with +converting reams of existing text and HTML files into UTF-8, as well as +making sure that all new files uploaded are properly encoded. Once again, +I can only point vaguely in the right direction for converting your +existing files: make sure you backup, make sure you use +iconv(), and +make sure you know what the original character encoding of the files +is (or are, depending on the tidiness of your system).
+ +However, I can proffer more specific advice on the subject of +text editors. Many text editors have notoriously spotty Unicode support. +To find out how your editor is doing, you can check out this list +or Wikipedia's list. +I personally use Notepad++, which works like a charm when it comes to UTF-8. +Usually, you will have to explicitly tell the editor through some dialogue +(usually Save as or Format) what encoding you want it to use. An editor +will often offer "Unicode" as a method of saving, which is +ambiguous. Make sure you know whether or not they really mean UTF-8 +or UTF-16 (which is another flavor of Unicode).
+ +The two things to look out for are whether or not the editor +supports font mixing (multiple +fonts in one document) and whether or not it adds a BOM. +Font mixing is important because fonts rarely have support for every +language known to mankind: in order to be flexible, an editor must +be able to take a little from here and a little from there, otherwise +all your Chinese characters will come as nice boxes. We'll discuss +BOM below.
The BOM, or Byte +Order Mark, is a magical, invisible character placed at +the beginning of UTF-8 files to tell people what the encoding is and +what the endianness of the text is. It is also unnecessary.
+ +Because it's invisible, it often +catches people by surprise when it starts doing things it shouldn't +be doing. For example, this PHP file:
+ +BOM<?php +header('Location: index.php'); +?>+ +
...will fail with the all too familiar Headers already sent +PHP error. And because the BOM is invisible, this culprit will go unnoticed. +My suggestion is to only use ASCII in PHP pages, but if you must, make +sure the page is saved WITHOUT the BOM.
+ +++ +The headers the error is referring to are HTTP headers, + which are sent to the browser before any HTML to tell it various + information. The moment any regular text (and yes, a BOM counts as + ordinary text) is output, the headers must be sent, and you are + not allowed to send anymore. Thus, the error.
+
If you are reading in text files to insert into the middle of another
+page, it is strongly advised (but not strictly necessary) that you replace out the UTF-8 byte
+sequence for BOM "\xEF\xBB\xBF"
before inserting it in,
+via:
$text = str_replace("\xEF\xBB\xBF", '', $text);+ +
Generally speaking, people who are having trouble with fonts fall +into two categories:
+ +Yes, there's always a chance where an English user happens across +a Sinhalese website and doesn't have the right font. But an English user +who happens not to have the right fonts probably has no business reading Sinhalese +anyway. So we'll deal with the other two edge cases.
+ +If you run a Bengali website, you may get comments from users who +would like to read your website but get heaps of question marks or +other meaningless characters. Fixing this problem requires the +installation of a font or language pack which is often highly +dependent on what the language is. Here is an example +of such a help file for the Bengali language, I am sure there are +others out there too. You just have to point users to the appropriate +help file.
A prime example of when you'll see some very obscure Unicode +characters embedded in what otherwise would be very bland ASCII are +letters of the +International +Phonetic Alphabet (IPA), used to designate pronunciations in a very standard +manner (you probably see them all the time in your dictionary). Your +average font probably won't have support for all of the IPA characters +like ʘ (bilabial click) or ʒ (voiced postalveolar fricative). +So what's a poor browser to do? Font mix! Smart browsers like Mozilla Firefox +and Internet Explorer 7 will borrow glyphs from other fonts in order +to make sure that all the characters display properly.
But what happens when the browser isn't smart and happens to be the +most widely used browser in the entire world? Microsoft IE 6 +is not smart enough to borrow from other fonts when a character isn't +present, so more often than not you'll be slapped with a nice big �. +To get things to work, MSIE 6 needs a little nudge. You could configure it +to use a different font to render the text, but you can achieve the same +effect by selectively changing the font for blocks of special characters +to known good Unicode fonts.
Fortunately, the folks over at Wikipedia have already done all the +heavy lifting for you. Get the CSS from the horse's mouth here: +Common.css, +and search for ".IPA". There is also a smattering of +other classes you can use for other purposes; check out +this page +for more details. For you lazy ones, this should work:
+ +.Unicode { + font-family: Code2000, "TITUS Cyberbit Basic", "Doulos SIL", + "Chrysanthi Unicode", "Bitstream Cyberbit", + "Bitstream CyberBase", Thryomanes, Gentium, GentiumAlt, + "Lucida Grande", "Arial Unicode MS", "Microsoft Sans Serif", + "Lucida Sans Unicode"; + font-family /**/:inherit; /* resets fonts for everyone but IE6 */ +}+ +
The standard usage goes along the lines of <span class="Unicode">Crazy
+Unicode stuff here</span>
. Characters in the
+Windows Glyph List
+usually don't need to be fixed, but for anything else you probably
+want to play it safe. Unless, of course, you don't care about IE6
+users.
When people claim that PHP6 will solve all our Unicode problems, they're +misinformed. It will not fix any of the abovementioned troubles. It will, +however, fix the problem we are about to discuss: processing UTF-8 text +in PHP.
+ +PHP (as of PHP5) is blithely unaware of the existence of UTF-8 (with a few +notable exceptions). Sometimes, this will cause problems, other times, +this won't. So far, we've avoided discussing the architecture of +UTF-8, so, we must first ask, what is UTF-8? Yes, it supports Unicode, +and yes, it is variable width. Other traits:
Each of these traits affects different domains of text processing +in different ways. It is beyond the scope of this document to explain +what precisely these implications are. PHPWact provides +a very good reference document +on what to expect from each function, although coverage is spotty in +some areas. Their more general notes on +character sets +are also worth looking at for information on UTF-8. Some rules of thumb +when dealing with Unicode text:
+ +...and always think in bytes, not characters. If you use strpos() +to find the position of a character, it will be in bytes, but this +usually won't matter since substr() also operates with byte indices!
+ +You'll also need to make sure your UTF-8 is well-formed and will
+probably need replacements for some of these functions. I recommend
+using Harry Fuecks' PHP
+UTF-8 library, rather than use mb_string directly. HTML Purifier
+also defines a few useful UTF-8 compatible functions: check out
+Encoder.php
in the /library/HTMLPurifier/
+directory.
Well, that's it. Hopefully this document has served as a very +practical springboard into knowledge of how UTF-8 works. You may have +decided that you don't want to migrate yet: that's fine, just know +what will happen to your output and what bug reports you may receive.
+Many other developers have already discussed the subject of Unicode, UTF-8 and internationalization, and I would like to defer to them for a more in-depth look into character sets and encodings.
diff --git a/docs/fixquotes.htc b/docs/fixquotes.htc new file mode 100644 index 00000000..bf2e7842 --- /dev/null +++ b/docs/fixquotes.htc @@ -0,0 +1,6 @@ +<blockquote><p>Foo</p></blockquote>
. The '.
- '<p>
tags can be replaced '.
- 'with whatever you desire, as long as it is a block level element. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Parent', 'div', 'string',
- 'String name of element that HTML fragment passed to library will be '.
- 'inserted in. An interesting variation would be using span as the '.
- 'parent element, meaning that only inline tags would be allowed. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedElements', null, 'lookup/null',
- 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
- 'can overload it with your own list of tags to allow. Note that this '.
- 'method is subtractive: it does its job by taking away from HTML Purifier '.
- 'usual feature set, so you cannot add a tag that HTML Purifier never '.
- 'supported in the first place (like embed, form or head). If you change this, you '.
- 'probably also want to change %HTML.AllowedAttributes. '.
- 'Warning: If another directive conflicts with the '.
- 'elements here, that directive will win and override. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedAttributes', null, 'lookup/null',
- 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
- 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
- '(style, id, class, dir, lang, xml:lang).'.
- 'Warning: If another directive conflicts with the '.
- 'elements here, that directive will win and override. For '.
- 'example, %HTML.EnableAttrID will take precedence over *.id in this '.
- 'directive. You must set that directive to true before you can use '.
- 'IDs at all. This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'DisableURI', false, 'bool',
- 'Disables all URIs in all forms. Not sure why you\'d want to do that '.
- '(after all, the Internet\'s founded on the notion of a hyperlink). '.
- 'This directive has been available since 1.3.0.'
-);
-
-/**
- * Defines the purified HTML type with large amounts of objects.
- *
- * The main function of this object is its $info array, which is an
- * associative array of all the child and attribute definitions for
- * each allowed element. It also contains special use information (always
- * prefixed by info) for intelligent tag closing and global attributes.
- *
- * For optimization, the definition generation may be moved to
- * a maintenance script and stipulate that definition be created
- * by a factory method that unserializes a serialized version of Definition.
- * Customization would entail copying the maintenance script, making the
- * necessary changes, generating the serialized object, and then hooking it
- * in via the factory method. We would also offer a LiveDefinition for
- * automatic recompilation, suggesting that we would have a DefinitionGenerator.
- */
-
-class HTMLPurifier_HTMLDefinition
-{
-
- /**
- * Associative array of element names to HTMLPurifier_ElementDef
- * @public
- */
- var $info = array();
-
- /**
- * Associative array of global attribute name to attribute definition.
- * @public
- */
- var $info_global_attr = array();
-
- /**
- * String name of parent element HTML will be going into.
- * @public
- */
- var $info_parent = 'div';
-
- /**
- * Definition for parent element, allows parent element to be a
- * tag that's not allowed inside the HTML fragment.
- * @public
- */
- var $info_parent_def;
-
- /**
- * String name of element used to wrap inline elements in block context
- * @note This is rarely used except for BLOCKQUOTEs in strict mode
- * @public
- */
- var $info_block_wrapper = 'p';
-
- /**
- * Associative array of deprecated tag name to HTMLPurifier_TagTransform
- * @public
- */
- var $info_tag_transform = array();
-
- /**
- * List of HTMLPurifier_AttrTransform to be performed before validation.
- * @public
- */
- var $info_attr_transform_pre = array();
-
- /**
- * List of HTMLPurifier_AttrTransform to be performed after validation/
- * @public
- */
- var $info_attr_transform_post = array();
-
- /**
- * Lookup table of flow elements
- * @public
- */
- var $info_flow_elements = array();
-
- /**
- * Boolean is a strict definition?
- * @public
- */
- var $strict;
-
- /**
- * Initializes the definition, the meat of the class.
- */
- function setup($config) {
-
- // some cached config values
- $this->strict = $config->get('HTML', 'Strict');
-
- //////////////////////////////////////////////////////////////////////
- // info[] : initializes the definition objects
-
- // if you attempt to define rules later on for a tag not in this array
- // PHP will create an stdclass
-
- $allowed_tags =
- array(
- 'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
- 'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
- 'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small',
- 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
- 'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
- 'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
- 'colgroup', 'col', 'td', 'th', 'tr'
- );
-
- if (!$this->strict) {
- $allowed_tags[] = 'u';
- $allowed_tags[] = 's';
- $allowed_tags[] = 'strike';
- }
-
- foreach ($allowed_tags as $tag) {
- $this->info[$tag] = new HTMLPurifier_ElementDef();
- }
-
- //////////////////////////////////////////////////////////////////////
- // info[]->child : defines allowed children for elements
-
- // emulates the structure of the DTD
- // however, these are condensed, with bad stuff taken out
- // screening process was done by hand
-
- // entities: prefixed with e_ and _ replaces . from DTD
- // double underlines are entities we made up
-
- // we don't use an array because that complicates interpolation
- // strings are used instead of arrays because if you use arrays,
- // you have to do some hideous manipulation with array_merge()
-
- // todo: determine whether or not having allowed children
- // that aren't allowed globally affects security (it shouldn't)
- // if above works out, extend children definitions to include all
- // possible elements (allowed elements will dictate which ones
- // get dropped
-
- $e_special_extra = 'img';
- $e_special_basic = 'br | span | bdo';
- $e_special = "$e_special_basic | $e_special_extra";
- $e_fontstyle_extra = 'big | small';
- $e_fontstyle_basic = 'tt | i | b | u | s | strike';
- $e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra";
- $e_phrase_extra = 'sub | sup';
- $e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
- ' | cite | abbr | acronym';
- $e_phrase = "$e_phrase_basic | $e_phrase_extra";
- $e_misc_inline = 'ins | del';
- $e_misc = "$e_misc_inline";
- $e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
- // pseudo-property we created for convenience, see later on
- $e__inline = "#PCDATA | $e_inline | $e_misc_inline";
- // note the casing
- $e_Inline = new HTMLPurifier_ChildDef_Optional($e__inline);
- $e_heading = 'h1|h2|h3|h4|h5|h6';
- $e_lists = 'ul | ol | dl';
- $e_blocktext = 'pre | hr | blockquote | address';
- $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
- $e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
- $e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
- $e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
- $e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
- " | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
- $e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
- " | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
- " | $e_misc_inline");
- $e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
- $e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
-
- $this->info['ins']->child =
- $this->info['del']->child =
- new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
-
- $this->info['dd']->child =
- $this->info['li']->child =
- $this->info['div']->child = $e_Flow;
-
- if ($this->strict) {
- $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
- } else {
- $this->info['blockquote']->child = $e_Flow;
- }
-
- $this->info['caption']->child =
- $this->info['em']->child =
- $this->info['strong']->child =
- $this->info['dfn']->child =
- $this->info['code']->child =
- $this->info['samp']->child =
- $this->info['kbd']->child =
- $this->info['var']->child =
- $this->info['cite']->child =
- $this->info['abbr']->child =
- $this->info['acronym']->child =
- $this->info['q']->child =
- $this->info['sub']->child =
- $this->info['tt']->child =
- $this->info['sup']->child =
- $this->info['i']->child =
- $this->info['b']->child =
- $this->info['big']->child =
- $this->info['small']->child=
- $this->info['bdo']->child =
- $this->info['span']->child =
- $this->info['dt']->child =
- $this->info['p']->child =
- $this->info['h1']->child =
- $this->info['h2']->child =
- $this->info['h3']->child =
- $this->info['h4']->child =
- $this->info['h5']->child =
- $this->info['h6']->child = $e_Inline;
-
- if (!$this->strict) {
- $this->info['u']->child =
- $this->info['s']->child =
- $this->info['strike']->child = $e_Inline;
- }
-
- // the only three required definitions, besides custom table code
- $this->info['ol']->child =
- $this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li');
-
- $this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
-
- if ($this->strict) {
- $this->info['address']->child = $e_Inline;
- } else {
- $this->info['address']->child =
- new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
- " | $e_misc_inline");
- }
-
- $this->info['img']->child =
- $this->info['br']->child =
- $this->info['hr']->child = new HTMLPurifier_ChildDef_Empty();
-
- $this->info['pre']->child = $e_pre_content;
-
- $this->info['a']->child = $e_a_content;
-
- $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
-
- // not a real entity, watch the double underscore
- $e__row = new HTMLPurifier_ChildDef_Required('tr');
- $this->info['thead']->child = $e__row;
- $this->info['tfoot']->child = $e__row;
- $this->info['tbody']->child = $e__row;
- $this->info['colgroup']->child = new HTMLPurifier_ChildDef_Optional('col');
- $this->info['col']->child = new HTMLPurifier_ChildDef_Empty();
- $this->info['tr']->child = new HTMLPurifier_ChildDef_Required('th | td');
- $this->info['th']->child = $e_Flow;
- $this->info['td']->child = $e_Flow;
-
- //////////////////////////////////////////////////////////////////////
- // info[]->type : defines the type of the element (block or inline)
-
- // reuses $e_Inline and $e_Block
- foreach ($e_Inline->elements as $name => $bool) {
- if ($name == '#PCDATA') continue;
- if (!isset($this->info[$name])) continue;
- $this->info[$name]->type = 'inline';
- }
-
- foreach ($e_Block->elements as $name => $bool) {
- if (!isset($this->info[$name])) continue;
- $this->info[$name]->type = 'block';
- }
-
- foreach ($e_Flow->elements as $name => $bool) {
- $this->info_flow_elements[$name] = true;
- }
-
- //////////////////////////////////////////////////////////////////////
- // info[]->excludes : defines elements that aren't allowed in here
-
- // make sure you test using isset() and not !empty()
-
- $this->info['a']->excludes = array('a' => true);
- $this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
- // technically useless, but good to be indepth
- 'object', 'applet', 'font', 'basefont'));
-
- //////////////////////////////////////////////////////////////////////
- // info[]->attr : defines allowed attributes for elements
-
- // this doesn't include REQUIRED declarations, those are handled
- // by the transform classes. It will, however, do simple and slightly
- // complex attribute value substitution
-
- // the question of varying allowed attributes is more entangling.
-
- $e_Text = new HTMLPurifier_AttrDef_Text();
-
- // attrs, included in almost every single one except for a few,
- // which manually override these in their local definitions
- $this->info_global_attr = array(
- // core attrs
- 'class' => new HTMLPurifier_AttrDef_Class(),
- 'title' => $e_Text,
- 'style' => new HTMLPurifier_AttrDef_CSS(),
- // i18n
- 'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
- 'lang' => new HTMLPurifier_AttrDef_Lang(),
- 'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
- );
-
- if ($config->get('HTML', 'EnableAttrID')) {
- $this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID();
- }
-
- // required attribute stipulation handled in attribute transformation
- $this->info['bdo']->attr = array(); // nothing else
-
- $this->info['br']->attr['dir'] = false;
- $this->info['br']->attr['lang'] = false;
- $this->info['br']->attr['xml:lang'] = false;
-
- $this->info['td']->attr['abbr'] = $e_Text;
- $this->info['th']->attr['abbr'] = $e_Text;
-
- $this->setAttrForTableElements('align', new HTMLPurifier_AttrDef_Enum(
- array('left', 'center', 'right', 'justify', 'char'), false));
-
- $this->setAttrForTableElements('valign', new HTMLPurifier_AttrDef_Enum(
- array('top', 'middle', 'bottom', 'baseline'), false));
-
- $this->info['img']->attr['alt'] = $e_Text;
-
- $e_TFrame = new HTMLPurifier_AttrDef_Enum(array('void', 'above',
- 'below', 'hsides', 'lhs', 'rhs', 'vsides', 'box', 'border'), false);
- $this->info['table']->attr['frame'] = $e_TFrame;
-
- $e_TRules = new HTMLPurifier_AttrDef_Enum(array('none', 'groups',
- 'rows', 'cols', 'all'), false);
- $this->info['table']->attr['rules'] = $e_TRules;
-
- $this->info['table']->attr['summary'] = $e_Text;
-
- $this->info['table']->attr['border'] =
- new HTMLPurifier_AttrDef_Pixels();
-
- $e_Length = new HTMLPurifier_AttrDef_Length();
- $this->info['table']->attr['cellpadding'] =
- $this->info['table']->attr['cellspacing'] =
- $this->info['table']->attr['width'] =
- $this->info['img']->attr['height'] =
- $this->info['img']->attr['width'] = $e_Length;
- $this->setAttrForTableElements('charoff', $e_Length);
-
- $e_MultiLength = new HTMLPurifier_AttrDef_MultiLength();
- $this->info['col']->attr['width'] =
- $this->info['colgroup']->attr['width'] = $e_MultiLength;
-
- $e__NumberSpan = new HTMLPurifier_AttrDef_Integer(false, false, true);
- $this->info['colgroup']->attr['span'] =
- $this->info['col']->attr['span'] =
- $this->info['td']->attr['rowspan'] =
- $this->info['th']->attr['rowspan'] =
- $this->info['td']->attr['colspan'] =
- $this->info['th']->attr['colspan'] = $e__NumberSpan;
-
- if (!$config->get('Attr', 'DisableURI')) {
- $e_URI = new HTMLPurifier_AttrDef_URI();
- $this->info['a']->attr['href'] =
- $this->info['img']->attr['longdesc'] =
- $this->info['del']->attr['cite'] =
- $this->info['ins']->attr['cite'] =
- $this->info['blockquote']->attr['cite'] =
- $this->info['q']->attr['cite'] = $e_URI;
-
- // URI that causes HTTP request
- $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
- }
-
- if (!$this->strict) {
- $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
- $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
- }
-
- //////////////////////////////////////////////////////////////////////
- // info_tag_transform : transformations of tags
-
- $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
- $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
- $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
- $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
-
- //////////////////////////////////////////////////////////////////////
- // info[]->auto_close : tags that automatically close another
-
- // todo: determine whether or not SGML-like modeling based on
- // mandatory/optional end tags would be a better policy
-
- // make sure you test using isset() not !empty()
-
- // these are all block elements: blocks aren't allowed in P
- $this->info['p']->auto_close = array_flip(array(
- 'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
- 'table', 'ul'
- ));
-
- $this->info['li']->auto_close = array('li' => true);
-
- // we need TABLE and heading mismatch code
- // we may need to make this more flexible for heading mismatch,
- // or we can just create another info
-
- //////////////////////////////////////////////////////////////////////
- // info[]->attr_transform_* : attribute transformations in elements
- // pre is applied before any validation is done, post is done after
-
- $this->info['h1']->attr_transform_pre[] =
- $this->info['h2']->attr_transform_pre[] =
- $this->info['h3']->attr_transform_pre[] =
- $this->info['h4']->attr_transform_pre[] =
- $this->info['h5']->attr_transform_pre[] =
- $this->info['h6']->attr_transform_pre[] =
- $this->info['p'] ->attr_transform_pre[] =
- new HTMLPurifier_AttrTransform_TextAlign();
-
- $this->info['bdo']->attr_transform_post[] =
- new HTMLPurifier_AttrTransform_BdoDir();
-
- $this->info['img']->attr_transform_post[] =
- new HTMLPurifier_AttrTransform_ImgRequired();
-
- //////////////////////////////////////////////////////////////////////
- // info_attr_transform_* : global attribute transformation that is
- // unconditionally called. Good for transformations that have complex
- // start conditions
- // pre is applied before any validation is done, post is done after
-
- $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
-
- // protect against stdclasses floating around
- foreach ($this->info as $key => $obj) {
- if ($obj instanceof stdClass) {
- unset($this->info[$key]);
- }
- }
-
- //////////////////////////////////////////////////////////////////////
- // info_block_wrapper : wraps inline elements in block context
-
- $block_wrapper = $config->get('HTML', 'BlockWrapper');
- if (isset($e_Block->elements[$block_wrapper])) {
- $this->info_block_wrapper = $block_wrapper;
- } else {
- trigger_error('Cannot use non-block element as block wrapper.',
- E_USER_ERROR);
- }
-
- //////////////////////////////////////////////////////////////////////
- // info_parent : parent element of the HTML fragment
-
- $parent = $config->get('HTML', 'Parent');
- if (isset($this->info[$parent])) {
- $this->info_parent = $parent;
- } else {
- trigger_error('Cannot use unrecognized element as parent.',
- E_USER_ERROR);
- }
- $this->info_parent_def = $this->info[$this->info_parent];
-
- //////////////////////////////////////////////////////////////////////
- // %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
-
- $allowed_elements = $config->get('HTML', 'AllowedElements');
- if (is_array($allowed_elements)) {
- foreach ($this->info as $name => $d) {
- if(!isset($allowed_elements[$name])) unset($this->info[$name]);
- }
- }
- $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
- if (is_array($allowed_attributes)) {
- foreach ($this->info_global_attr as $attr_key => $info) {
- if (!isset($allowed_attributes["*.$attr_key"])) {
- unset($this->info_global_attr[$attr_key]);
- }
- }
- foreach ($this->info as $tag => $info) {
- foreach ($info->attr as $attr => $attr_info) {
- if (!isset($allowed_attributes["$tag.$attr"])) {
- unset($this->info[$tag]->attr[$attr]);
- }
- }
- }
- }
- }
-
- function setAttrForTableElements($attr, $def) {
- $this->info['col']->attr[$attr] =
- $this->info['colgroup']->attr[$attr] =
- $this->info['tbody']->attr[$attr] =
- $this->info['td']->attr[$attr] =
- $this->info['tfoot']->attr[$attr] =
- $this->info['th']->attr[$attr] =
- $this->info['thead']->attr[$attr] =
- $this->info['tr']->attr[$attr] = $def;
- }
-
-}
-
-/**
- * Structure that stores an element definition.
- */
-class HTMLPurifier_ElementDef
-{
-
- /**
- * Associative array of attribute name to HTMLPurifier_AttrDef
- * @public
- */
- var $attr = array();
-
- /**
- * List of tag's HTMLPurifier_AttrTransform to be done before validation
- * @public
- */
- var $attr_transform_pre = array();
-
- /**
- * List of tag's HTMLPurifier_AttrTransform to be done after validation
- * @public
- */
- var $attr_transform_post = array();
-
- /**
- * Lookup table of tags that close this tag.
- * @public
- */
- var $auto_close = array();
-
- /**
- * HTMLPurifier_ChildDef of this tag.
- * @public
- */
- var $child;
-
- /**
- * Type of the tag: inline or block or unknown?
- * @public
- */
- var $type = 'unknown';
-
- /**
- * Lookup table of tags excluded from all descendants of this tag.
- * @public
- */
- var $excludes = array();
-
-}
-
-?>
+<blockquote>Foo</blockquote> '.
+ 'would become <blockquote><p>Foo</p></blockquote>
. The '.
+ '<p>
tags can be replaced '.
+ 'with whatever you desire, as long as it is a block level element. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'Parent', 'div', 'string',
+ 'String name of element that HTML fragment passed to library will be '.
+ 'inserted in. An interesting variation would be using span as the '.
+ 'parent element, meaning that only inline tags would be allowed. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'AllowedElements', null, 'lookup/null',
+ 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
+ 'can overload it with your own list of tags to allow. Note that this '.
+ 'method is subtractive: it does its job by taking away from HTML Purifier '.
+ 'usual feature set, so you cannot add a tag that HTML Purifier never '.
+ 'supported in the first place (like embed, form or head). If you change this, you '.
+ 'probably also want to change %HTML.AllowedAttributes. '.
+ 'Warning: If another directive conflicts with the '.
+ 'elements here, that directive will win and override. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+// %HTML.AllowedAttributes: subtractive attribute whitelist, consumed by
+// HTMLPurifier_HTMLDefinition::setupConfigStuff(). The description is
+// concatenated from fragments, so each fragment must end with a space.
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'AllowedAttributes', null, 'lookup/null',
+ 'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
+ 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
+ '(style, id, class, dir, lang, xml:lang). '.
+ 'Warning: If another directive conflicts with the '.
+ 'elements here, that directive will win and override. For '.
+ 'example, %HTML.EnableAttrID will take precedence over *.id in this '.
+ 'directive. You must set that directive to true before you can use '.
+ 'IDs at all. This directive has been available since 1.3.0.'
+);
+
+/**
+ * Definition of the purified HTML that describes allowed children,
+ * attributes, and many other things.
+ *
+ * Conventions:
+ *
+ * All member variables that are prefixed with info
+ * (including the main $info array) are used by HTML Purifier internals
+ * and should not be directly edited when customizing the HTMLDefinition.
+ * They can usually be set via configuration directives or custom
+ * modules.
+ *
+ * On the other hand, member variables without the info prefix are used
+ * internally by the HTMLDefinition and MUST NOT be used by other HTML
+ * Purifier internals. Many of them, however, are public, and may be
+ * edited by userspace code to tweak the behavior of HTMLDefinition.
+ *
+ * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
+ * rule: in the interest of comprehensiveness, it will sniff everything.
+ */
+class HTMLPurifier_HTMLDefinition
+{
+
+ /** FULLY-PUBLIC VARIABLES */
+
+ /**
+ * Associative array of element names to HTMLPurifier_ElementDef
+ * @public
+ */
+ var $info = array();
+
+ /**
+ * Associative array of global attribute name to attribute definition.
+ * @public
+ */
+ var $info_global_attr = array();
+
+ /**
+ * String name of parent element HTML will be going into.
+ * @public
+ */
+ var $info_parent = 'div';
+
+ /**
+ * Definition for parent element, allows parent element to be a
+ * tag that's not allowed inside the HTML fragment.
+ * @public
+ */
+ var $info_parent_def;
+
+ /**
+ * String name of element used to wrap inline elements in block context
+ * @note This is rarely used except for BLOCKQUOTEs in strict mode
+ * @public
+ */
+ var $info_block_wrapper = 'p';
+
+ /**
+ * Associative array of deprecated tag name to HTMLPurifier_TagTransform
+ * @public
+ */
+ var $info_tag_transform = array();
+
+ /**
+ * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
+ * @public
+ */
+ var $info_attr_transform_pre = array();
+
+ /**
+ * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
+ * @public
+ */
+ var $info_attr_transform_post = array();
+
+ /**
+ * Nested lookup array of content set name (Block, Inline) to
+ * element name to whether or not it belongs in that content set.
+ * @public
+ */
+ var $info_content_sets = array();
+
+
+
+ /** PUBLIC BUT INTERNAL VARIABLES */
+
+ var $setup = false; /**< Has setup() been called yet? */
+ var $config; /**< Temporary instance of HTMLPurifier_Config */
+
+ var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
+
+ /**
+ * Performs low-cost, preliminary initialization: keeps a reference to
+ * the configuration and creates the module manager. Processing of the
+ * modules themselves is left to setup().
+ * @param $config Instance of HTMLPurifier_Config
+ */
+ function HTMLPurifier_HTMLDefinition(&$config) {
+ $this->config =& $config;
+ $this->manager = new HTMLPurifier_HTMLModuleManager();
+ }
+
+ /**
+ * Processes internals into form usable by HTMLPurifier internals.
+ * Modifying the definition after calling this function should not
+ * be done. Only the first call does any work; later calls return
+ * immediately.
+ */
+ function setup() {
+
+ // multiple call guard
+ if ($this->setup) {return;} else {$this->setup = true;}
+
+ $this->processModules();
+ $this->setupConfigStuff();
+
+ // config and manager are only needed while setting up, so the
+ // properties are dropped once the definition has been built
+ unset($this->config);
+ unset($this->manager);
+
+ }
+
+ /**
+ * Extract out the information from the manager: sets the manager up
+ * with the config, merges every active module's tag and attribute
+ * transforms into this definition, then stores the manager's elements
+ * and content set lookup.
+ */
+ function processModules() {
+
+ $this->manager->setup($this->config);
+
+ // transforms are merged by key, so a module processed later
+ // overwrites an earlier module's entry under the same key
+ foreach ($this->manager->activeModules as $module) {
+ foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v;
+ foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v;
+ foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v;
+ }
+
+ $this->info = $this->manager->getElements($this->config);
+ $this->info_content_sets = $this->manager->contentSets->lookup;
+
+ }
+
+ /**
+ * Sets up stuff based on config. We need a better way of doing this.
+ * Handles, in order: %HTML.BlockWrapper, %HTML.Parent,
+ * %HTML.AllowedElements and %HTML.AllowedAttributes.
+ */
+ function setupConfigStuff() {
+
+ // the block wrapper must be a member of the Block content set
+ $block_wrapper = $this->config->get('HTML', 'BlockWrapper');
+ if (isset($this->info_content_sets['Block'][$block_wrapper])) {
+ $this->info_block_wrapper = $block_wrapper;
+ } else {
+ trigger_error('Cannot use non-block element as block wrapper.',
+ E_USER_ERROR);
+ }
+
+ // the parent is looked up through the manager, not $this->info
+ $parent = $this->config->get('HTML', 'Parent');
+ $def = $this->manager->getElement($parent, $this->config);
+ if ($def) {
+ $this->info_parent = $parent;
+ $this->info_parent_def = $def;
+ } else {
+ trigger_error('Cannot use unrecognized element as parent.',
+ E_USER_ERROR);
+ // if execution continues past the error, fall back to the
+ // definition of the unchanged $this->info_parent
+ $this->info_parent_def = $this->manager->getElement(
+ $this->info_parent, $this->config);
+ }
+
+ // setup allowed elements, SubtractiveWhitelist module
+ // (both directives are lookups keyed by name; null leaves the
+ // definition untouched)
+ $allowed_elements = $this->config->get('HTML', 'AllowedElements');
+ if (is_array($allowed_elements)) {
+ foreach ($this->info as $name => $d) {
+ if(!isset($allowed_elements[$name])) unset($this->info[$name]);
+ }
+ }
+ $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
+ if (is_array($allowed_attributes)) {
+ // global attributes are kept only when listed as '*.attr'
+ foreach ($this->info_global_attr as $attr_key => $info) {
+ if (!isset($allowed_attributes["*.$attr_key"])) {
+ unset($this->info_global_attr[$attr_key]);
+ }
+ }
+ // an element's attribute is kept when either 'tag.attr' or
+ // '*.attr' is listed
+ foreach ($this->info as $tag => $info) {
+ foreach ($info->attr as $attr => $attr_info) {
+ if (!isset($allowed_attributes["$tag.$attr"]) &&
+ !isset($allowed_attributes["*.$attr"])) {
+ unset($this->info[$tag]->attr[$attr]);
+ }
+ }
+ }
+ }
+
+ }
+
+
+}
+
+?>
diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php
new file mode 100644
index 00000000..930b605d
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule.php
@@ -0,0 +1,125 @@
+info, since the object's data is only info,
+ * with extra behavior associated with it.
+ * @public
+ */
+ var $attr_collections = array();
+
+ /**
+ * Associative array of deprecated tag name to HTMLPurifier_TagTransform
+ * @public
+ */
+ var $info_tag_transform = array();
+
+ /**
+ * List of HTMLPurifier_AttrTransform to be performed before validation.
+ * @public
+ */
+ var $info_attr_transform_pre = array();
+
+ /**
+ * List of HTMLPurifier_AttrTransform to be performed after validation.
+ * @public
+ */
+ var $info_attr_transform_post = array();
+
+ /**
+ * Boolean flag that indicates whether or not getChildDef is implemented.
+ * For optimization reasons: may save a call to a function. Be sure
+ * to set it if you do implement getChildDef(), otherwise it will have
+ * no effect!
+ * @public
+ */
+ var $defines_child_def = false;
+
+ /**
+ * Retrieves a proper HTMLPurifier_ChildDef subclass based on
+ * content_model and content_model_type member variables of
+ * the HTMLPurifier_ElementDef class. There is a similar function
+ * in HTMLPurifier_HTMLDefinition.
+ * NOTE(review): no such function is visible in the
+ * HTMLPurifier_HTMLDefinition class of this patch; confirm where the
+ * counterpart actually lives.
+ * This base implementation handles no content model and always
+ * returns false.
+ * @param $def HTMLPurifier_ElementDef instance
+ * @return HTMLPurifier_ChildDef subclass, or false if not handled
+ * @public
+ */
+ function getChildDef($def) {return false;}
+
+ /**
+ * Hook method that lets module perform arbitrary operations on
+ * HTMLPurifier_HTMLDefinition before the module gets processed.
+ * @param $definition Reference to HTMLDefinition being setup
+ */
+ function preProcess(&$definition) {}
+
+ /**
+ * Hook method that lets module perform arbitrary operations
+ * on HTMLPurifier_HTMLDefinition after the module gets processed.
+ * @param $definition Reference to HTMLDefinition being setup
+ */
+ function postProcess(&$definition) {}
+
+ /**
+ * Hook method that is called when a module gets registered to
+ * the definition.
+ * @param $definition Reference to HTMLDefinition being setup
+ */
+ function setup(&$definition) {}
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Bdo.php b/library/HTMLPurifier/HTMLModule/Bdo.php
new file mode 100644
index 00000000..17e5e987
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Bdo.php
@@ -0,0 +1,43 @@
+ 'bdo');
+ var $attr_collections = array(
+ 'I18N' => array('dir' => false)
+ );
+
+ /**
+ * Registers the bdo element and its dir attribute. The same ltr/rtl
+ * enum object is stored in the I18N attribute collection and on the
+ * element itself.
+ */
+ function HTMLPurifier_HTMLModule_Bdo() {
+ $dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
+ // replaces the false placeholder declared in $attr_collections
+ $this->attr_collections['I18N']['dir'] = $dir;
+ $this->info['bdo'] = new HTMLPurifier_ElementDef();
+ $this->info['bdo']->attr = array(
+ // key 0 presumably lists the attribute collections to include
+ 0 => array('Core', 'Lang'),
+ 'dir' => $dir, // required
+ // The Abstract Module specification has the attribute
+ // inclusions wrong for bdo: bdo allows
+ // xml:lang too (and we'll toss in lang for good measure,
+ // though it is not allowed for XHTML 1.1, this will
+ // be managed with a global attribute transform)
+ );
+ $this->info['bdo']->content_model = '#PCDATA | Inline';
+ $this->info['bdo']->content_model_type = 'optional';
+ // provides fallback behavior if dir's missing (dir is required)
+ $this->info['bdo']->attr_transform_post['required-dir'] =
+ new HTMLPurifier_AttrTransform_BdoDir();
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/CommonAttributes.php b/library/HTMLPurifier/HTMLModule/CommonAttributes.php
new file mode 100644
index 00000000..8f17c2f0
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/CommonAttributes.php
@@ -0,0 +1,31 @@
+ array(
+ 0 => array('Style'),
+ // 'xml:space' => false,
+ 'class' => 'NMTOKENS',
+ 'id' => 'ID',
+ 'title' => 'CDATA',
+ ),
+ 'Lang' => array(
+ 'xml:lang' => false, // see constructor
+ ),
+ 'I18N' => array(
+ 0 => array('Lang'), // proprietary, for xml:lang/lang
+ ),
+ 'Common' => array(
+ 0 => array('Core', 'I18N')
+ )
+ );
+
+ /**
+ * Replaces the false placeholder for xml:lang in the Lang attribute
+ * collection with an HTMLPurifier_AttrDef_Lang instance.
+ */
+ function HTMLPurifier_HTMLModule_CommonAttributes() {
+ $this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang();
+ }
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Edit.php b/library/HTMLPurifier/HTMLModule/Edit.php
new file mode 100644
index 00000000..6a415906
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Edit.php
@@ -0,0 +1,46 @@
+ 'del | ins');
+
+ /**
+ * Defines every element in $this->elements with the Common attribute
+ * collection, a cite URI attribute and a chameleon content model.
+ */
+ function HTMLPurifier_HTMLModule_Edit() {
+ foreach ($this->elements as $element) {
+ $this->info[$element] = new HTMLPurifier_ElementDef();
+ $this->info[$element]->attr = array(
+ 0 => array('Common'),
+ 'cite' => 'URI',
+ // 'datetime' => 'Datetime' // Datetime not implemented
+ );
+ // Inline context ! Block context (exclamation mark is
+ // separator, see getChildDef for parsing)
+ $this->info[$element]->content_model =
+ '#PCDATA | Inline ! #PCDATA | Flow';
+ // HTML 4.01 specifies that ins/del must not contain block
+ // elements when used in an inline context; chameleon is
+ // a complicated workaround to achieve this effect
+ $this->info[$element]->content_model_type = 'chameleon';
+ }
+ }
+
+ var $defines_child_def = true;
+ /**
+ * Builds the child definition for chameleon content models, which are
+ * written as "inline-context model ! block-context model". Any other
+ * content_model_type is declined by returning false.
+ * @param $def HTMLPurifier_ElementDef instance
+ * @return HTMLPurifier_ChildDef_Chameleon, or false if not chameleon
+ */
+ function getChildDef($def) {
+ if ($def->content_model_type == 'chameleon') {
+ // first part applies in inline context, second in block context
+ list($inline, $block) = explode('!', $def->content_model);
+ return new HTMLPurifier_ChildDef_Chameleon($inline, $block);
+ }
+ return false;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Hypertext.php b/library/HTMLPurifier/HTMLModule/Hypertext.php
new file mode 100644
index 00000000..0b8a2e98
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Hypertext.php
@@ -0,0 +1,36 @@
+ 'a');
+
+ /**
+ * Defines the a element: Common attributes plus href, optional
+ * inline content, and a itself listed in its excludes lookup.
+ */
+ function HTMLPurifier_HTMLModule_Hypertext() {
+ $this->info['a'] = new HTMLPurifier_ElementDef();
+ $this->info['a']->attr = array(
+ 0 => array('Common'),
+ // commented-out entries below are attributes not enabled here
+ // 'accesskey' => 'Character',
+ // 'charset' => 'Charset',
+ 'href' => 'URI',
+ //'hreflang' => 'LanguageCode',
+ //'rel' => 'LinkTypes',
+ //'rev' => 'LinkTypes',
+ //'tabindex' => 'Number',
+ //'type' => 'ContentType',
+ );
+ $this->info['a']->content_model = '#PCDATA | Inline';
+ $this->info['a']->content_model_type = 'optional';
+ // lookup table: a is excluded from a's own content
+ $this->info['a']->excludes = array('a' => true);
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Image.php b/library/HTMLPurifier/HTMLModule/Image.php
new file mode 100644
index 00000000..3852836d
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Image.php
@@ -0,0 +1,38 @@
+ 'img');
+
+ /**
+ * Defines the img element: Common attributes plus alt, height,
+ * longdesc, src and width, an empty content model, and the
+ * ImgRequired post-validation attribute transform.
+ */
+ function HTMLPurifier_HTMLModule_Image() {
+ $this->info['img'] = new HTMLPurifier_ElementDef();
+ $this->info['img']->attr = array(
+ 0 => array('Common'),
+ 'alt' => 'Text',
+ 'height' => 'Length',
+ 'longdesc' => 'URI',
+ // src is given as an object, not a type name, so that the
+ // URI definition can be constructed with its flag set to true
+ 'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
+ 'width' => 'Length'
+ );
+ $this->info['img']->content_model_type = 'empty';
+ $this->info['img']->attr_transform_post[] =
+ new HTMLPurifier_AttrTransform_ImgRequired();
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Legacy.php b/library/HTMLPurifier/HTMLModule/Legacy.php
new file mode 100644
index 00000000..a0613a2f
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Legacy.php
@@ -0,0 +1,60 @@
+elements as $name) {
+ $this->info[$name] = new HTMLPurifier_ElementDef();
+ // for u, s, strike, as more elements get added, add
+ // conditionals as necessary
+ $this->info[$name]->content_model = 'Inline | #PCDATA';
+ $this->info[$name]->content_model_type = 'optional';
+ $this->info[$name]->attr[0] = array('Common');
+ }
+
+ // setup modifications to old elements
+ foreach ($this->non_standalone_elements as $name) {
+ $this->info[$name] = new HTMLPurifier_ElementDef();
+ $this->info[$name]->standalone = false;
+ }
+
+ $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
+ $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
+
+ $this->info['address']->content_model = 'Inline | #PCDATA | p';
+ $this->info['address']->content_model_type = 'optional';
+ $this->info['address']->child = false;
+
+ $this->info['blockquote']->content_model = 'Flow | #PCDATA';
+ $this->info['blockquote']->content_model_type = 'optional';
+ $this->info['blockquote']->child = false;
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/List.php b/library/HTMLPurifier/HTMLModule/List.php
new file mode 100644
index 00000000..c74982df
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/List.php
@@ -0,0 +1,46 @@
+ 'dl | ol | ul', 'Flow' => 'List');
+
+ /**
+ * Defines the list elements named in $this->elements, each with the
+ * Common attribute collection. The loop assigns content models for
+ * li, dd, ol and ul; dt and dl are filled in after it.
+ * NOTE(review): $elements is declared outside this view; the code
+ * below assumes it contains dt, dl and li.
+ */
+ function HTMLPurifier_HTMLModule_List() {
+ foreach ($this->elements as $element) {
+ $this->info[$element] = new HTMLPurifier_ElementDef();
+ $this->info[$element]->attr = array(0 => array('Common'));
+ if ($element == 'li' || $element == 'dd') {
+ $this->info[$element]->content_model = '#PCDATA | Flow';
+ $this->info[$element]->content_model_type = 'optional';
+ } elseif ($element == 'ol' || $element == 'ul') {
+ $this->info[$element]->content_model = 'li';
+ $this->info[$element]->content_model_type = 'required';
+ }
+ }
+ $this->info['dt']->content_model = '#PCDATA | Inline';
+ $this->info['dt']->content_model_type = 'optional';
+ $this->info['dl']->content_model = 'dt | dd';
+ $this->info['dl']->content_model_type = 'required';
+ // this could be a LOT more robust
+ // (auto_close is a lookup of tags that close this tag)
+ $this->info['li']->auto_close = array('li' => true);
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Presentation.php b/library/HTMLPurifier/HTMLModule/Presentation.php
new file mode 100644
index 00000000..42d9c11e
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Presentation.php
@@ -0,0 +1,41 @@
+ 'hr',
+ 'Inline' => 'b | big | i | small | sub | sup | tt'
+ );
+
+ /**
+ * Defines every element in $this->elements with the Common attribute
+ * collection. hr gets an empty content model; every other element
+ * gets optional '#PCDATA | Inline' content.
+ */
+ function HTMLPurifier_HTMLModule_Presentation() {
+ foreach ($this->elements as $element) {
+ $this->info[$element] = new HTMLPurifier_ElementDef();
+ $this->info[$element]->attr = array(0 => array('Common'));
+ if ($element == 'hr') {
+ $this->info[$element]->content_model_type = 'empty';
+ } else {
+ $this->info[$element]->content_model = '#PCDATA | Inline';
+ $this->info[$element]->content_model_type = 'optional';
+ }
+ }
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/StyleAttribute.php b/library/HTMLPurifier/HTMLModule/StyleAttribute.php
new file mode 100644
index 00000000..5ee5d1cf
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/StyleAttribute.php
@@ -0,0 +1,27 @@
+ array('style' => false), // see constructor
+ 'Core' => array(0 => array('Style'))
+ );
+
+ /**
+ * Replaces the false placeholder for style in the Style attribute
+ * collection with an HTMLPurifier_AttrDef_CSS instance.
+ */
+ function HTMLPurifier_HTMLModule_StyleAttribute() {
+ $this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Tables.php b/library/HTMLPurifier/HTMLModule/Tables.php
new file mode 100644
index 00000000..ea41f5b1
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Tables.php
@@ -0,0 +1,88 @@
+ 'table');
+
+ function HTMLPurifier_HTMLModule_Tables() { // constructor: builds one definition per name in $this->elements
+ foreach ($this->elements as $e) {
+ $this->info[$e] = new HTMLPurifier_ElementDef();
+ $this->info[$e]->attr = array(0 => array('Common'));
+ $attr =& $this->info[$e]->attr; // alias: additions below are written straight into the definition
+ if ($e == 'caption') continue; // caption receives nothing beyond Common
+ if ($e == 'table'){
+ $attr['border'] = 'Pixels';
+ $attr['cellpadding'] = 'Length';
+ $attr['cellspacing'] = 'Length';
+ $attr['frame'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'void', 'above', 'below', 'hsides', 'lhs', 'rhs',
+ 'vsides', 'box', 'border'
+ ), false);
+ $attr['rules'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'none', 'groups', 'rows', 'cols', 'all'
+ ), false);
+ $attr['summary'] = 'Text';
+ $attr['width'] = 'Length';
+ continue; // table skips the align/valign/charoff attributes added further down
+ }
+ if ($e == 'col' || $e == 'colgroup') {
+ $attr['span'] = 'Number';
+ $attr['width'] = 'MultiLength';
+ }
+ if ($e == 'td' || $e == 'th') {
+ $attr['abbr'] = 'Text';
+ $attr['colspan'] = 'Number';
+ $attr['rowspan'] = 'Number';
+ }
+ $attr['align'] = new HTMLPurifier_AttrDef_Enum(array( // reached by every element except caption and table
+ 'left', 'center', 'right', 'justify', 'char'
+ ), false);
+ $attr['valign'] = new HTMLPurifier_AttrDef_Enum(array(
+ 'top', 'middle', 'bottom', 'baseline'
+ ), false);
+ $attr['charoff'] = 'Length';
+ }
+ $this->info['caption']->content_model = '#PCDATA | Inline';
+ $this->info['caption']->content_model_type = 'optional';
+
+ // The table's child definition is set directly because it doesn't
+ // leverage substitution mechanisms. True model is:
+ // 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))'
+ $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
+
+ $this->info['td']->content_model =
+ $this->info['th']->content_model = '#PCDATA | Flow'; // chained assignment: td and th share the same model string
+ $this->info['td']->content_model_type =
+ $this->info['th']->content_model_type = 'optional';
+
+ $this->info['tr']->content_model = 'td | th';
+ $this->info['tr']->content_model_type = 'required';
+
+ $this->info['col']->content_model_type = 'empty'; // col has no content model string, only the 'empty' type
+
+ $this->info['colgroup']->content_model = 'col';
+ $this->info['colgroup']->content_model_type = 'optional';
+
+ $this->info['tbody']->content_model =
+ $this->info['thead']->content_model =
+ $this->info['tfoot']->content_model = 'tr'; // all three row groups contain tr
+ $this->info['tbody']->content_model_type =
+ $this->info['thead']->content_model_type =
+ $this->info['tfoot']->content_model_type = 'required';
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/Text.php b/library/HTMLPurifier/HTMLModule/Text.php
new file mode 100644
index 00000000..56361a39
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/Text.php
@@ -0,0 +1,78 @@
+ 'h1 | h2 | h3 | h4 | h5 | h6',
+ 'Block' => 'address | blockquote | div | p | pre',
+ 'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
+ 'Flow' => 'Heading | Block | Inline'
+ );
+
+ /**
+  * Constructor: gives every element in $this->elements its attributes and
+  * content model, then registers the exclusions for pre and the elements
+  * that auto-close p.
+  */
+ function HTMLPurifier_HTMLModule_Text() {
+     foreach ($this->elements as $name) {
+         $def = new HTMLPurifier_ElementDef();
+
+         // attributes: br is limited to Core, the quotation elements
+         // additionally accept a cite URI, the rest take Common
+         if ($name == 'br') {
+             $def->attr = array(0 => array('Core'));
+         } else {
+             $def->attr = array(0 => array('Common'));
+             if ($name == 'blockquote' || $name == 'q') {
+                 $def->attr['cite'] = 'URI';
+             }
+         }
+
+         // content models
+         if ($name == 'br') {
+             $def->content_model_type = 'empty';
+         } else {
+             if ($name == 'blockquote') {
+                 $def->content_model = 'Heading | Block | List';
+             } elseif ($name == 'div') {
+                 $def->content_model = '#PCDATA | Flow';
+             } else {
+                 $def->content_model = '#PCDATA | Inline';
+             }
+             $def->content_model_type = 'optional';
+         }
+
+         $this->info[$name] = $def;
+     }
+
+     // SGML allows an element to exclude descendants at any depth, which
+     // DTDs and XML Schemas cannot express. W3C emulates it for direct
+     // children with elaborate content model compositions; we simply
+     // use flat SGML-style exclusions. The Abstract Module knows
+     // nothing about this distinction.
+     $pre_excluded = array(
+         'img', 'big', 'small',
+         'object', 'applet', 'font', 'basefont' // generally not allowed
+     );
+     $this->info['pre']->excludes = array_flip($pre_excluded);
+
+     // elements whose start tag auto-closes p
+     $closes_p = array(
+         'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
+         'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
+         'table', 'ul'
+     );
+     $this->info['p']->auto_close = array_flip($closes_p);
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/TransformToStrict.php b/library/HTMLPurifier/HTMLModule/TransformToStrict.php
new file mode 100644
index 00000000..d228f84f
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/TransformToStrict.php
@@ -0,0 +1,86 @@
+ false,
+ 'menu' => false,
+ 'dir' => false,
+ 'center'=> false
+ );
+
+ var $attr_collections = array(
+ 'Lang' => array(
+ 'lang' => false // placeholder
+ )
+ );
+
+ var $info_attr_transform_post = array(
+ 'lang' => false // placeholder
+ );
+
+ /**
+  * Constructor: installs the deprecated tag and attribute transforms, the
+  * lang attribute handling and the strict blockquote content model type.
+  */
+ function HTMLPurifier_HTMLModule_TransformToStrict() {
+
+     // deprecated tag transforms; menu and dir both turn into ul
+     $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
+     foreach (array('menu', 'dir') as $list_tag) {
+         $this->info_tag_transform[$list_tag] = new HTMLPurifier_TagTransform_Simple('ul');
+     }
+     $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
+
+     // none of this module's definitions are standalone
+     foreach ($this->elements as $name) {
+         $def = new HTMLPurifier_ElementDef();
+         $def->standalone = false;
+         $this->info[$name] = $def;
+     }
+
+     // deprecated attribute transforms: align on headings and paragraphs
+     $text_align = new HTMLPurifier_AttrTransform_TextAlign();
+     foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p') as $aligned) {
+         $this->info[$aligned]->attr_transform_pre['align'] = $text_align;
+     }
+
+     // xml:lang <=> lang mirroring is implemented here and overridden
+     // in TransformToXHTML11
+     $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
+     $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
+
+     // this should not be applied to XHTML 1.0 Transitional, ONLY
+     // XHTML 1.0 Strict. We may need three classes
+     $blockquote =& $this->info['blockquote'];
+     $blockquote->content_model_type = 'strictblockquote';
+     $blockquote->child = false; // recalculate please!
+
+ }
+
+ var $defines_child_def = true;
+ /**
+  * Supplies the child definition for the 'strictblockquote' content
+  * model type.
+  * @param $def Element definition whose content_model_type is inspected
+  * @return HTMLPurifier_ChildDef_StrictBlockquote built from the
+  *         definition's content model, or false for any other type
+  */
+ function getChildDef($def) {
+     if ($def->content_model_type == 'strictblockquote') {
+         return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+     }
+     return false;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php b/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php
new file mode 100644
index 00000000..0915f5b6
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php
@@ -0,0 +1,30 @@
+ array(
+ 'lang' => false // remove it
+ )
+ );
+
+ var $info_attr_transform_post = array(
+ 'lang' => false // remove it
+ );
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php
new file mode 100644
index 00000000..e0090472
--- /dev/null
+++ b/library/HTMLPurifier/HTMLModuleManager.php
@@ -0,0 +1,558 @@
+attrTypes = new HTMLPurifier_AttrTypes();
+
+ if (!$blank) $this->initialize();
+
+ }
+
+ function initialize() { // registers the stock modules and collections, then turns on auto-registration
+ $this->initialized = true; // getDoctype() only applies its legacy doctype defaults when this is set
+
+ // load default modules to the recognized modules list (not active)
+ $modules = array(
+ // define
+ 'CommonAttributes',
+ 'Text', 'Hypertext', 'List', 'Presentation',
+ 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute',
+ // define-redefine
+ 'Legacy',
+ // redefine
+ 'TransformToStrict', 'TransformToXHTML11'
+ );
+ foreach ($modules as $module) {
+ $this->addModule($module); // addModule() numbers modules in call order, so this list order is the module order
+ }
+
+ // Safe modules for supported doctypes. These are included
+ // in the valid and active module lists by default
+ $this->collections['Safe'] = array(
+ '_Common' => array( // leading _ indicates private
+ 'CommonAttributes', 'Text', 'Hypertext', 'List',
+ 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
+ 'StyleAttribute'
+ ),
+ // HTML definitions, defer to XHTML definitions
+ 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')), // a nested array at index 0 lists collections to include
+ 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
+ // XHTML definitions
+ 'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
+ 'XHTML 1.0 Strict' => array(array('_Common')),
+ 'XHTML 1.1' => array(array('_Common')),
+ );
+
+ // Modules that specify elements that are unsafe from untrusted
+ // third-parties. These should be registered in $validModules but
+ // almost never $activeModules unless you really know what you're
+ // doing.
+ $this->collections['Unsafe'] = array();
+
+ // Modules to import if lenient mode (attempt to convert everything
+ // to a valid representation) is on. These must not be in $validModules
+ // unless specified so.
+ $this->collections['Lenient'] = array(
+ 'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
+ 'XHTML 1.0 Strict' => array('TransformToStrict'),
+ 'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
+ );
+
+ // Modules to import if correctional mode (correct everything that
+ // is feasible to strict mode) is on. These must not be in $validModules
+ // unless specified so.
+ $this->collections['Correctional'] = array(
+ 'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
+ 'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
+ );
+
+ // User-space modules, custom code or whatever
+ $this->collections['Extension'] = array();
+
+ // setup active versus valid modules. ORDER IS IMPORTANT!
+ // definition modules
+ $this->makeCollectionActive('Safe');
+ $this->makeCollectionValid('Unsafe'); // valid only: deliberately not made active
+ // redefinition modules
+ $this->makeCollectionActive('Lenient');
+ $this->makeCollectionActive('Correctional');
+
+ $this->autoDoctype = '*'; // with both auto* values set, addModule() also files new modules under
+ $this->autoCollection = 'Extension'; // collections['Extension']['*']
+
+ }
+
+ /**
+ * Adds a module to the recognized module list. This does not
+ * do anything else: the module must be added to a corresponding
+ * collection to be "activated".
+ * @param $module Mixed: string module name, with or without
+ * HTMLPurifier_HTMLModule prefix, or instance of
+ * subclass of HTMLPurifier_HTMLModule.
+ */
+ function addModule($module) {
+     if (is_string($module)) {
+         // resolve the name to a class: try it verbatim first, then
+         // with each registered prefix in turn
+         $class = $module;
+         if (!class_exists($class)) {
+             foreach ($this->prefixes as $prefix) {
+                 $class = $prefix . $module;
+                 if (class_exists($class)) break;
+             }
+         }
+         if (!class_exists($class)) {
+             trigger_error($module . ' module does not exist',
+                 E_USER_ERROR);
+             return;
+         }
+         $module = new $class();
+     }
+
+     // remember registration order and index the module by its name
+     $module->order = $this->counter++; // assign then increment
+     $this->modules[$module->name] = $module;
+
+     // auto-registration is skipped as soon as either setting is false
+     if ($this->autoDoctype === false || $this->autoCollection === false) {
+         return;
+     }
+     $this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name;
+ }
+
+ /**
+ * Makes a collection active, while also making it valid if not
+ * already done so. See $activeModules for the semantics of "active".
+ * @param $collection_name Name of collection to activate
+ */
+ function makeCollectionActive($collection_name) {
+     // an active collection must also be valid, so register it as
+     // valid first when that has not happened yet
+     $already_valid = in_array($collection_name, $this->validCollections);
+     if (!$already_valid) {
+         $this->makeCollectionValid($collection_name);
+     }
+     $this->activeCollections[] = $collection_name;
+ }
+
+ /**
+ * Makes a collection valid. See $validModules for the semantics of "valid"
+ */
+ function makeCollectionValid($collection_name) { // @param $collection_name Name of collection to mark valid
+ $this->validCollections[] = $collection_name; // plain append: no duplicate check is done here
+ }
+
+ /**
+ * Adds a class prefix that addModule() will use to resolve a
+ * string name to a concrete class
+ */
+ function addPrefix($prefix) {
+     // stored as a string because addModule() concatenates it with
+     // the module name
+     $as_string = strval($prefix);
+     $this->prefixes[] = $as_string;
+ }
+
+ function setup($config) { // resolves doctype, module lists, element lookup, content sets and attribute collections
+
+ // activate the autocollection, if one is configured
+ if ($this->autoCollection !== false) {
+ $this->makeCollectionActive($this->autoCollection);
+ }
+
+ // retrieve the doctype and resolve it through the alias table
+ $this->doctype = $this->getDoctype($config);
+ if (isset($this->doctypeAliases[$this->doctype])) {
+ $this->doctype = $this->doctypeAliases[$this->doctype];
+ }
+
+ // process module collections to module name => module instance form
+ foreach ($this->collections as $col_i => $x) {
+ $this->processCollections($this->collections[$col_i]); // indexed access so the member itself is passed by reference
+ }
+
+ $this->validModules = $this->assembleModules($this->validCollections);
+ $this->activeModules = $this->assembleModules($this->activeCollections);
+
+ // setup lookup table (element name => names of defining modules) based on all valid modules
+ foreach ($this->validModules as $module) {
+ foreach ($module->info as $name => $def) {
+ if (!isset($this->elementLookup[$name])) {
+ $this->elementLookup[$name] = array();
+ }
+ $this->elementLookup[$name][] = $module->name;
+ }
+ }
+
+ // note the different choice of module list for the two objects below
+ $this->contentSets = new HTMLPurifier_ContentSets(
+ // content models that contain non-allowed elements are
+ // harmless because RemoveForeignElements will ensure
+ // they never get in anyway, and there is usually no
+ // reason why you should want to restrict a content
+ // model beyond what is mandated by the doctype.
+ // Note, however, that this means redefinitions of
+ // content models can't be tossed in validModules willy-nilly:
+ // that stuff still is regulated by configuration.
+ $this->validModules
+ );
+ $this->attrCollections = new HTMLPurifier_AttrCollections(
+ $this->attrTypes,
+ // only explicitly allowed modules are allowed to affect
+ // the global attribute collections. This means there's
+ // a distinction between loading the Bdo module, and the
+ // bdo element: Bdo will enable the dir attribute on all
+ // elements, while bdo will only define the bdo element,
+ // which will not have an editable directionality. This might
+ // catch people who are loading only elements by surprise, so
+ // we should consider loading an entire module if all the
+ // elements it defines are requested by the user, especially
+ // if it affects the global attribute collections.
+ $this->activeModules
+ );
+
+ }
+
+ /**
+ * Takes a list of collections and merges together all the defined
+ * modules for the current doctype from those collections.
+ * @param $collections List of collection suffixes we should grab
+ * modules from (like 'Safe' or 'Lenient')
+ */
+ function assembleModules($collections) { // @return array merged from the doctype-specific and '*' entries of each collection
+ $modules = array();
+ $numOfCollectionsUsed = 0; // counts collections that have an entry for the current doctype
+ foreach ($collections as $name) {
+ $disable_global = false;
+ if (!isset($this->collections[$name])) {
+ trigger_error("$name collection is undefined", E_USER_ERROR);
+ continue;
+ }
+ $cols = $this->collections[$name]; // local copy: the unset() below does not touch the member
+ if (isset($cols[$this->doctype])) {
+ if (isset($cols[$this->doctype]['*'])) {
+ unset($cols[$this->doctype]['*']); // '*' here is a marker, not a module: drop it before merging
+ $disable_global = true; // and skip this collection's catch-all entry below
+ }
+ $modules += $cols[$this->doctype]; // array union: keys already present in $modules are kept
+ $numOfCollectionsUsed++;
+ }
+ // accept catch-all doctype
+ if (
+ $this->doctype !== '*' &&
+ isset($cols['*']) &&
+ !$disable_global
+ ) {
+ $modules += $cols['*'];
+ }
+ }
+
+ if ($numOfCollectionsUsed < 1) {
+ // possible XSS injection if user-specified doctypes
+ // are allowed
+ trigger_error("Doctype {$this->doctype} does not exist, ".
+ "check for typos (if you desire a doctype that allows ".
+ "no elements, use an empty array collection)", E_USER_ERROR);
+ }
+ return $modules; // still returned after the error above, for handlers that do not halt
+ }
+
+ /**
+ * Takes a collection and performs inclusions and substitutions for it.
+ * @param $cols Reference to collections class member variable
+ */
+ function processCollections(&$cols) { // rewrites $cols in place: name lists become module name => module instance maps
+
+ // $cols is the set of collections
+ // $col_i is the name (index) of a collection
+ // $col is a collection/list of modules
+
+ // perform inclusions
+ foreach ($cols as $col_i => $col) {
+ $seen = array();
+ if (!empty($col[0]) && is_array($col[0])) {
+ $seen[$col_i] = true; // so a collection that includes itself is reported
+ $includes = $col[0];
+ unset($cols[$col_i][0]); // remove inclusions value, recursion guard
+ } else {
+ $includes = array();
+ }
+ if (empty($includes)) continue;
+ for ($i = 0; isset($includes[$i]); $i++) { // $includes may grow inside the loop, hence isset() rather than a fixed count
+ $inc = $includes[$i];
+ if (isset($seen[$inc])) {
+ trigger_error(
+ "Circular inclusion detected in $col_i collection",
+ E_USER_ERROR
+ );
+ continue;
+ } else {
+ $seen[$inc] = true;
+ }
+ if (!isset($cols[$inc])) {
+ trigger_error(
+ "Collection $col_i tried to include undefined ".
+ "collection $inc", E_USER_ERROR);
+ continue;
+ }
+ foreach ($cols[$inc] as $module) {
+ if (is_array($module)) { // another inclusion!
+ foreach ($module as $inc2) $includes[] = $inc2; // queue it for a later pass of the for loop
+ continue;
+ }
+ $cols[$col_i][] = $module; // merge in the other modules
+ }
+ }
+ }
+
+ // replace with real modules, invert module from list to
+ // assoc array of module name to module instance
+ foreach ($cols as $col_i => $col) {
+ $ignore_global = false;
+ $order = array(); // module name => registration order, used as the sort key below
+ foreach ($col as $module_i => $module) {
+ unset($cols[$col_i][$module_i]); // drop the old entry; it is re-added under the module name
+ if (is_array($module)) {
+ trigger_error("Illegal inclusion array at index".
+ " $module_i found collection $col_i, inclusion".
+ " arrays must be at start of collection (index 0)",
+ E_USER_ERROR);
+ continue;
+ }
+ if ($module_i === '*' && $module === false) { // '*' => false marker, restored after sorting
+ $ignore_global = true;
+ continue;
+ }
+ if (!isset($this->modules[$module])) {
+ trigger_error(
+ "Collection $col_i references undefined ".
+ "module $module",
+ E_USER_ERROR
+ );
+ continue;
+ }
+ $module = $this->modules[$module];
+ $cols[$col_i][$module->name] = $module;
+ $order[$module->name] = $module->order;
+ }
+ array_multisort(
+ $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
+ );
+ if ($ignore_global) $cols[$col_i]['*'] = false; // assembleModules() looks for this key
+ }
+
+ // delete pseudo-collections (names starting with an underscore)
+ foreach ($cols as $col_i => $col) {
+ if ($col_i[0] == '_') unset($cols[$col_i]);
+ }
+
+ }
+
+ /**
+ * Retrieves the doctype from the configuration object
+ */
+ function getDoctype($config) {
+     // an explicitly configured doctype always wins
+     $explicit = $config->get('HTML', 'Doctype');
+     if ($explicit !== null) {
+         return $explicit;
+     }
+
+     // blank managers skip the HTML-oriented backwards compatibility:
+     // they use the auto-doctype, or the catch-all doctype
+     if (!$this->initialized) {
+         if ($this->autoDoctype) {
+             return $this->autoDoctype;
+         }
+         return '*';
+     }
+
+     // backwards compatibility: derive the doctype from the legacy
+     // Core.XHTML and HTML.Strict directives
+     $family = $config->get('Core', 'XHTML') ? 'XHTML 1.0' : 'HTML 4.01';
+     $flavor = $config->get('HTML', 'Strict') ? ' Strict' : ' Transitional';
+     return $family . $flavor;
+ }
+
+ /**
+ * Retrieves merged element definitions for all active elements.
+ * @note We may want to generate an elements array during setup
+ * and pass that on, because a specific combination of
+ * elements may trigger the loading of a module.
+ * @param $config Instance of HTMLPurifier_Config, for determining
+ * stray elements.
+ */
+ function getElements($config) {
+     // walk every element named by an active module and collect its
+     // merged definition, keyed by element name
+     $merged = array();
+     foreach ($this->activeModules as $active) {
+         foreach ($active->elements as $tag) {
+             $merged[$tag] = $this->getElement($tag, $config);
+         }
+     }
+     // standalone elements now loaded
+     return $merged;
+ }
+
+ /**
+ * Retrieves a single merged element definition
+ * @param $name Name of element
+ * @param $config Instance of HTMLPurifier_Config, may not be necessary.
+ */
+ function getElement($name, $config) { // @return merged definition, or false if no usable definition was found
+
+ $def = false; // stays false until a standalone definition has been found
+
+ $modules = $this->validModules;
+
+ if (!isset($this->elementLookup[$name])) {
+ return false; // no valid module defines this element
+ }
+
+ foreach($this->elementLookup[$name] as $module_name) {
+
+ $module = $modules[$module_name];
+ $new_def = $module->info[$name];
+
+ if (!$def && $new_def->standalone) {
+ $def = $new_def; // the first standalone definition becomes the base
+ } elseif ($def) {
+ $def->mergeIn($new_def); // later definitions, standalone or not, are merged into the base
+ } else {
+ // could "save it for another day":
+ // non-standalone definitions that don't have a standalone
+ // to merge into could be deferred to the end
+ continue;
+ }
+
+ // attribute value expansions
+ $this->attrCollections->performInclusions($def->attr);
+ $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
+
+ // descendants_are_inline, for ChildDef_Chameleon
+ if (is_string($def->content_model) &&
+ strpos($def->content_model, 'Inline') !== false) { // plain substring test on the model string
+ if ($name != 'del' && $name != 'ins') {
+ // this is for you, ins/del
+ $def->descendants_are_inline = true;
+ }
+ }
+
+ $this->contentSets->generateChildDef($def, $module); // runs once per contributing module, on the merged result so far
+ }
+
+ return $def;
+
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/Language.php b/library/HTMLPurifier/Language.php
new file mode 100644
index 00000000..ca6fe031
--- /dev/null
+++ b/library/HTMLPurifier/Language.php
@@ -0,0 +1,56 @@
+_loaded) return;
+ $factory = HTMLPurifier_LanguageFactory::instance();
+ $factory->loadLanguage($this->code);
+ foreach ($factory->keys as $key) {
+ $this->$key = $factory->cache[$this->code][$key];
+ }
+ $this->_loaded = true;
+ }
+
+ /**
+ * Retrieves a localised message. Does not perform any operations.
+ * @param $key string identifier of message
+ * @return string localised message
+ */
+ function getMessage($key) {
+     // the language data is loaded lazily, on first lookup
+     if (!$this->_loaded) {
+         $this->load();
+     }
+     // unknown keys yield an empty string
+     return isset($this->messages[$key]) ? $this->messages[$key] : '';
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/Language/classes/en-x-test.php b/library/HTMLPurifier/Language/classes/en-x-test.php
new file mode 100644
index 00000000..303ba4ba
--- /dev/null
+++ b/library/HTMLPurifier/Language/classes/en-x-test.php
@@ -0,0 +1,12 @@
+
\ No newline at end of file
diff --git a/library/HTMLPurifier/Language/messages/en-x-test.php b/library/HTMLPurifier/Language/messages/en-x-test.php
new file mode 100644
index 00000000..115662bd
--- /dev/null
+++ b/library/HTMLPurifier/Language/messages/en-x-test.php
@@ -0,0 +1,11 @@
+ 'HTML Purifier X'
+);
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php
new file mode 100644
index 00000000..7650b818
--- /dev/null
+++ b/library/HTMLPurifier/Language/messages/en.php
@@ -0,0 +1,12 @@
+ 'HTML Purifier',
+'pizza' => 'Pizza', // for unit testing purposes
+
+);
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/LanguageFactory.php b/library/HTMLPurifier/LanguageFactory.php
new file mode 100644
index 00000000..5cdf1281
--- /dev/null
+++ b/library/HTMLPurifier/LanguageFactory.php
@@ -0,0 +1,196 @@
+cache[$language_code][$key] = $value
+ * @value array map
+ */
+ var $cache;
+
+ /**
+ * Valid keys in the HTMLPurifier_Language object. Designates which
+ * variables to slurp out of a message file.
+ * @value array list
+ */
+ var $keys = array('fallback', 'messages');
+
+ /**
+ * Instance of HTMLPurifier_AttrDef_Lang to validate language codes
+ * @value object HTMLPurifier_AttrDef_Lang
+ */
+ var $validator;
+
+ /**
+ * Cached copy of dirname(__FILE__), directory of current file without
+ * trailing slash
+ * @value string filename
+ */
+ var $dir;
+
+ /**
+ * Keys whose contents are a hash map and can be merged
+ * @value array lookup
+ */
+ var $mergeable_keys_map = array('messages' => true);
+
+ /**
+ * Keys whose contents are a list and can be merged
+ * @value array lookup
+ */
+ var $mergeable_keys_list = array();
+
+ /**
+ * Retrieve sole instance of the factory.
+ * @static
+ * @param $prototype Optional prototype to overload sole instance with,
+ * or bool true to reset to default factory.
+ */
+ static function &instance($prototype = null) {
+     static $instance = null;
+     if ($prototype === true || ($prototype === null && $instance === null)) {
+         // Either an explicit reset was requested or this is the first
+         // call: build and set up the default factory. The true check
+         // has to come first; testing "$prototype !== null" first would
+         // store the boolean itself as the singleton and the reset
+         // could never happen.
+         $instance = new HTMLPurifier_LanguageFactory();
+         $instance->setup();
+     } elseif ($prototype !== null) {
+         // caller-supplied prototype replaces the current instance
+         $instance = $prototype;
+     }
+     return $instance;
+ }
+
+ /**
+ * Sets up the singleton, much like a constructor
+ * @note Prevents people from getting this outside of the singleton
+ */
+ function setup() {
+     // validator used by create() to check language codes
+     $lang_validator = new HTMLPurifier_AttrDef_Lang();
+     $this->validator = $lang_validator;
+     // directory of this file, the base for the Language/ lookups
+     $this->dir = dirname(__FILE__);
+ }
+
+ /**
+ * Creates a language object, handles class fallbacks
+ * @param $code string language code
+ */
+ function create($code) {
+
+     $config = $context = false; // hope it doesn't use these!
+     $code = $this->validator->validate($code, $config, $context);
+     if ($code === false) $code = 'en'; // malformed code becomes English
+
+     $pcode = str_replace('-', '_', $code); // make valid PHP classname
+     // validated codes whose fallback is currently being resolved;
+     // used to break circular fallback chains
+     static $resolving = array();
+
+     if ($code == 'en') {
+         $class = 'HTMLPurifier_Language';
+         $file = $this->dir . '/Language.php';
+     } else {
+         $class = 'HTMLPurifier_Language_' . $pcode;
+         $file = $this->dir . '/Language/classes/' . $code . '.php';
+         // PHP5/APC deps bug workaround can go here
+         // you can bypass the conditional include by loading the
+         // file yourself
+         if (file_exists($file) && !class_exists($class)) {
+             include_once $file;
+         }
+     }
+
+     if (class_exists($class)) {
+         $lang = new $class;
+     } elseif ($code == 'en') {
+         // English ends every fallback chain, so there is nothing
+         // left to fall back to
+         trigger_error('Cannot create language object: class ' .
+             $class . ' does not exist', E_USER_ERROR);
+         return false;
+     } else {
+         // No dedicated class: build the fallback language's object
+         // instead. The fallback is looked up on this factory and the
+         // object is created by recursing into this method.
+         $revisited = isset($resolving[$code]);
+         // coming back to a code that is already being resolved means
+         // the fallbacks form a cycle: go straight to English then
+         $fallback = $revisited ? false : $this->getFallbackFor($code);
+         if (empty($fallback)) $fallback = 'en';
+         $resolving[$code] = true;
+         $lang = $this->create($fallback);
+         // only the call that set the mark may clear it
+         if (!$revisited) unset($resolving[$code]);
+         if ($lang === false) return false;
+     }
+     // the object reports the requested code even when a fallback
+     // class was used
+     $lang->code = $code;
+
+     return $lang;
+
+ }
+
+ /**
+ * Returns the fallback language for language
+ * @note Loads the original language into cache
+ * @param $code string language code
+ */
+ function getFallbackFor($code) {
+     // loading fills $this->cache[$code]; it returns immediately when
+     // the code is already cached
+     $this->loadLanguage($code);
+     $entry = $this->cache[$code];
+     return $entry['fallback'];
+ }
+
+ /**
+ * Loads language into the cache, handles message file and fallbacks
+ * @param $code string language code
+ */
+ function loadLanguage($code) {
+     // codes whose fallback chain is being resolved right now
+     static $languages_seen = array(); // recursion guard
+
+     // abort if we've already loaded it
+     if (isset($this->cache[$code])) return;
+
+     // generate filename
+     $filename = $this->dir . '/Language/messages/' . $code . '.php';
+
+     // default fallback : may be overwritten by the ensuing include
+     $fallback = ($code != 'en') ? 'en' : false;
+
+     // load primary localisation
+     if (!file_exists($filename)) {
+         // skip the include: will rely solely on fallback
+         $cache = array();
+     } else {
+         // the message file assigns the variables named in $this->keys
+         include $filename;
+         $cache = compact($this->keys);
+     }
+
+     // load fallback localisation
+     if (!empty($fallback)) {
+
+         // infinite recursion guard: meeting a code that is still
+         // being resolved means the fallbacks form a cycle
+         if (isset($languages_seen[$code])) {
+             trigger_error('Circular fallback reference in language ' .
+                 $code, E_USER_ERROR);
+             $fallback = 'en';
+         }
+         $languages_seen[$code] = true;
+
+         // load the fallback recursively
+         $this->loadLanguage($fallback);
+         $fallback_cache = $this->cache[$fallback];
+
+         // merge fallback with current language
+         foreach ( $this->keys as $key ) {
+             if (isset($cache[$key]) && isset($fallback_cache[$key])) {
+                 if (isset($this->mergeable_keys_map[$key])) {
+                     // map: own entries win, fallback fills the gaps
+                     $cache[$key] = $cache[$key] + $fallback_cache[$key];
+                 } elseif (isset($this->mergeable_keys_list[$key])) {
+                     $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
+                 }
+                 // non-mergeable keys keep the language's own value
+             } else {
+                 $cache[$key] = $fallback_cache[$key];
+             }
+         }
+
+         // Resolution finished: release the mark. The guard is a
+         // function-level static shared by all factory objects, so a
+         // stale mark would make a later factory with an empty cache
+         // report a cycle that does not exist.
+         unset($languages_seen[$code]);
+
+     }
+
+     // save to cache for later retrieval
+     $this->cache[$code] = $cache;
+
+     return;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index ca5a5328..975fb65f 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -151,7 +151,8 @@ class HTMLPurifier_Lexer
$lexer = $prototype;
}
if (empty($lexer)) {
- if (class_exists('DOMDocument')) { // check for DOM support
+ if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
+ class_exists('DOMDocument')) { // check for DOM support
require_once 'HTMLPurifier/Lexer/DOMLex.php';
$lexer = new HTMLPurifier_Lexer_DOMLex();
} else {
diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php
index dcf3caee..9286b023 100644
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -21,7 +21,7 @@ require_once 'HTMLPurifier/TokenFactory.php';
*
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
* If this is a huge problem, due to the fact that HTML is hand
- * edited and youa re unable to get a parser cache that caches the
+ * edited and you are unable to get a parser cache that caches the
* the output of HTML Purifier while keeping the original HTML lying
* around, you may want to run Tidy on the resulting output or use
* HTMLPurifier_DirectLex
@@ -54,7 +54,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$doc = new DOMDocument();
$doc->encoding = 'UTF-8'; // technically does nothing, but whatever
- @$doc->loadHTML($string); // mute all errors, handle it transparently
+
+ // DOM will toss errors if the HTML it's parsing has really big
+ // problems, so we're going to mute them. This can cause problems
+ // if a custom error handler that doesn't implement error_reporting
+ // is set, as noted by a Drupal plugin of HTML Purifier. Consider
+ // making our own error reporter to temporarily load in
+ @$doc->loadHTML($string);
$tokens = array();
$this->tokenizeDOM(
diff --git a/library/HTMLPurifier/Printer/HTMLDefinition.php b/library/HTMLPurifier/Printer/HTMLDefinition.php
index 2ec297e7..a677c58b 100644
--- a/library/HTMLPurifier/Printer/HTMLDefinition.php
+++ b/library/HTMLPurifier/Printer/HTMLDefinition.php
@@ -13,6 +13,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
function render($config) {
$ret = '';
$this->config =& $config;
+
$this->def = $config->getHTMLDefinition();
$def =& $this->def;
@@ -21,16 +22,14 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$ret .= $this->element('caption', 'Environment');
$ret .= $this->row('Parent of fragment', $def->info_parent);
- $ret .= $this->row('Strict mode', $def->strict);
- if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper);
+ $ret .= $this->renderChildren($def->info_parent_def->child);
+ $ret .= $this->row('Block wrap name', $def->info_block_wrapper);
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Global attributes');
$ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
$ret .= $this->end('tr');
- $ret .= $this->renderChildren($def->info_parent_def->child);
-
$ret .= $this->start('tr');
$ret .= $this->element('th', 'Tag transforms');
$list = array();
@@ -81,8 +80,8 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Type');
- $ret .= $this->element('td', ucfirst($def->type));
+ $ret .= $this->element('th', 'Inline content');
+ $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No');
$ret .= $this->end('tr');
if (!empty($def->excludes)) {
$ret .= $this->start('tr');
@@ -130,15 +129,17 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$elements = array();
$attr = array();
if (isset($def->elements)) {
- if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context);
+ if ($def->type == 'strictblockquote') {
+ $def->validateChildren(array(), $this->config, $context);
+ }
$elements = $def->elements;
} elseif ($def->type == 'chameleon') {
$attr['rowspan'] = 2;
} elseif ($def->type == 'empty') {
$elements = array();
} elseif ($def->type == 'table') {
- $elements = array('col', 'caption', 'colgroup', 'thead',
- 'tfoot', 'tbody', 'tr');
+ $elements = array_flip(array('col', 'caption', 'colgroup', 'thead',
+ 'tfoot', 'tbody', 'tr'));
}
$ret .= $this->element('th', 'Allowed children', $attr);
@@ -167,6 +168,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
* @param $array Tag lookup array in form of array('tagname' => true)
*/
function listifyTagLookup($array) {
+ ksort($array);
$list = array();
foreach ($array as $name => $discard) {
if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
@@ -181,6 +183,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
* @todo Also add information about internal state
*/
function listifyObjectList($array) {
+ ksort($array);
$list = array();
foreach ($array as $discard => $obj) {
$list[] = $this->getClass($obj, 'AttrTransform_');
@@ -193,6 +196,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
* @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
*/
function listifyAttr($array) {
+ ksort($array);
$list = array();
foreach ($array as $name => $obj) {
if ($obj === false) continue;
diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php
index dd5a920f..08f90756 100644
--- a/library/HTMLPurifier/Strategy/FixNesting.php
+++ b/library/HTMLPurifier/Strategy/FixNesting.php
@@ -49,8 +49,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
$tokens[] = new HTMLPurifier_Token_End($parent_name);
// setup the context variables
- $parent_type = 'unknown'; // reference var that we alter
- $context->register('ParentType', $parent_type);
+ $is_inline = false; // reference var that we alter
+ $context->register('IsInline', $is_inline);
//####################################################################//
// Loop initialization
@@ -115,11 +115,16 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
}
// calculate context
- if (isset($parent_def)) {
- $parent_type = $parent_def->type;
+ if ($is_inline === false) {
+ // check if conditions make it inline
+ if (!empty($parent_def) && $parent_def->descendants_are_inline) {
+ $is_inline = $count - 1;
+ }
} else {
- // generally found in specialized elements like UL
- $parent_type = 'unknown';
+ // check if we're out of inline
+ if ($count === $is_inline) {
+ $is_inline = false;
+ }
}
//################################################################//
@@ -273,7 +278,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
array_pop($tokens);
// remove context variables
- $context->destroy('ParentType');
+ $context->destroy('IsInline');
//####################################################################//
// Return
diff --git a/library/HTMLPurifier/TagTransform.php b/library/HTMLPurifier/TagTransform.php
index be0555a0..f5dc5c97 100644
--- a/library/HTMLPurifier/TagTransform.php
+++ b/library/HTMLPurifier/TagTransform.php
@@ -1,6 +1,6 @@
transform_to = $transform_to;
- }
-
- function transform($tag, $config, &$context) {
- $new_tag = $tag->copy();
- $new_tag->name = $this->transform_to;
- return $new_tag;
- }
-
-}
-
-/**
- * Transforms CENTER tags into proper version (DIV with text-align CSS)
- *
- * Takes a CENTER tag, parses the align attribute, and then if it's valid
- * assigns it to the CSS property text-align.
- */
-class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
-{
- var $transform_to = 'div';
-
- function transform($tag, $config, &$context) {
- if ($tag->type == 'end') {
- $new_tag = new HTMLPurifier_Token_End($this->transform_to);
- return $new_tag;
- }
- $attr = $tag->attr;
- $prepend_css = 'text-align:center;';
- if (isset($attr['style'])) {
- $attr['style'] = $prepend_css . $attr['style'];
- } else {
- $attr['style'] = $prepend_css;
- }
- $new_tag = $tag->copy();
- $new_tag->name = $this->transform_to;
- $new_tag->attr = $attr;
- return $new_tag;
- }
-}
-
-/**
- * Transforms FONT tags to the proper form (SPAN with CSS styling)
- *
- * This transformation takes the three proprietary attributes of FONT and
- * transforms them into their corresponding CSS attributes. These are color,
- * face, and size.
- *
- * @note Size is an interesting case because it doesn't map cleanly to CSS.
- * Thanks to
- * http://style.cleverchimp.com/font_size_intervals/altintervals.html
- * for reasonable mappings.
- */
-class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
-{
-
- var $transform_to = 'span';
-
- var $_size_lookup = array(
- '1' => 'xx-small',
- '2' => 'small',
- '3' => 'medium',
- '4' => 'large',
- '5' => 'x-large',
- '6' => 'xx-large',
- '7' => '300%',
- '-1' => 'smaller',
- '+1' => 'larger',
- '-2' => '60%',
- '+2' => '150%',
- '+4' => '300%'
- );
-
- function transform($tag, $config, &$context) {
-
- if ($tag->type == 'end') {
- $new_tag = new HTMLPurifier_Token_End($this->transform_to);
- return $new_tag;
- }
-
- $attr = $tag->attr;
- $prepend_style = '';
-
- // handle color transform
- if (isset($attr['color'])) {
- $prepend_style .= 'color:' . $attr['color'] . ';';
- unset($attr['color']);
- }
-
- // handle face transform
- if (isset($attr['face'])) {
- $prepend_style .= 'font-family:' . $attr['face'] . ';';
- unset($attr['face']);
- }
-
- // handle size transform
- if (isset($attr['size'])) {
- if (isset($this->_size_lookup[$attr['size']])) {
- $prepend_style .= 'font-size:' .
- $this->_size_lookup[$attr['size']] . ';';
- }
- unset($attr['size']);
- }
-
- if ($prepend_style) {
- $attr['style'] = isset($attr['style']) ?
- $prepend_style . $attr['style'] :
- $prepend_style;
- }
-
- $new_tag = $tag->copy();
- $new_tag->name = $this->transform_to;
- $new_tag->attr = $attr;
-
- return $new_tag;
-
- }
-}
-
?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/TagTransform/Center.php b/library/HTMLPurifier/TagTransform/Center.php
new file mode 100644
index 00000000..571bb9df
--- /dev/null
+++ b/library/HTMLPurifier/TagTransform/Center.php
@@ -0,0 +1,34 @@
+type == 'end') {
+ $new_tag = new HTMLPurifier_Token_End($this->transform_to);
+ return $new_tag;
+ }
+ $attr = $tag->attr;
+ $prepend_css = 'text-align:center;';
+ if (isset($attr['style'])) {
+ $attr['style'] = $prepend_css . $attr['style'];
+ } else {
+ $attr['style'] = $prepend_css;
+ }
+ $new_tag = $tag->copy();
+ $new_tag->name = $this->transform_to;
+ $new_tag->attr = $attr;
+ return $new_tag;
+ }
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/TagTransform/Font.php b/library/HTMLPurifier/TagTransform/Font.php
new file mode 100644
index 00000000..ae6d7838
--- /dev/null
+++ b/library/HTMLPurifier/TagTransform/Font.php
@@ -0,0 +1,83 @@
+ 'xx-small',
+ '2' => 'small',
+ '3' => 'medium',
+ '4' => 'large',
+ '5' => 'x-large',
+ '6' => 'xx-large',
+ '7' => '300%',
+ '-1' => 'smaller',
+ '+1' => 'larger',
+ '-2' => '60%',
+ '+2' => '150%',
+ '+4' => '300%'
+ );
+
+ function transform($tag, $config, &$context) {
+
+ if ($tag->type == 'end') { // end tag: return a fresh end token named after transform_to; attributes are not consulted
+ $new_tag = new HTMLPurifier_Token_End($this->transform_to);
+ return $new_tag;
+ }
+
+ $attr = $tag->attr;
+ $prepend_style = ''; // accumulates the CSS declarations generated below
+
+ // color attribute: emitted as a CSS "color" declaration, then removed from $attr
+ if (isset($attr['color'])) {
+ $prepend_style .= 'color:' . $attr['color'] . ';';
+ unset($attr['color']);
+ }
+
+ // face attribute: emitted as a CSS "font-family" declaration, then removed from $attr
+ if (isset($attr['face'])) {
+ $prepend_style .= 'font-family:' . $attr['face'] . ';';
+ unset($attr['face']);
+ }
+
+ // size attribute: translated through $_size_lookup; a value with no entry adds no declaration, but the attribute is removed either way
+ if (isset($attr['size'])) {
+ if (isset($this->_size_lookup[$attr['size']])) {
+ $prepend_style .= 'font-size:' .
+ $this->_size_lookup[$attr['size']] . ';';
+ }
+ unset($attr['size']);
+ }
+
+ if ($prepend_style) { // generated declarations are placed in front of any existing style value
+ $attr['style'] = isset($attr['style']) ?
+ $prepend_style . $attr['style'] :
+ $prepend_style;
+ }
+
+ $new_tag = $tag->copy(); // the renamed tag is built from the result of copy(), not from $tag directly
+ $new_tag->name = $this->transform_to;
+ $new_tag->attr = $attr;
+
+ return $new_tag;
+
+ }
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/TagTransform/Simple.php b/library/HTMLPurifier/TagTransform/Simple.php
new file mode 100644
index 00000000..6ffd0eab
--- /dev/null
+++ b/library/HTMLPurifier/TagTransform/Simple.php
@@ -0,0 +1,26 @@
+transform_to = $transform_to;
+ }
+
+ function transform($tag, $config, &$context) { // $config and $context are not used in this body
+ $new_tag = $tag->copy(); // work on the result of copy() rather than on $tag
+ $new_tag->name = $this->transform_to; // only the name changes; nothing else on the copy is touched here
+ return $new_tag;
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/library/HTMLPurifier/URISchemeRegistry.php b/library/HTMLPurifier/URISchemeRegistry.php
index 1d529ba5..1ece1a2a 100644
--- a/library/HTMLPurifier/URISchemeRegistry.php
+++ b/library/HTMLPurifier/URISchemeRegistry.php
@@ -10,7 +10,7 @@ HTMLPurifier_ConfigSchema::define(
'irc' => true, // "Internet Relay Chat", usually needs another app
// for Usenet, these two are similar, but distinct
'nntp' => true, // individual Netnews articles
- 'news' => true // newsgroup or individual Netnews articles),
+ 'news' => true // newsgroup or individual Netnews articles
), 'lookup',
'Whitelist that defines the schemes that a URI is allowed to have. This '.
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
diff --git a/smoketests/printDefinition.php b/smoketests/printDefinition.php
index a616f0d5..8e02d47d 100644
--- a/smoketests/printDefinition.php
+++ b/smoketests/printDefinition.php
@@ -22,6 +22,17 @@ foreach ($_GET as $key => $value) {
@$config->loadArray($get);
+/* // sample local definition, obviously needs to be less clunky
+$html_definition =& $config->getHTMLDefinition(true);
+$module = new HTMLPurifier_HTMLModule();
+$module->name = 'Marquee';
+$module->info['marquee'] = new HTMLPurifier_ElementDef();
+$module->info['marquee']->content_model = '#PCDATA | Inline';
+$module->info['marquee']->content_model_type = 'optional';
+$module->content_sets = array('Inline' => 'marquee');
+$html_definition->manager->addModule($module);
+*/
+
$printer_html_definition = new HTMLPurifier_Printer_HTMLDefinition();
$printer_css_definition = new HTMLPurifier_Printer_CSSDefinition();
diff --git a/tests/HTMLPurifier/AttrDef/BackgroundPositionTest.php b/tests/HTMLPurifier/AttrDef/CSS/BackgroundPositionTest.php
similarity index 90%
rename from tests/HTMLPurifier/AttrDef/BackgroundPositionTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/BackgroundPositionTest.php
index ce720841..911823f4 100644
--- a/tests/HTMLPurifier/AttrDef/BackgroundPositionTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/BackgroundPositionTest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_BackgroundPosition();
+ $this->def = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
// explicitly cited in spec
$this->assertDef('0% 0%');
diff --git a/tests/HTMLPurifier/AttrDef/BackgroundTest.php b/tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php
similarity index 52%
rename from tests/HTMLPurifier/AttrDef/BackgroundTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php
index 69b3c1ba..d4db8493 100644
--- a/tests/HTMLPurifier/AttrDef/BackgroundTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php
@@ -1,14 +1,15 @@
def = new HTMLPurifier_AttrDef_Background(HTMLPurifier_Config::createDefault());
+ $config = HTMLPurifier_Config::createDefault();
+ $this->def = new HTMLPurifier_AttrDef_CSS_Background($config);
$valid = '#333 url(chess.png) repeat fixed 50% top';
$this->assertDef($valid);
diff --git a/tests/HTMLPurifier/AttrDef/BorderTest.php b/tests/HTMLPurifier/AttrDef/CSS/BorderTest.php
similarity index 50%
rename from tests/HTMLPurifier/AttrDef/BorderTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/BorderTest.php
index b18bfe70..521588db 100644
--- a/tests/HTMLPurifier/AttrDef/BorderTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/BorderTest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_Border(HTMLPurifier_Config::createDefault());
+ $config = HTMLPurifier_Config::createDefault();
+ $this->def = new HTMLPurifier_AttrDef_CSS_Border($config);
$this->assertDef('thick solid red', 'thick solid #F00');
$this->assertDef('thick solid');
diff --git a/tests/HTMLPurifier/AttrDef/ColorTest.php b/tests/HTMLPurifier/AttrDef/CSS/ColorTest.php
similarity index 83%
rename from tests/HTMLPurifier/AttrDef/ColorTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/ColorTest.php
index b44082c0..1c29ae68 100644
--- a/tests/HTMLPurifier/AttrDef/ColorTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/ColorTest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_Color();
+ $this->def = new HTMLPurifier_AttrDef_CSS_Color();
$this->assertDef('#F00');
$this->assertDef('#808080');
diff --git a/tests/HTMLPurifier/AttrDef/CompositeTest.php b/tests/HTMLPurifier/AttrDef/CSS/CompositeTest.php
similarity index 83%
rename from tests/HTMLPurifier/AttrDef/CompositeTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/CompositeTest.php
index 8ea7b5e8..3ec60e7d 100644
--- a/tests/HTMLPurifier/AttrDef/CompositeTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/CompositeTest.php
@@ -1,20 +1,20 @@
defs =& $defs;
}
}
-class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
+class HTMLPurifier_AttrDef_CSS_CompositeTest extends HTMLPurifier_AttrDefHarness
{
var $def1, $def2;
@@ -32,7 +32,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
$def1 = new HTMLPurifier_AttrDefMock($this);
$def2 = new HTMLPurifier_AttrDefMock($this);
$defs = array(&$def1, &$def2);
- $def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
+ $def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
$input = 'FOOBAR';
$output = 'foobar';
$def1_params = array($input, $config, $context);
@@ -51,7 +51,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
$def1 = new HTMLPurifier_AttrDefMock($this);
$def2 = new HTMLPurifier_AttrDefMock($this);
$defs = array(&$def1, &$def2);
- $def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
+ $def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
$input = 'BOOMA';
$output = 'booma';
$def_params = array($input, $config, $context);
@@ -71,7 +71,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
$def1 = new HTMLPurifier_AttrDefMock($this);
$def2 = new HTMLPurifier_AttrDefMock($this);
$defs = array(&$def1, &$def2);
- $def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
+ $def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
$input = 'BOOMA';
$output = false;
$def_params = array($input, $config, $context);
diff --git a/tests/HTMLPurifier/AttrDef/FontFamilyTest.php b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
similarity index 69%
rename from tests/HTMLPurifier/AttrDef/FontFamilyTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
index 47c0e779..a802d45f 100644
--- a/tests/HTMLPurifier/AttrDef/FontFamilyTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_FontFamily();
+ $this->def = new HTMLPurifier_AttrDef_CSS_FontFamily();
$this->assertDef('Gill, Helvetica, sans-serif');
$this->assertDef('\'Times New Roman\', serif');
diff --git a/tests/HTMLPurifier/AttrDef/FontTest.php b/tests/HTMLPurifier/AttrDef/CSS/FontTest.php
similarity index 79%
rename from tests/HTMLPurifier/AttrDef/FontTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/FontTest.php
index 49b3652c..6bcb4fe2 100644
--- a/tests/HTMLPurifier/AttrDef/FontTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/FontTest.php
@@ -1,14 +1,15 @@
def = new HTMLPurifier_AttrDef_Font(HTMLPurifier_Config::createDefault());
+ $config = HTMLPurifier_Config::createDefault();
+ $this->def = new HTMLPurifier_AttrDef_CSS_Font($config);
// hodgepodge of usage cases from W3C spec, but " -> '
$this->assertDef('12px/14px sans-serif');
diff --git a/tests/HTMLPurifier/AttrDef/CSSLengthTest.php b/tests/HTMLPurifier/AttrDef/CSS/LengthTest.php
similarity index 75%
rename from tests/HTMLPurifier/AttrDef/CSSLengthTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/LengthTest.php
index fabea20f..56129af2 100644
--- a/tests/HTMLPurifier/AttrDef/CSSLengthTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/LengthTest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_CSSLength();
+ $this->def = new HTMLPurifier_AttrDef_CSS_Length();
$this->assertDef('0');
$this->assertDef('0px');
@@ -31,7 +31,7 @@ class HTMLPurifier_AttrDef_CSSLengthTest extends HTMLPurifier_AttrDefHarness
function testNonNegative() {
- $this->def = new HTMLPurifier_AttrDef_CSSLength(true);
+ $this->def = new HTMLPurifier_AttrDef_CSS_Length(true);
$this->assertDef('3cm');
$this->assertDef('-3mm', false);
diff --git a/tests/HTMLPurifier/AttrDef/ListStyleTest.php b/tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php
similarity index 77%
rename from tests/HTMLPurifier/AttrDef/ListStyleTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php
index 95ef9444..6863c489 100644
--- a/tests/HTMLPurifier/AttrDef/ListStyleTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php
@@ -1,14 +1,15 @@
def = new HTMLPurifier_AttrDef_ListStyle(HTMLPurifier_Config::createDefault());
+ $config = HTMLPurifier_Config::createDefault();
+ $this->def = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
$this->assertDef('lower-alpha');
$this->assertDef('upper-roman inside');
diff --git a/tests/HTMLPurifier/AttrDef/MultipleTest.php b/tests/HTMLPurifier/AttrDef/CSS/MultipleTest.php
similarity index 77%
rename from tests/HTMLPurifier/AttrDef/MultipleTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/MultipleTest.php
index 8c102b39..075c56ad 100644
--- a/tests/HTMLPurifier/AttrDef/MultipleTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/MultipleTest.php
@@ -1,16 +1,16 @@
def = new HTMLPurifier_AttrDef_Multiple(
+ $this->def = new HTMLPurifier_AttrDef_CSS_Multiple(
new HTMLPurifier_AttrDef_Integer()
);
diff --git a/tests/HTMLPurifier/AttrDef/NumberTest.php b/tests/HTMLPurifier/AttrDef/CSS/NumberTest.php
similarity index 73%
rename from tests/HTMLPurifier/AttrDef/NumberTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/NumberTest.php
index 4ddea5e6..f8f714f6 100644
--- a/tests/HTMLPurifier/AttrDef/NumberTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/NumberTest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_Number();
+ $this->def = new HTMLPurifier_AttrDef_CSS_Number();
$this->assertDef('0');
$this->assertDef('34');
@@ -29,7 +29,7 @@ class HTMLPurifier_AttrDef_NumberTest extends HTMLPurifier_AttrDefHarness
function testNonNegative() {
- $this->def = new HTMLPurifier_AttrDef_Number(true);
+ $this->def = new HTMLPurifier_AttrDef_CSS_Number(true);
$this->assertDef('23');
$this->assertDef('-12', false);
diff --git a/tests/HTMLPurifier/AttrDef/PercentageTest.php b/tests/HTMLPurifier/AttrDef/CSS/PercentageTest.php
similarity index 66%
rename from tests/HTMLPurifier/AttrDef/PercentageTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/PercentageTest.php
index 6694296c..2aa0d401 100644
--- a/tests/HTMLPurifier/AttrDef/PercentageTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/PercentageTest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_Percentage();
+ $this->def = new HTMLPurifier_AttrDef_CSS_Percentage();
$this->assertDef('10%');
$this->assertDef('1.607%');
diff --git a/tests/HTMLPurifier/AttrDef/TextDecorationTest.php b/tests/HTMLPurifier/AttrDef/CSS/TextDecorationTest.php
similarity index 72%
rename from tests/HTMLPurifier/AttrDef/TextDecorationTest.php
rename to tests/HTMLPurifier/AttrDef/CSS/TextDecorationTest.php
index f633177f..e5f3e0c7 100644
--- a/tests/HTMLPurifier/AttrDef/TextDecorationTest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/TextDecorationTest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_TextDecoration();
+ $this->def = new HTMLPurifier_AttrDef_CSS_TextDecoration();
$this->assertDef('underline');
$this->assertDef('overline');
diff --git a/tests/HTMLPurifier/AttrDef/CSSURITest.php b/tests/HTMLPurifier/AttrDef/CSS/URITest.php
similarity index 83%
rename from tests/HTMLPurifier/AttrDef/CSSURITest.php
rename to tests/HTMLPurifier/AttrDef/CSS/URITest.php
index 1fe1a3dc..2a238d22 100644
--- a/tests/HTMLPurifier/AttrDef/CSSURITest.php
+++ b/tests/HTMLPurifier/AttrDef/CSS/URITest.php
@@ -1,14 +1,14 @@
def = new HTMLPurifier_AttrDef_CSSURI();
+ $this->def = new HTMLPurifier_AttrDef_CSS_URI();
$this->assertDef('', false);
diff --git a/tests/HTMLPurifier/AttrDef/Email/SimpleCheckTest.php b/tests/HTMLPurifier/AttrDef/Email/SimpleCheckTest.php
deleted file mode 100644
index 70a77f72..00000000
--- a/tests/HTMLPurifier/AttrDef/Email/SimpleCheckTest.php
+++ /dev/null
@@ -1,16 +0,0 @@
-def = new HTMLPurifier_AttrDef_Email_SimpleCheck();
- }
-
-}
-
-?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/AttrDef/IDTest.php b/tests/HTMLPurifier/AttrDef/HTML/IDTest.php
similarity index 70%
rename from tests/HTMLPurifier/AttrDef/IDTest.php
rename to tests/HTMLPurifier/AttrDef/HTML/IDTest.php
index e47ad9af..a604ca0c 100644
--- a/tests/HTMLPurifier/AttrDef/IDTest.php
+++ b/tests/HTMLPurifier/AttrDef/HTML/IDTest.php
@@ -1,10 +1,10 @@
context->register('IDAccumulator', $id_accumulator);
- $this->def = new HTMLPurifier_AttrDef_ID();
+ $this->config->set('Attr', 'EnableID', true);
+ $this->def = new HTMLPurifier_AttrDef_HTML_ID();
}
@@ -74,6 +75,26 @@ class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness
}
+ // reference functionality is disabled for now
+ function disabled_testIDReference() {
+
+ $this->def = new HTMLPurifier_AttrDef_HTML_ID(true); // NOTE(review): true presumably selects reference mode -- confirm against the AttrDef constructor
+
+ $this->assertDef('good_id');
+ $this->assertDef('good_id'); // the same ID a second time is expected to pass with this def
+ $this->assertDef('', false); // the empty string is expected to be rejected
+
+ $this->def = new HTMLPurifier_AttrDef_HTML_ID(); // constructed without the flag
+
+ $this->assertDef('good_id');
+ $this->assertDef('good_id', false); // without the flag, the second use of the same ID is expected to be rejected
+
+ $this->def = new HTMLPurifier_AttrDef_HTML_ID(true);
+
+ $this->assertDef('good_id'); // with the flag set again, the already-used ID is expected to pass
+
+ }
+
}
?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/AttrDef/LengthTest.php b/tests/HTMLPurifier/AttrDef/HTML/LengthTest.php
similarity index 67%
rename from tests/HTMLPurifier/AttrDef/LengthTest.php
rename to tests/HTMLPurifier/AttrDef/HTML/LengthTest.php
index f67c70b7..e5b89f22 100644
--- a/tests/HTMLPurifier/AttrDef/LengthTest.php
+++ b/tests/HTMLPurifier/AttrDef/HTML/LengthTest.php
@@ -1,13 +1,13 @@
def = new HTMLPurifier_AttrDef_Length();
+ $this->def = new HTMLPurifier_AttrDef_HTML_Length();
}
function test() {
diff --git a/tests/HTMLPurifier/AttrDef/MultiLengthTest.php b/tests/HTMLPurifier/AttrDef/HTML/MultiLengthTest.php
similarity index 56%
rename from tests/HTMLPurifier/AttrDef/MultiLengthTest.php
rename to tests/HTMLPurifier/AttrDef/HTML/MultiLengthTest.php
index 6d9acd36..eaa34952 100644
--- a/tests/HTMLPurifier/AttrDef/MultiLengthTest.php
+++ b/tests/HTMLPurifier/AttrDef/HTML/MultiLengthTest.php
@@ -1,13 +1,13 @@
def = new HTMLPurifier_AttrDef_MultiLength();
+ $this->def = new HTMLPurifier_AttrDef_HTML_MultiLength();
}
function test() {
@@ -16,7 +16,7 @@ class HTMLPurifier_AttrDef_MultiLengthTest extends HTMLPurifier_AttrDef_LengthTe
parent::test();
$this->assertDef('*');
- $this->assertDef('1*');
+ $this->assertDef('1*', '*');
$this->assertDef('56*');
$this->assertDef('**', false); // plain old bad
diff --git a/tests/HTMLPurifier/AttrDef/ClassTest.php b/tests/HTMLPurifier/AttrDef/HTML/NmtokensTest.php
similarity index 76%
rename from tests/HTMLPurifier/AttrDef/ClassTest.php
rename to tests/HTMLPurifier/AttrDef/HTML/NmtokensTest.php
index 053e5134..00b55eec 100644
--- a/tests/HTMLPurifier/AttrDef/ClassTest.php
+++ b/tests/HTMLPurifier/AttrDef/HTML/NmtokensTest.php
@@ -1,15 +1,14 @@
def = new HTMLPurifier_AttrDef_Class();
+ $this->def = new HTMLPurifier_AttrDef_HTML_Nmtokens();
$this->assertDef('valid');
$this->assertDef('a0-_');
diff --git a/tests/HTMLPurifier/AttrDef/PixelsTest.php b/tests/HTMLPurifier/AttrDef/HTML/PixelsTest.php
similarity index 79%
rename from tests/HTMLPurifier/AttrDef/PixelsTest.php
rename to tests/HTMLPurifier/AttrDef/HTML/PixelsTest.php
index cab43e86..414fa3ad 100644
--- a/tests/HTMLPurifier/AttrDef/PixelsTest.php
+++ b/tests/HTMLPurifier/AttrDef/HTML/PixelsTest.php
@@ -1,13 +1,13 @@
def = new HTMLPurifier_AttrDef_Pixels();
+ $this->def = new HTMLPurifier_AttrDef_HTML_Pixels();
}
function test() {
diff --git a/tests/HTMLPurifier/AttrDef/LangTest.php b/tests/HTMLPurifier/AttrDef/LangTest.php
index 7a0e4308..a5472e91 100644
--- a/tests/HTMLPurifier/AttrDef/LangTest.php
+++ b/tests/HTMLPurifier/AttrDef/LangTest.php
@@ -54,6 +54,8 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
// Also note that this test-case tests fix-behavior: chop
// off subtags until you get a valid language code.
$this->assertDef('en-a', 'en');
+ // however, x is a reserved single-letter subtag that is allowed
+ $this->assertDef('en-x', 'en-x');
// 2-8 chars are permitted, but have special meaning that cannot
// be checked without maintaining country code lookup tables (for
// two characters) or special registration tables (for all above).
diff --git a/tests/HTMLPurifier/AttrDef/URI/Email/SimpleCheckTest.php b/tests/HTMLPurifier/AttrDef/URI/Email/SimpleCheckTest.php
new file mode 100644
index 00000000..edbde119
--- /dev/null
+++ b/tests/HTMLPurifier/AttrDef/URI/Email/SimpleCheckTest.php
@@ -0,0 +1,16 @@
+def = new HTMLPurifier_AttrDef_URI_Email_SimpleCheck();
+ }
+
+}
+
+?>
\ No newline at end of file
diff --git a/tests/HTMLPurifier/AttrDef/EmailHarness.php b/tests/HTMLPurifier/AttrDef/URI/EmailHarness.php
similarity index 87%
rename from tests/HTMLPurifier/AttrDef/EmailHarness.php
rename to tests/HTMLPurifier/AttrDef/URI/EmailHarness.php
index 28bd06f1..b0398424 100644
--- a/tests/HTMLPurifier/AttrDef/EmailHarness.php
+++ b/tests/HTMLPurifier/AttrDef/URI/EmailHarness.php
@@ -1,9 +1,9 @@
def = new HTMLPurifier_AttrDef_Host();
+ $this->def = new HTMLPurifier_AttrDef_URI_Host();
$this->assertDef('[2001:DB8:0:0:8:800:200C:417A]'); // IPv6
$this->assertDef('124.15.6.89'); // IPv4
diff --git a/tests/HTMLPurifier/AttrDef/IPv4Test.php b/tests/HTMLPurifier/AttrDef/URI/IPv4Test.php
similarity index 78%
rename from tests/HTMLPurifier/AttrDef/IPv4Test.php
rename to tests/HTMLPurifier/AttrDef/URI/IPv4Test.php
index 59f560d9..aa05159c 100644
--- a/tests/HTMLPurifier/AttrDef/IPv4Test.php
+++ b/tests/HTMLPurifier/AttrDef/URI/IPv4Test.php
@@ -1,17 +1,17 @@
def = new HTMLPurifier_AttrDef_IPv4();
+ $this->def = new HTMLPurifier_AttrDef_URI_IPv4();
$this->assertDef('127.0.0.1'); // standard IPv4, loopback, non-routable
$this->assertDef('0.0.0.0'); // standard IPv4, unspecified, non-routable
diff --git a/tests/HTMLPurifier/AttrDef/IPv6Test.php b/tests/HTMLPurifier/AttrDef/URI/IPv6Test.php
similarity index 91%
rename from tests/HTMLPurifier/AttrDef/IPv6Test.php
rename to tests/HTMLPurifier/AttrDef/URI/IPv6Test.php
index 7ad3613f..8a6511b0 100644
--- a/tests/HTMLPurifier/AttrDef/IPv6Test.php
+++ b/tests/HTMLPurifier/AttrDef/URI/IPv6Test.php
@@ -1,17 +1,17 @@
def = new HTMLPurifier_AttrDef_IPv6();
+ $this->def = new HTMLPurifier_AttrDef_URI_IPv6();
$this->assertDef('2001:DB8:0:0:8:800:200C:417A'); // unicast, full
$this->assertDef('FF01:0:0:0:0:0:0:101'); // multicast, full
diff --git a/tests/HTMLPurifier/ChildDef/ChameleonTest.php b/tests/HTMLPurifier/ChildDef/ChameleonTest.php
index b4181196..529d9193 100644
--- a/tests/HTMLPurifier/ChildDef/ChameleonTest.php
+++ b/tests/HTMLPurifier/ChildDef/ChameleonTest.php
@@ -15,17 +15,17 @@ class HTMLPurifier_ChildDef_ChameleonTest extends HTMLPurifier_ChildDefHarness
$this->assertResult(
'Allowed.', true,
- array(), array('ParentType' => 'inline')
+ array(), array('IsInline' => true)
);
$this->assertResult(
'Valid
'); diff --git a/tests/HTMLPurifier/ConfigSchemaTest.php b/tests/HTMLPurifier/ConfigSchemaTest.php index 075a552c..1f1f7034 100644 --- a/tests/HTMLPurifier/ConfigSchemaTest.php +++ b/tests/HTMLPurifier/ConfigSchemaTest.php @@ -41,14 +41,14 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase function tearDown() { // testing is done, restore the old copy HTMLPurifier_ConfigSchema::instance($this->old_copy); - tally_errors(); + tally_errors($this); } function test_defineNamespace() { CS::defineNamespace('http', $d = 'This is an internet protocol.'); $this->assertIdentical($this->our_copy->info_namespace, array( - 'http' => new HTMLPurifier_ConfigEntity_Namespace($d) + 'http' => new HTMLPurifier_ConfigDef_Namespace($d) )); $this->expectError('Cannot redefine namespace'); @@ -68,7 +68,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase $this->assertIdentical($this->our_copy->defaults['Car']['Seats'], 5); $this->assertIdentical($this->our_copy->info['Car']['Seats'], - new HTMLPurifier_ConfigEntity_Directive('int', + new HTMLPurifier_ConfigDef_Directive('int', array($this->file => array($l => $d)) ) ); @@ -77,7 +77,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase $this->assertIdentical($this->our_copy->defaults['Car']['Age'], null); $this->assertIdentical($this->our_copy->info['Car']['Age'], - new HTMLPurifier_ConfigEntity_Directive('int', + new HTMLPurifier_ConfigDef_Directive('int', array($this->file => array($l => $d)), true ) ); @@ -106,7 +106,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase $this->assertIdentical($this->our_copy->defaults['Cat']['Dead'], false); $this->assertIdentical($this->our_copy->info['Cat']['Dead'], - new HTMLPurifier_ConfigEntity_Directive('bool', + new HTMLPurifier_ConfigDef_Directive('bool', array($this->file => array($l1 => $d1, $l2 => $d2)) ) ); @@ -132,7 +132,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase 
$this->assertIdentical($this->our_copy->defaults['QuantumNumber']['Difficulty'], null); $this->assertIdentical($this->our_copy->info['QuantumNumber']['Difficulty'], - new HTMLPurifier_ConfigEntity_Directive( + new HTMLPurifier_ConfigDef_Directive( 'string', array($this->file => array($l => $d)), true, @@ -184,7 +184,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase $this->assertIdentical($this->our_copy->defaults['Abbrev']['HTH'], 'Happy to Help'); $this->assertIdentical($this->our_copy->info['Abbrev']['HTH'], - new HTMLPurifier_ConfigEntity_Directive( + new HTMLPurifier_ConfigDef_Directive( 'string', array($this->file => array($l => $d)), false, @@ -224,7 +224,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase $this->assertTrue(!isset($this->our_copy->defaults['Home']['Carpet'])); $this->assertIdentical($this->our_copy->info['Home']['Carpet'], - new HTMLPurifier_ConfigEntity_DirectiveAlias('Home', 'Rug') + new HTMLPurifier_ConfigDef_DirectiveAlias('Home', 'Rug') ); $this->expectError('Cannot define directive alias in undefined namespace'); diff --git a/tests/HTMLPurifier/ConfigTest.php b/tests/HTMLPurifier/ConfigTest.php index e04ac416..f368f8c0 100644 --- a/tests/HTMLPurifier/ConfigTest.php +++ b/tests/HTMLPurifier/ConfigTest.php @@ -20,7 +20,7 @@ class HTMLPurifier_ConfigTest extends UnitTestCase function tearDown() { HTMLPurifier_ConfigSchema::instance($this->old_copy); - tally_errors(); + tally_errors($this); } // test functionality based on ConfigSchema @@ -216,7 +216,7 @@ class HTMLPurifier_ConfigTest extends UnitTestCase } - function test_getDefinition() { + function test_getHTMLDefinition() { // we actually want to use the old copy, because the definition // generation routines have dependencies on configuration values @@ -224,12 +224,41 @@ class HTMLPurifier_ConfigTest extends UnitTestCase $this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy); $config = HTMLPurifier_Config::createDefault(); - $def = 
$config->getHTMLDefinition(); - $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition'); $def = $config->getCSSDefinition(); $this->assertIsA($def, 'HTMLPurifier_CSSDefinition'); + $def = $config->getHTMLDefinition(); + $def2 = $config->getHTMLDefinition(); + $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition'); + $this->assertEqual($def, $def2); + $this->assertTrue($def->setup); + + // test re-calculation if HTML changes + $config->set('HTML', 'Strict', true); + $def = $config->getHTMLDefinition(); + $this->assertIsA($def, 'HTMLPurifier_HTMLDefinition'); + $this->assertNotEqual($def, $def2); + $this->assertTrue($def->setup); + + // test retrieval of raw definition + $def =& $config->getHTMLDefinition(true); + $this->assertNotEqual($def, $def2); + $this->assertFalse($def->setup); + + // auto initialization + $config->getHTMLDefinition(); + $this->assertTrue($def->setup); + + } + + function test_getCSSDefinition() { + $this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy); + + $config = HTMLPurifier_Config::createDefault(); + + $def = $config->getCSSDefinition(); + $this->assertIsA($def, 'HTMLPurifier_CSSDefinition'); } function test_loadArray() { diff --git a/tests/HTMLPurifier/HTMLModuleManagerTest.php b/tests/HTMLPurifier/HTMLModuleManagerTest.php new file mode 100644 index 00000000..f3efa1c6 --- /dev/null +++ b/tests/HTMLPurifier/HTMLModuleManagerTest.php @@ -0,0 +1,274 @@ +manager = new HTMLPurifier_HTMLModuleManager(true); + } + + function teardown() { + tally_errors($this); + } + + function createModule($name) { + $module = new HTMLPurifier_HTMLModule(); + $module->name = $name; + return $module; + } + + function test_addModule_withAutoload() { + $this->manager->autoDoctype = 'Generic Document 0.1'; + $this->manager->autoCollection = 'Default'; + + $module = new HTMLPurifier_HTMLModule(); + $module->name = 'Module'; + + $module2 = new HTMLPurifier_HTMLModule(); + $module2->name = 'Module2'; + + // we need to grab the dynamically generated 
orders from + // the object since modules are not passed by reference + + $this->manager->addModule($module); + $module_order = $this->manager->modules['Module']->order; + $module->order = $module_order; + $this->assertEqual($module, $this->manager->modules['Module']); + + $this->manager->addModule($module2); + $module2_order = $this->manager->modules['Module2']->order; + $module2->order = $module2_order; + $this->assertEqual($module2, $this->manager->modules['Module2']); + $this->assertEqual($module_order + 1, $module2_order); + + $this->assertEqual( + $this->manager->collections['Default']['Generic Document 0.1'], + array('Module', 'Module2') + ); + + $this->manager->setup(HTMLPurifier_Config::createDefault()); + + $modules = array( + 'Module' => $this->manager->modules['Module'], + 'Module2' => $this->manager->modules['Module2'] + ); + + $this->assertIdentical( + $this->manager->collections['Default']['Generic Document 0.1'], + $modules + ); + $this->assertIdentical($this->manager->activeModules, $modules); + $this->assertIdentical($this->manager->activeCollections, array('Default')); + + } + + function test_addModule_undefinedClass() { + $this->expectError('TotallyCannotBeDefined module does not exist'); + $this->manager->addModule('TotallyCannotBeDefined'); + } + + function test_addModule_stringExpansion() { + $this->manager->addModule('ManagerTestModule'); + $this->assertIsA($this->manager->modules['ManagerTestModule'], + 'HTMLPurifier_HTMLModule_ManagerTestModule'); + } + + function test_addPrefix() { + $this->manager->addPrefix('HTMLPurifier_HTMLModuleManagerTest_'); + $this->manager->addModule('TestModule'); + $this->assertIsA($this->manager->modules['TestModule'], + 'HTMLPurifier_HTMLModuleManagerTest_TestModule'); + } + + function assertProcessCollections($input, $expect = false) { + if ($expect === false) $expect = $input; + $this->manager->processCollections($input); + // substitute in modules for $expect + foreach ($expect as $col_i => $col) { + 
$disable = false; + foreach ($col as $mod_i => $mod) { + unset($expect[$col_i][$mod_i]); + if ($mod_i === '*') { + $disable = true; + continue; + } + $expect[$col_i][$mod] = $this->manager->modules[$mod]; + } + if ($disable) $expect[$col_i]['*'] = false; + } + $this->assertIdentical($input, $expect); + } + + function testImpl_processCollections() { + $this->manager->initialize(); + $this->assertProcessCollections( + array() + ); + $this->assertProcessCollections( + array('HTML' => array('Text')) + ); + $this->assertProcessCollections( + array('HTML' => array('Text', 'Legacy')) + ); + $this->assertProcessCollections( // order is important! + array('HTML' => array('Legacy', 'Text')), + array('HTML' => array('Text', 'Legacy')) + ); + $this->assertProcessCollections( // privates removed after process + array('_Private' => array('Legacy', 'Text')), + array() + ); + $this->assertProcessCollections( // inclusions come first + array( + 'HTML' => array(array('XHTML'), 'Legacy'), + 'XHTML' => array('Text', 'Hypertext') + ), + array( + 'HTML' => array('Text', 'Hypertext', 'Legacy'), + 'XHTML' => array('Text', 'Hypertext') + ) + ); + $this->assertProcessCollections( + array( + 'HTML' => array(array('_Common'), 'Legacy'), + '_Common' => array('Text', 'Hypertext') + ), + array( + 'HTML' => array('Text', 'Hypertext', 'Legacy') + ) + ); + $this->assertProcessCollections( // nested inclusions + array( + 'Full' => array(array('Minimal'), 'Hypertext'), + 'Minimal' => array(array('Bare'), 'List'), + 'Bare' => array('Text') + ), + array( + 'Full' => array('Text', 'Hypertext', 'List'), + 'Minimal' => array('Text', 'List'), + 'Bare' => array('Text') + ) + ); + // strange but valid stuff that will be handled in assembleModules + $this->assertProcessCollections( + array( + 'Linky' => array('Hypertext'), + 'Listy' => array('List'), + '*' => array('Text') + ) + ); + $this->assertProcessCollections( + array( + 'Linky' => array('Hypertext'), + 'ListyOnly' => array('List', '*' => false), + '*' 
=> array('Text') + ) + ); + } + + function testImpl_processCollections_error() { + $this->manager->initialize(); + + $this->expectError( // active variables, watch out! + 'Illegal inclusion array at index 1 found collection HTML, '. + 'inclusion arrays must be at start of collection (index 0)'); + $c = array( + 'HTML' => array('Legacy', array('XHTML')), + 'XHTML' => array('Text', 'Hypertext') + ); + $this->manager->processCollections($c); + unset($c); + + $this->expectError('Collection HTML references undefined '. + 'module Foobar'); + $c = array( + 'HTML' => array('Foobar') + ); + $this->manager->processCollections($c); + unset($c); + + $this->expectError('Collection HTML tried to include undefined '. + 'collection _Common'); + $c = array( + 'HTML' => array(array('_Common'), 'Legacy') + ); + $this->manager->processCollections($c); + unset($c); + + // reports the first circular inclusion it runs across + $this->expectError('Circular inclusion detected in HTML collection'); + $c = array( + 'HTML' => array(array('XHTML')), + 'XHTML' => array(array('HTML')) + ); + $this->manager->processCollections($c); + unset($c); + + } + + function test_makeCollection() { + $config = HTMLPurifier_Config::create(array( + 'HTML.Doctype' => 'Custom Doctype' + )); + $this->manager->addModule($this->createModule('ActiveModule')); + $this->manager->addModule($this->createModule('DudModule')); + $this->manager->addModule($this->createModule('ValidModule')); + $ActiveModule = $this->manager->modules['ActiveModule']; + $DudModule = $this->manager->modules['DudModule']; + $ValidModule = $this->manager->modules['ValidModule']; + $this->manager->collections['ToBeValid']['Custom Doctype'] = array('ValidModule'); + $this->manager->collections['ToBeActive']['Custom Doctype'] = array('ActiveModule'); + $this->manager->makeCollectionValid('ToBeValid'); + $this->manager->makeCollectionActive('ToBeActive'); + $this->manager->setup($config); + $this->assertIdentical($this->manager->validModules, 
array( + 'ValidModule' => $ValidModule, + 'ActiveModule' => $ActiveModule + )); + $this->assertIdentical($this->manager->activeModules, array( + 'ActiveModule' => $ActiveModule + )); + } + + function test_makeCollection_undefinedCollection() { + $config = HTMLPurifier_Config::create(array( + 'HTML.Doctype' => 'Sweets Document 1.0' + )); + $this->manager->addModule($this->createModule('DonutsModule')); + $this->manager->addModule($this->createModule('ChocolateModule')); + $this->manager->collections['CocoaBased']['Sweets Document 1.0'] = array('ChocolateModule'); + // notice how BreadBased collection is missing + $this->manager->makeCollectionActive('CocoaBased'); // to prevent other errors + $this->manager->makeCollectionValid('BreadBased'); + $this->expectError('BreadBased collection is undefined'); + $this->manager->setup($config); + } + + function untest_soupStuff() { + $config = HTMLPurifier_Config::create(array( + 'HTML.Doctype' => 'The Soup Specification 8.0' + )); + $this->manager->addModule($this->createModule('VegetablesModule')); + $this->manager->addModule($this->createModule('MeatModule')); + + } + + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/LanguageFactoryTest.php b/tests/HTMLPurifier/LanguageFactoryTest.php new file mode 100644 index 00000000..050d30d8 --- /dev/null +++ b/tests/HTMLPurifier/LanguageFactoryTest.php @@ -0,0 +1,47 @@ +create('en'); + + $this->assertIsA($language, 'HTMLPurifier_Language'); + $this->assertEqual($language->code, 'en'); + + // lazy loading test + $this->assertEqual(count($language->messages), 0); + $language->load(); + $this->assertNotEqual(count($language->messages), 0); + + // actual tests for content can be found in LanguageTest + + } + + function testFallback() { + + $factory = HTMLPurifier_LanguageFactory::instance(); + + $language = $factory->create('en-x-test'); + + $this->assertIsA($language, 'HTMLPurifier_Language_en_x_test'); + $this->assertEqual($language->code, 'en-x-test'); + + 
$language->load(); + + // test overloaded message + $this->assertEqual($language->getMessage('htmlpurifier'), 'HTML Purifier X'); + + // test inherited message + $this->assertEqual($language->getMessage('pizza'), 'Pizza'); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/LanguageTest.php b/tests/HTMLPurifier/LanguageTest.php new file mode 100644 index 00000000..dd88c90f --- /dev/null +++ b/tests/HTMLPurifier/LanguageTest.php @@ -0,0 +1,22 @@ +lang = $factory->create('en'); + } + + function test_getMessage() { + $this->assertIdentical($this->lang->getMessage('htmlpurifier'), 'HTML Purifier'); + $this->assertIdentical($this->lang->getMessage('totally-non-existent-key'), ''); + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php index 38bd996b..20636614 100644 --- a/tests/HTMLPurifier/Strategy/FixNestingTest.php +++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php @@ -70,19 +70,33 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness 'Not allowed!' 
); - // block in inline ins not allowed - $this->assertResult( + $this->assertResult( // alt config 'Fiddly name | Super-duper-price | diff --git a/tests/HTMLPurifier/TagTransformTest.php b/tests/HTMLPurifier/TagTransformTest.php index f2cd18a3..760d85af 100644 --- a/tests/HTMLPurifier/TagTransformTest.php +++ b/tests/HTMLPurifier/TagTransformTest.php @@ -2,6 +2,11 @@ require_once 'HTMLPurifier/TagTransform.php'; +// needs to be seperated into files +require_once 'HTMLPurifier/TagTransform/Center.php'; +require_once 'HTMLPurifier/TagTransform/Font.php'; +require_once 'HTMLPurifier/TagTransform/Simple.php'; + class HTMLPurifier_TagTransformTest extends UnitTestCase { diff --git a/tests/HTMLPurifier/Test.php b/tests/HTMLPurifier/Test.php index daa39f53..3fa54173 100644 --- a/tests/HTMLPurifier/Test.php +++ b/tests/HTMLPurifier/Test.php @@ -83,6 +83,20 @@ class HTMLPurifier_Test extends UnitTestCase } + function testEnableAttrID() { + + $this->purifier = new HTMLPurifier(); + + $this->assertPurification( + 'foobar', + 'foobar' + ); + + $this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true)); + $this->assertPurification('foobar'); + + } + } ?> \ No newline at end of file diff --git a/tests/index.php b/tests/index.php index b034d4d8..bc2e2414 100644 --- a/tests/index.php +++ b/tests/index.php @@ -51,6 +51,9 @@ $test_file_lookup = array_flip($test_files); // determine test file if (isset($_GET['f']) && isset($test_file_lookup[$_GET['f']])) { $GLOBALS['HTMLPurifierTest']['File'] = $_GET['f']; +} elseif (isset($argv[1]) && isset($test_file_lookup[$argv[1]])) { + // command-line + $GLOBALS['HTMLPurifierTest']['File'] = $argv[1]; } else { $GLOBALS['HTMLPurifierTest']['File'] = false; } diff --git a/tests/tally_errors.func.php b/tests/tally_errors.func.php index 84aaef04..cd945c3d 100644 --- a/tests/tally_errors.func.php +++ b/tests/tally_errors.func.php @@ -1,6 +1,6 @@ get('SimpleErrorQueue'); @@ -9,7 +9,7 @@ function tally_errors() { if (count($e) != 2) 
return; // fut-compat if (!isset($e[0])) return; // fut-compat $e[0]->_dumper = new SimpleDumper(); - $this->fail('Error expectation not fulfilled: ' . + $test->fail('Error expectation not fulfilled: ' . $e[0]->testMessage(null)); } $queue->_expectation_queue = array(); diff --git a/tests/test_files.php b/tests/test_files.php index ab83ec47..9a612181 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -2,68 +2,72 @@ if (!defined('HTMLPurifierTest')) exit; -// define callable test files -$test_files[] = 'ConfigTest.php'; -$test_files[] = 'ConfigSchemaTest.php'; -$test_files[] = 'LexerTest.php'; -$test_files[] = 'Lexer/DirectLexTest.php'; -$test_files[] = 'TokenTest.php'; -$test_files[] = 'ChildDef/RequiredTest.php'; -$test_files[] = 'ChildDef/OptionalTest.php'; -$test_files[] = 'ChildDef/ChameleonTest.php'; -$test_files[] = 'ChildDef/CustomTest.php'; -$test_files[] = 'ChildDef/TableTest.php'; -$test_files[] = 'ChildDef/StrictBlockquoteTest.php'; -$test_files[] = 'GeneratorTest.php'; -$test_files[] = 'EntityLookupTest.php'; -$test_files[] = 'Strategy/RemoveForeignElementsTest.php'; -$test_files[] = 'Strategy/MakeWellFormedTest.php'; -$test_files[] = 'Strategy/FixNestingTest.php'; -$test_files[] = 'Strategy/CompositeTest.php'; -$test_files[] = 'Strategy/CoreTest.php'; -$test_files[] = 'Strategy/ValidateAttributesTest.php'; -$test_files[] = 'AttrDefTest.php'; -$test_files[] = 'AttrDef/EnumTest.php'; -$test_files[] = 'AttrDef/IDTest.php'; -$test_files[] = 'AttrDef/ClassTest.php'; -$test_files[] = 'AttrDef/TextTest.php'; -$test_files[] = 'AttrDef/LangTest.php'; -$test_files[] = 'AttrDef/PixelsTest.php'; -$test_files[] = 'AttrDef/LengthTest.php'; -$test_files[] = 'AttrDef/URITest.php'; +// define callable test files (sorted alphabetically) +$test_files[] = 'AttrDef/CSS/BackgroundPositionTest.php'; +$test_files[] = 'AttrDef/CSS/BackgroundTest.php'; +$test_files[] = 'AttrDef/CSS/BorderTest.php'; +$test_files[] = 'AttrDef/CSS/ColorTest.php'; +$test_files[] = 
'AttrDef/CSS/CompositeTest.php'; +$test_files[] = 'AttrDef/CSS/FontFamilyTest.php'; +$test_files[] = 'AttrDef/CSS/FontTest.php'; +$test_files[] = 'AttrDef/CSS/LengthTest.php'; +$test_files[] = 'AttrDef/CSS/ListStyleTest.php'; +$test_files[] = 'AttrDef/CSS/MultipleTest.php'; +$test_files[] = 'AttrDef/CSS/NumberTest.php'; +$test_files[] = 'AttrDef/CSS/PercentageTest.php'; +$test_files[] = 'AttrDef/CSS/TextDecorationTest.php'; +$test_files[] = 'AttrDef/CSS/URITest.php'; $test_files[] = 'AttrDef/CSSTest.php'; -$test_files[] = 'AttrDef/CompositeTest.php'; -$test_files[] = 'AttrDef/ColorTest.php'; +$test_files[] = 'AttrDef/EnumTest.php'; +$test_files[] = 'AttrDef/HTML/IDTest.php'; +$test_files[] = 'AttrDef/HTML/LengthTest.php'; +$test_files[] = 'AttrDef/HTML/MultiLengthTest.php'; +$test_files[] = 'AttrDef/HTML/NmtokensTest.php'; +$test_files[] = 'AttrDef/HTML/PixelsTest.php'; $test_files[] = 'AttrDef/IntegerTest.php'; -$test_files[] = 'AttrDef/NumberTest.php'; -$test_files[] = 'AttrDef/CSSLengthTest.php'; -$test_files[] = 'AttrDef/PercentageTest.php'; -$test_files[] = 'AttrDef/MultipleTest.php'; -$test_files[] = 'AttrDef/TextDecorationTest.php'; -$test_files[] = 'AttrDef/FontFamilyTest.php'; -$test_files[] = 'AttrDef/HostTest.php'; -$test_files[] = 'AttrDef/IPv4Test.php'; -$test_files[] = 'AttrDef/IPv6Test.php'; -$test_files[] = 'AttrDef/FontTest.php'; -$test_files[] = 'AttrDef/BorderTest.php'; -$test_files[] = 'AttrDef/ListStyleTest.php'; -$test_files[] = 'AttrDef/Email/SimpleCheckTest.php'; -$test_files[] = 'AttrDef/CSSURITest.php'; -$test_files[] = 'AttrDef/BackgroundPositionTest.php'; -$test_files[] = 'AttrDef/BackgroundTest.php'; -$test_files[] = 'IDAccumulatorTest.php'; -$test_files[] = 'TagTransformTest.php'; -$test_files[] = 'AttrTransform/LangTest.php'; -$test_files[] = 'AttrTransform/TextAlignTest.php'; +$test_files[] = 'AttrDef/LangTest.php'; +$test_files[] = 'AttrDef/TextTest.php'; +$test_files[] = 'AttrDef/URI/Email/SimpleCheckTest.php'; +$test_files[] = 
'AttrDef/URI/HostTest.php'; +$test_files[] = 'AttrDef/URI/IPv4Test.php'; +$test_files[] = 'AttrDef/URI/IPv6Test.php'; +$test_files[] = 'AttrDef/URITest.php'; +$test_files[] = 'AttrDefTest.php'; $test_files[] = 'AttrTransform/BdoDirTest.php'; $test_files[] = 'AttrTransform/ImgRequiredTest.php'; +$test_files[] = 'AttrTransform/LangTest.php'; +$test_files[] = 'AttrTransform/TextAlignTest.php'; +$test_files[] = 'ChildDef/ChameleonTest.php'; +$test_files[] = 'ChildDef/CustomTest.php'; +$test_files[] = 'ChildDef/OptionalTest.php'; +$test_files[] = 'ChildDef/RequiredTest.php'; +$test_files[] = 'ChildDef/StrictBlockquoteTest.php'; +$test_files[] = 'ChildDef/TableTest.php'; +$test_files[] = 'ConfigSchemaTest.php'; +$test_files[] = 'ConfigTest.php'; +$test_files[] = 'ContextTest.php'; +$test_files[] = 'EncoderTest.php'; +$test_files[] = 'EntityLookupTest.php'; +$test_files[] = 'EntityParserTest.php'; +$test_files[] = 'GeneratorTest.php'; +$test_files[] = 'HTMLModuleManagerTest.php'; +$test_files[] = 'IDAccumulatorTest.php'; +$test_files[] = 'LanguageFactoryTest.php'; +$test_files[] = 'LanguageTest.php'; +$test_files[] = 'Lexer/DirectLexTest.php'; +$test_files[] = 'LexerTest.php'; +$test_files[] = 'PercentEncoderTest.php'; +$test_files[] = 'Strategy/CompositeTest.php'; +$test_files[] = 'Strategy/CoreTest.php'; +$test_files[] = 'Strategy/FixNestingTest.php'; +$test_files[] = 'Strategy/MakeWellFormedTest.php'; +$test_files[] = 'Strategy/RemoveForeignElementsTest.php'; +$test_files[] = 'Strategy/ValidateAttributesTest.php'; +$test_files[] = 'TagTransformTest.php'; +$test_files[] = 'Test.php'; +$test_files[] = 'TokenTest.php'; $test_files[] = 'URISchemeRegistryTest.php'; $test_files[] = 'URISchemeTest.php'; -$test_files[] = 'EncoderTest.php'; -$test_files[] = 'EntityParserTest.php'; -$test_files[] = 'Test.php'; -$test_files[] = 'ContextTest.php'; -$test_files[] = 'PercentEncoderTest.php'; if (version_compare(PHP_VERSION, '5', '>=')) { $test_files[] = 'TokenFactoryTest.php';
---|