mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-09 15:31:53 +00:00
Release 1.5.0, merged in r688-867.
- LanguageFactory::instance() declared static - HTMLModuleManagerTest pass by reference bug fixed, merge back into trunk scheduled git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@869 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
cec7a1c087
commit
dd2fd06591
2
Doxyfile
2
Doxyfile
@ -4,7 +4,7 @@
|
|||||||
# Project related configuration options
|
# Project related configuration options
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
PROJECT_NAME = HTML Purifier
|
PROJECT_NAME = HTML Purifier
|
||||||
PROJECT_NUMBER = 1.4.1
|
PROJECT_NUMBER = 1.5.0
|
||||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||||
CREATE_SUBDIRS = NO
|
CREATE_SUBDIRS = NO
|
||||||
OUTPUT_LANGUAGE = English
|
OUTPUT_LANGUAGE = English
|
||||||
|
30
NEWS
30
NEWS
@ -9,6 +9,36 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. Internal change
|
. Internal change
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
|
1.5.0, released 2007-03-23
|
||||||
|
! Added a rudimentary I18N and L10N system modeled off MediaWiki. It
|
||||||
|
doesn't actually do anything yet, but keep your eyes peeled.
|
||||||
|
! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier
|
||||||
|
! Newly structured HTMLDefinition modeled off of XHTML 1.1 modules.
|
||||||
|
I am loath to release beta-quality APIs, but this is exactly that;
|
||||||
|
don't use the internal interfaces if you're not willing to do migration
|
||||||
|
later on.
|
||||||
|
- Allow 'x' subtag in language codes
|
||||||
|
- Fixed buggy chameleon-support for ins and del
|
||||||
|
. Added support for IDREF attributes (i.e. for)
|
||||||
|
. Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens
|
||||||
|
. Removed context variable ParentType, replaced with IsInline, which
|
||||||
|
is false when you're not inline and an integer of the parent that
|
||||||
|
caused you to become inline when you are (so possibly zero)
|
||||||
|
. Removed ElementDef->type in favor of ElementDef->descendants_are_inline
|
||||||
|
and HTMLDefinition->content_sets
|
||||||
|
. StrictBlockquote now reports what elements it's supposed to allow,
|
||||||
|
rather than what it does allow
|
||||||
|
. Removed HTMLDefinition->info_flow_elements in favor of
|
||||||
|
HTMLDefinition->content_sets['Flow']
|
||||||
|
. Removed redundant "exclusionary" definitions from DTD roster
|
||||||
|
. StrictBlockquote now requires a construction parameter as if it
|
||||||
|
were a Required ChildDef; this is the "real" set of allowed elements
|
||||||
|
. AttrDef partitioned into HTML, CSS and URI segments
|
||||||
|
. Modify Youtube filter regexp to be multiline
|
||||||
|
. Require both PHP5 and DOM extension in order to use DOMLex, fixes
|
||||||
|
some edge cases where a DOMDocument class exists in a PHP4 environment
|
||||||
|
due to DOM XML extension.
|
||||||
|
|
||||||
1.4.1, released 2007-01-21
|
1.4.1, released 2007-01-21
|
||||||
! docs/enduser-youtube.html updated according to new functionality
|
! docs/enduser-youtube.html updated according to new functionality
|
||||||
- YouTube IDs can have underscores and dashes
|
- YouTube IDs can have underscores and dashes
|
||||||
|
8
TODO
8
TODO
@ -7,7 +7,7 @@ TODO List
|
|||||||
? At-risk
|
? At-risk
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
1.5 release
|
1.6 release
|
||||||
# Implement all non-essential attribute transforms, configurable
|
# Implement all non-essential attribute transforms, configurable
|
||||||
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||||
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||||
@ -15,8 +15,9 @@ TODO List
|
|||||||
- Requires I18N facilities to be created first (COMPLEX)
|
- Requires I18N facilities to be created first (COMPLEX)
|
||||||
? Configuration profiles: sets of directives that get set with one func call
|
? Configuration profiles: sets of directives that get set with one func call
|
||||||
- XSS-attempt detection
|
- XSS-attempt detection
|
||||||
|
- Implement IDREF support
|
||||||
|
|
||||||
1.6 release
|
1.7 release
|
||||||
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
||||||
- More fine-grained control over escaping behavior
|
- More fine-grained control over escaping behavior
|
||||||
- Silently drop content inbetween SCRIPT tags (can be generalized to allow
|
- Silently drop content inbetween SCRIPT tags (can be generalized to allow
|
||||||
@ -29,7 +30,7 @@ TODO List
|
|||||||
tag or attribute that is not supported
|
tag or attribute that is not supported
|
||||||
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
||||||
|
|
||||||
1.7 release
|
1.8 release
|
||||||
# Additional support for poorly written HTML
|
# Additional support for poorly written HTML
|
||||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||||
- Friendly strict handling of <address> (block -> <br>)
|
- Friendly strict handling of <address> (block -> <br>)
|
||||||
@ -76,7 +77,6 @@ Ongoing
|
|||||||
- more! (look for ones that use WYSIWYGs)
|
- more! (look for ones that use WYSIWYGs)
|
||||||
|
|
||||||
Unknown release (on a scratch-an-itch basis)
|
Unknown release (on a scratch-an-itch basis)
|
||||||
- Upgrade SimpleTest testing code to newest versions
|
|
||||||
- Have 'lang' attribute be checked against official lists
|
- Have 'lang' attribute be checked against official lists
|
||||||
? Semi-lossy dumb alternate character encoding transformations, achieved by
|
? Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||||
encoding all characters that have string entity equivalents
|
encoding all characters that have string entity equivalents
|
||||||
|
@ -7,6 +7,7 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
|||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
|
require_once 'HTMLPurifier/Context.php';
|
||||||
|
|
||||||
$LEXERS = array();
|
$LEXERS = array();
|
||||||
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||||
@ -93,11 +94,14 @@ function print_lexers() {
|
|||||||
function do_benchmark($name, $document) {
|
function do_benchmark($name, $document) {
|
||||||
global $LEXERS, $RUNS;
|
global $LEXERS, $RUNS;
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$context = new HTMLPurifier_Context();
|
||||||
|
|
||||||
$timer = new RowTimer($name);
|
$timer = new RowTimer($name);
|
||||||
$timer->start();
|
$timer->start();
|
||||||
|
|
||||||
foreach($LEXERS as $key => $lexer) {
|
foreach($LEXERS as $key => $lexer) {
|
||||||
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
|
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document, $config, $context);
|
||||||
$timer->setMarker($key);
|
$timer->setMarker($key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,12 +5,15 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
|||||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
require_once 'HTMLPurifier/Context.php';
|
||||||
|
|
||||||
$input = file_get_contents('samples/Lexer/4.html');
|
$input = file_get_contents('samples/Lexer/4.html');
|
||||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$context = new HTMLPurifier_Context();
|
||||||
|
|
||||||
for ($i = 0; $i < 10; $i++) {
|
for ($i = 0; $i < 10; $i++) {
|
||||||
$tokens = $lexer->tokenizeHTML($input);
|
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -188,7 +188,7 @@ $xsl_processor->importStylesheet($xsl_dom_stylesheet);
|
|||||||
$html_output = $xsl_processor->transformToXML($dom_document);
|
$html_output = $xsl_processor->transformToXML($dom_document);
|
||||||
|
|
||||||
// some slight fudges to preserve backwards compatibility
|
// some slight fudges to preserve backwards compatibility
|
||||||
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br>
|
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br/>
|
||||||
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
|
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
|
||||||
|
|
||||||
if (class_exists('Tidy')) {
|
if (class_exists('Tidy')) {
|
||||||
|
188
docs/dev-advanced-api.html
Normal file
188
docs/dev-advanced-api.html
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Functional specification for HTML Purifier's advanced API for defining custom filtering behavior." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||||
|
|
||||||
|
<title>Advanced API - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1>Advanced API</h1>
|
||||||
|
|
||||||
|
<div id="filing">Filed under Development</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
|
<p>It makes no sense to adopt a <q>one-size-fits-all</q> approach to
|
||||||
|
filtersets: therefore, users must be able to define their own sets of
|
||||||
|
<q>allowed</q> elements, as well as switch in-between doctypes of HTML.</p>
|
||||||
|
|
||||||
|
<p>Our goals are to let the user:</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Select</dt>
|
||||||
|
<dd><ul>
|
||||||
|
<li>Doctype</li>
|
||||||
|
<li>Filtersets: Rich / Plain / Full ...</li>
|
||||||
|
<li>Mode: Lenient / Correctional</li>
|
||||||
|
<li>Collections (?): Safe / Unsafe</li>
|
||||||
|
<li>Modules / Tags / Attributes</li>
|
||||||
|
</ul></dd>
|
||||||
|
<dt>Customize</dt>
|
||||||
|
<dd><ul>
|
||||||
|
<li>Tags / Attributes / Attribute Types</li>
|
||||||
|
<li>Filtersets</li>
|
||||||
|
<li>Root Node</li>
|
||||||
|
</ul></dd>
|
||||||
|
<dt>Create</dt>
|
||||||
|
<dd><ul>
|
||||||
|
<li>Modules / Tags / Attributes / Attribute Types</li>
|
||||||
|
<li>Filtersets</li>
|
||||||
|
<li>Doctype</li>
|
||||||
|
</ul></dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<h2>Select</h2>
|
||||||
|
|
||||||
|
<h3>Selecting a Doctype</h3>
|
||||||
|
|
||||||
|
<p>By default, users will use a doctype-based, permissive but secure
|
||||||
|
whitelist. They must define a <strong>doctype</strong>, and this serves
|
||||||
|
as the first method of determining a filterset.</p>
|
||||||
|
|
||||||
|
<p class="technical">This identifier is based
|
||||||
|
on the name the W3C has given to the document type and <em>not</em>
|
||||||
|
the DTD identifier.</p>
|
||||||
|
|
||||||
|
<p>This parameter is set via the configuration object:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
|
||||||
|
|
||||||
|
<h3>Selecting a Filterset</h3>
|
||||||
|
|
||||||
|
<p>However, selecting this doctype doesn't mean much, because if we
|
||||||
|
adhered exactly to the definition we would be letting XSS and other
|
||||||
|
nasties through. HTML Purifier must, in its filterset, allow a subset
|
||||||
|
of the doctype, which we shall call a <strong>filterset</strong>.</p>
|
||||||
|
|
||||||
|
<p>By default, HTML Purifier will use the <strong>Rich</strong>
|
||||||
|
filterset, which allows as many elements as possible with untrusted
|
||||||
|
sources. Other possible filtersets could be:</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Full</dt>
|
||||||
|
<dd>Allows the full span of elements in the doctype, good if you want
|
||||||
|
HTML Purifier to work as a Tidy substitute but not to strip
|
||||||
|
anything out.</dd>
|
||||||
|
<dt>Plain</dt>
|
||||||
|
<dd>Provides a minimum set of tags for semantic markup of things
|
||||||
|
like blog comments.</dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<p>Extension-authors would be able to define custom filtersets for
|
||||||
|
other users to use.</p>
|
||||||
|
|
||||||
|
<p>A possible call to select a filterset would be:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Filterset', 'Rich');</pre>
|
||||||
|
|
||||||
|
<h3>Selecting Mode</h3>
|
||||||
|
|
||||||
|
<p>Within filtersets, there are various <strong>modes</strong> of operation.
|
||||||
|
These indicate variant behaviors that, while not strictly changing the
|
||||||
|
allowed set of elements and attributes, will definitely affect the output.
|
||||||
|
Currently, we have two modes, which may be used together:</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Lenient</dt>
|
||||||
|
<dd>Deprecated elements and attributes will be transformed into
|
||||||
|
standards-compliant alternatives when explicitly disallowed. For
|
||||||
|
example, in the XHTML 1.0 Strict doctype, a <code>center</code>
|
||||||
|
tag would be turned into a <code>div</code> with the CSS property
|
||||||
|
<code>text-align:center;</code>, but in XHTML 1.0 Transitional
|
||||||
|
the tag would be preserved. This mode is on by default.</dd>
|
||||||
|
<dt>Correctional</dt>
|
||||||
|
<dd>Deprecated elements and attributes will be transformed into
|
||||||
|
standards-compliant alternatives whenever possible. Referring
|
||||||
|
back to the previous example, the <code>center</code> tag would
|
||||||
|
be transformed in both cases. However, tags without a
|
||||||
|
reasonable standards-compliant alternative will be preserved
|
||||||
|
in their form. This mode is on by default. It may have
|
||||||
|
various levels of operation.</dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<p>A possible call to select modes would be:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Mode', array('correctional', 'lenient'));</pre>
|
||||||
|
|
||||||
|
<p>If modes have extra parameters, a hash might work well:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Mode', array(
|
||||||
|
'correctional' => 9, // strongest level
|
||||||
|
'lenient' => true // this one's just boolean
|
||||||
|
));</pre>
|
||||||
|
|
||||||
|
<p>Modes may possibly be wrapped up with the filterset declaration:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Filterset', 'Rich: correctional, lenient');</pre>
|
||||||
|
|
||||||
|
<p>Further investigation in this field is necessary.</p>
|
||||||
|
|
||||||
|
<h3>Selecting Modules / Tags / Attributes</h3>
|
||||||
|
|
||||||
|
<p>If this cookie cutter approach doesn't appeal to a user, they may
|
||||||
|
decide to roll their own filterset by selecting modules, tags and
|
||||||
|
attributes to allow.</p>
|
||||||
|
|
||||||
|
<p class="technical">This would make use of the same facilities
|
||||||
|
as a filterset author would use, except that it would go under an
|
||||||
|
<q>anonymous</q> filterset that would be auto-selected if any of the
|
||||||
|
relevant module/tag/attribute selection configuration directives were
|
||||||
|
non-null.</p>
|
||||||
|
|
||||||
|
<p>On the highest level, a user will usually be most interested in
|
||||||
|
directly specifying which elements and attributes are desired. For
|
||||||
|
example:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'AllowedElements', 'a,b,em,p,blockquote,code,i');</pre>
|
||||||
|
|
||||||
|
<p>Attribute declarations could be merged into this declaration as such:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Allowed', 'a[href,title],b,em,p[class],blockquote[cite],code,i');</pre>
|
||||||
|
|
||||||
|
<p>...or be kept separate:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'AllowedAttributes', 'a.href,a.title,p.class,blockquote.cite');</pre>
|
||||||
|
|
||||||
|
<p class="technical">Considering that, internally speaking, as mandated by
|
||||||
|
the XHTML 1.1 Modularization specification, we have organized our
|
||||||
|
elements around modules, considerable gymnastics will be needed to
|
||||||
|
get this sort of functionality working.</p>
|
||||||
|
|
||||||
|
<p>A user may also specify a module to load a class of elements and attributes
|
||||||
|
into their filterset:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Allowed', 'Hypertext,Core');</pre>
|
||||||
|
|
||||||
|
<p class="fixme">The granularity of these modules is too coarse for
|
||||||
|
the average user (for example, the core module loads everything from
|
||||||
|
the essential <code>p</code> tag to the not-so-safe <code>h1</code>
|
||||||
|
tag). How do we make this still a viable solution?</p>
|
||||||
|
|
||||||
|
<h3>Unified selector</h3>
|
||||||
|
|
||||||
|
<p>Because selecting each and every one of these configuration options
|
||||||
|
is a chore, we may wish to offer a specialized configuration method
|
||||||
|
for selecting a filterset. Possibility:</p>
|
||||||
|
|
||||||
|
<pre>function selectFilter($doctype, $filterset, $mode)</pre>
|
||||||
|
|
||||||
|
<p>...which is simply a light wrapper over the individual configuration
|
||||||
|
calls. A custom config file format or text format could also be adopted.</p>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
|
</body></html>
|
@ -36,7 +36,7 @@ forgiving lexer. You may also be interested in the unit tests located in the
|
|||||||
tests/ folder, which provide a living document on how exactly the filter deals
|
tests/ folder, which provide a living document on how exactly the filter deals
|
||||||
with malformed input.
|
with malformed input.
|
||||||
|
|
||||||
In summary:
|
In summary (see corresponding classes for more details):
|
||||||
|
|
||||||
1. Parse document into an array of tag and text tokens (Lexer)
|
1. Parse document into an array of tag and text tokens (Lexer)
|
||||||
2. Remove all elements not on whitelist and transform certain other elements
|
2. Remove all elements not on whitelist and transform certain other elements
|
||||||
|
@ -6,45 +6,17 @@ through negligence of people. This class will do its job: no more, no less,
|
|||||||
and it's up to you to provide it the proper information and proper context
|
and it's up to you to provide it the proper information and proper context
|
||||||
to be effective. Things to remember:
|
to be effective. Things to remember:
|
||||||
|
|
||||||
1. Character Encoding: UTF-8.
|
1. Character Encoding: see enduser-utf8.html for more info.
|
||||||
This segment will soon be obsoleted by enduser-utf8.html
|
|
||||||
Currently, the parser runs under the assumption that it is dealing
|
|
||||||
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
|
||||||
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
|
||||||
your character encoding, make sure you configure HTML Purifier or switch
|
|
||||||
to UTF-8. Now. Also, make sure any input is properly converted to UTF-8, or
|
|
||||||
the parser will mangle it badly (though it won't be a security risk if you're
|
|
||||||
outputting it as UTF-8 though). Character encoding is, in general, a knotty
|
|
||||||
issue, but do yourself a favor and learn about it:
|
|
||||||
<http://www.joelonsoftware.com/articles/Unicode.html>
|
|
||||||
|
|
||||||
2. Doctype: XHTML 1.0 Transitional
|
2. Doctype: document pending feature completion
|
||||||
This is what the parser is outputting. For the most
|
Not strictly necessary, actually. More in-depth discussion once we figure
|
||||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
out how to get strict loose mode working.
|
||||||
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
|
||||||
has waaaay too many quirks for a little parser to handle. We did not select
|
|
||||||
strict in order to prevent ourselves from being too draconic on users, but
|
|
||||||
this may be configurable in the future. Do you want standards compliance?
|
|
||||||
The doctype is a good place to start.
|
|
||||||
|
|
||||||
3. IDs
|
3. IDs: see enduser-id.html for more info
|
||||||
This segment is obsoleted by enduser-id.html
|
|
||||||
They need to be unique, but without some knowledge of the
|
|
||||||
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
|
||||||
needs to be set: we may want to consider disallowing IDs by default to
|
|
||||||
save lazy programmers.
|
|
||||||
|
|
||||||
4. [PROJECTED] Links
|
4. Links: document pending feature completion
|
||||||
We're not going to try for spam protection (although
|
Rudimentary blacklisting, we should also allow only relative URIs. We
|
||||||
some hooks for such a module might be nice) but we may offer the ability to
|
need a doc to explain the stuff.
|
||||||
only accept relative URLs. Pick the one that's right for you.
|
|
||||||
|
|
||||||
5. CSS
|
5. CSS: document pending
|
||||||
While we can prevent the most flagrant cases from affecting your
|
Explain which CSS styles we blocked and why.
|
||||||
layout (such as absolutely positioned elements), no amount of code is going
|
|
||||||
to protect your pages from being attacked by garish colors and plain old
|
|
||||||
bad taste. A neat feature would be the ability to define acceptable colors
|
|
||||||
in a document, but that's not likely to be implemented for a while. In the
|
|
||||||
meantime, be sure to make sure that floated elements (permitted, since they
|
|
||||||
can be quite useful) can't mess up your layout. Once again, we may want to
|
|
||||||
disable this by default to protect lazy developers.
|
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
.minor td {font-style:italic;}
|
.minor td {font-style:italic;}
|
||||||
</style>
|
</style>
|
||||||
|
|
||||||
<title>UTF-8 - HTML Purifier</title>
|
<title>UTF-8: The Secret of Character Encoding - HTML Purifier</title>
|
||||||
|
|
||||||
<!-- Note to users: this document, though professing to be UTF-8, attempts
|
<!-- Note to users: this document, though professing to be UTF-8, attempts
|
||||||
to use only ASCII characters, because most webservers are configured
|
to use only ASCII characters, because most webservers are configured
|
||||||
@ -19,21 +19,27 @@ own advice for sake of portability. -->
|
|||||||
|
|
||||||
</head><body>
|
</head><body>
|
||||||
|
|
||||||
<h1>UTF-8</h1>
|
<h1>UTF-8: The Secret of Character Encoding</h1>
|
||||||
|
|
||||||
<div id="filing">Filed under End-User</div>
|
<div id="filing">Filed under End-User</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>Character encoding and character sets, in truth, are not that
|
<p>Character encoding and character sets are not that
|
||||||
difficult to understand. But if you don't understand them, you are going
|
difficult to understand, but so many people blithely stumble
|
||||||
to be caught by surprise by some of HTML Purifier's behavior, namely
|
through the worlds of programming without knowing what to actually
|
||||||
the fact that it operates UTF-8 or the limitations of the character
|
do about it, or say "Ah, it's a job for those <em>internationalization</em>
|
||||||
encoding transformations it does. This document will walk you through
|
experts." No, it is not! This document will walk you through
|
||||||
determining the encoding of your system and how you should handle
|
determining the encoding of your system and how you should handle
|
||||||
this information. It will stay away from excessive discussion on
|
this information. It will stay away from excessive discussion on
|
||||||
the internals of character encoding, but offer the information in
|
the internals of character encoding.</p>
|
||||||
asides that can easily be skipped.</p>
|
|
||||||
|
<p>This document is not designed to be read in its entirety: it will
|
||||||
|
slowly introduce concepts that build on each other: you need not get to
|
||||||
|
the bottom to have learned something new. However, I strongly
|
||||||
|
recommend you read all the way to <strong>Why UTF-8?</strong>, because at least
|
||||||
|
at that point you'd have made a conscious decision not to migrate,
|
||||||
|
which can be a rewarding (but difficult) task.</p>
|
||||||
|
|
||||||
<blockquote class="aside">
|
<blockquote class="aside">
|
||||||
<div class="label">Asides</div>
|
<div class="label">Asides</div>
|
||||||
@ -43,6 +49,50 @@ asides that can easily be skipped.</p>
|
|||||||
with a greater understanding of the underlying issues.</p>
|
with a greater understanding of the underlying issues.</p>
|
||||||
</blockquote>
|
</blockquote>
|
||||||
|
|
||||||
|
<h2>Table of Contents</h2>
|
||||||
|
|
||||||
|
<ol id="toc">
|
||||||
|
<li><a href="#findcharset">Finding the real encoding</a></li>
|
||||||
|
<li><a href="#findmetacharset">Finding the embedded encoding</a></li>
|
||||||
|
<li><a href="#fixcharset">Fixing the encoding</a><ol>
|
||||||
|
<li><a href="#fixcharset-none">No embedded encoding</a></li>
|
||||||
|
<li><a href="#fixcharset-diff">Embedded encoding disagrees</a></li>
|
||||||
|
<li><a href="#fixcharset-server">Changing the server encoding</a><ol>
|
||||||
|
<li><a href="#fixcharset-server-php">PHP header() function</a></li>
|
||||||
|
<li><a href="#fixcharset-server-phpini">PHP ini directive</a></li>
|
||||||
|
<li><a href="#fixcharset-server-nophp">Non-PHP</a></li>
|
||||||
|
<li><a href="#fixcharset-server-htaccess">.htaccess</a></li>
|
||||||
|
<li><a href="#fixcharset-server-ext">File extensions</a></li>
|
||||||
|
</ol></li>
|
||||||
|
<li><a href="#fixcharset-xml">XML</a></li>
|
||||||
|
<li><a href="#fixcharset-internals">Inside the process</a></li>
|
||||||
|
</ol></li>
|
||||||
|
<li><a href="#whyutf8">Why UTF-8?</a><ol>
|
||||||
|
<li><a href="#whyutf8-i18n">Internationalization</a></li>
|
||||||
|
<li><a href="#whyutf8-user">User-friendly</a></li>
|
||||||
|
<li><a href="#whyutf8-forms">Forms</a><ol>
|
||||||
|
<li><a href="#whyutf8-forms-urlencoded">application/x-www-form-urlencoded</a></li>
|
||||||
|
<li><a href="#whyutf8-forms-multipart">multipart/form-data</a></li>
|
||||||
|
</ol></li>
|
||||||
|
<li><a href="#whyutf8-support">Well supported</a></li>
|
||||||
|
<li><a href="#whyutf8-htmlpurifier">HTML Purifier</a></li>
|
||||||
|
</ol></li>
|
||||||
|
<li><a href="#migrate">Migrate to UTF-8</a><ol>
|
||||||
|
<li><a href="#migrate-db">Configuring your database</a><ol>
|
||||||
|
<li><a href="#migrate-db-legit">Legit method</a></li>
|
||||||
|
<li><a href="#migrate-db-binary">Binary</a></li>
|
||||||
|
</ol></li>
|
||||||
|
<li><a href="#migrate-editor">Text editor</a></li>
|
||||||
|
<li><a href="#migrate-bom">Byte Order Mark (headers already sent!)</a></li>
|
||||||
|
<li><a href="#migrate-fonts">Fonts</a><ol>
|
||||||
|
<li><a href="#migrate-fonts-obscure">Obscure scripts</a></li>
|
||||||
|
<li><a href="#migrate-fonts-occasional">Occasional use</a></li>
|
||||||
|
</ol></li>
|
||||||
|
<li><a href="#migrate-variablewidth">Dealing with variable width in functions</a></li>
|
||||||
|
</ol></li>
|
||||||
|
<li><a href="#externallinks">Further Reading</a></li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
<h2 id="findcharset">Finding the real encoding</h2>
|
<h2 id="findcharset">Finding the real encoding</h2>
|
||||||
|
|
||||||
<p>In the beginning, there was ASCII, and things were simple. But they
|
<p>In the beginning, there was ASCII, and things were simple. But they
|
||||||
@ -275,7 +325,7 @@ your own php.ini file, ask your support for details. Use:</p>
|
|||||||
|
|
||||||
<h4 id="fixcharset-server-nophp">Non-PHP</h4>
|
<h4 id="fixcharset-server-nophp">Non-PHP</h4>
|
||||||
|
|
||||||
<p>You may, for whatever reason, may need to set the character encoding
|
<p>You may, for whatever reason, need to set the character encoding
|
||||||
on non-PHP files, usually plain ol' HTML files. Doing this
|
on non-PHP files, usually plain ol' HTML files. Doing this
|
||||||
is more of a hit-or-miss process: depending on the software being
|
is more of a hit-or-miss process: depending on the software being
|
||||||
used as a webserver and the configuration of that software, certain
|
used as a webserver and the configuration of that software, certain
|
||||||
@ -386,8 +436,8 @@ processing instructions. They look like:</p>
|
|||||||
|
|
||||||
<p>For XHTML, this processing instruction theoretically
|
<p>For XHTML, this processing instruction theoretically
|
||||||
overrides the <code>META</code> tag. In reality, this happens only when the
|
overrides the <code>META</code> tag. In reality, this happens only when the
|
||||||
XHTML is actually served as legit XML and not HTML, which is almost
|
XHTML is actually served as legit XML and not HTML, which is almost
|
||||||
always never due to Internet Explorer's lack of support for
|
never due to Internet Explorer's lack of support for
|
||||||
<code>application/xhtml+xml</code> (even though doing so is often
|
<code>application/xhtml+xml</code> (even though doing so is often
|
||||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>
|
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>
|
||||||
|
|
||||||
@ -398,10 +448,10 @@ for XML files is UTF-8, which often butts heads with more common
|
|||||||
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
||||||
|
|
||||||
<p>In short, if you use XHTML and have gone through the
|
<p>In short, if you use XHTML and have gone through the
|
||||||
trouble of adding the XML header, be sure to make sure it jives
|
trouble of adding the XML header, make sure it jibes
|
||||||
with your <code>META</code> tags and HTTP headers.</p>
|
with your <code>META</code> tags and HTTP headers.</p>
|
||||||
|
|
||||||
<h3>Inside the process</h3>
|
<h3 id="fixcharset-internals">Inside the process</h3>
|
||||||
|
|
||||||
<p>This section is not required reading,
|
<p>This section is not required reading,
|
||||||
but may answer some of your questions on what's going on in all
|
but may answer some of your questions on what's going on in all
|
||||||
@ -572,7 +622,7 @@ Each method has deficiencies, especially the former.</p>
|
|||||||
the page, you still have the trouble of what to do with characters
|
the page, you still have the trouble of what to do with characters
|
||||||
that are outside of the character encoding's range. The behavior, once
|
that are outside of the character encoding's range. The behavior, once
|
||||||
again, varies: Firefox 2.0 entity-izes them while Internet Explorer
|
again, varies: Firefox 2.0 entity-izes them while Internet Explorer
|
||||||
7.0 mangles them beyond intelligibility. For serious I18N purposes,
|
7.0 mangles them beyond intelligibility. For serious internationalization purposes,
|
||||||
this is not an option.</p>
|
this is not an option.</p>
|
||||||
|
|
||||||
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
||||||
@ -604,22 +654,374 @@ hounding you about broken pages.</p>
|
|||||||
|
|
||||||
<h3 id="whyutf8-htmlpurifier">HTML Purifier</h3>
|
<h3 id="whyutf8-htmlpurifier">HTML Purifier</h3>
|
||||||
|
|
||||||
<p>And finally, we get to HTML Purifier.</p>
|
<p>And finally, we get to HTML Purifier. HTML Purifier is built to
|
||||||
|
deal with UTF-8: any indications otherwise are the result of an
|
||||||
|
encoder that converts text from your preferred encoding to UTF-8, and
|
||||||
|
back again. HTML Purifier never touches anything else, and leaves
|
||||||
|
it up to the module iconv to do the dirty work.</p>
|
||||||
|
|
||||||
|
<p>This approach, however, is not perfect. iconv is blithely unaware
|
||||||
|
of HTML character entities. HTML Purifier, in order to
|
||||||
|
protect against sophisticated escaping schemes, normalizes all character
|
||||||
|
and numeric entities before processing the text. This leads to
|
||||||
|
one important ramification:</p>
|
||||||
|
|
||||||
|
<p><strong>Any character that is not supported by the target character
|
||||||
|
set, regardless of whether or not it is in the form of a character
|
||||||
|
entity or a raw character, will be silently ignored.</strong></p>
|
||||||
|
|
||||||
|
<p>Example of this principle at work: say you have <code>&theta;</code>
|
||||||
|
in your HTML, but the output is in Latin-1 (which, understandably,
|
||||||
|
does not understand Greek), the following process will occur (assuming you've
|
||||||
|
set the encoding correctly using %Core.Encoding):</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
||||||
|
(note that theta is preserved since it doesn't actually use
|
||||||
|
any non-ASCII characters): <code>&theta;</code></li>
|
||||||
|
<li>The <code>EntityParser</code> will transform all named and numeric
|
||||||
|
character entities to their corresponding raw UTF-8 equivalents:
|
||||||
|
<code>θ</code></li>
|
||||||
|
<li>HTML Purifier processes the code: <code>θ</code></li>
|
||||||
|
<li>The <code>Encoder</code> now transforms the text back from UTF-8
|
||||||
|
to ISO 8859-1. Since Greek is not supported by ISO 8859-1, it
|
||||||
|
will be either ignored or replaced with a question mark:
|
||||||
|
<code>?</code></li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>This behaviour is quite unsatisfactory. It is a deal-breaker for
|
||||||
|
international applications, and it can be mildly annoying for the provincial
|
||||||
|
soul who occasionally needs a special character. Since 1.4.0, HTML
|
||||||
|
Purifier has provided a slightly more palatable workaround using
|
||||||
|
%Core.EscapeNonASCIICharacters. The process now looks like:</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>The <code>Encoder</code> transforms encoding to UTF-8: <code>&theta;</code></li>
|
||||||
|
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
||||||
|
<li>HTML Purifier processes the code: <code>θ</code></li>
|
||||||
|
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
||||||
|
with numeric entities: <code>&#952;</code></li>
|
||||||
|
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
||||||
|
original (which is strictly unnecessary for 99% of encodings
|
||||||
|
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>...which means that this is only good for an occasional foray into
|
||||||
|
the land of Unicode characters, and is totally unacceptable for Chinese
|
||||||
|
or Japanese texts. The even bigger kicker is that, supposing the
|
||||||
|
input encoding was actually ISO-8859-7, which <em>does</em> support
|
||||||
|
theta, the character would get entity-ized anyway! (The Encoder does
|
||||||
|
not discriminate).</p>
|
||||||
|
|
||||||
|
<p>The current functionality is about where HTML Purifier will be for
|
||||||
|
the rest of eternity. HTML Purifier could attempt to preserve the original
|
||||||
|
form of the entities so that they could be substituted back in, only the
|
||||||
|
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
||||||
|
to be smart and only convert non-ASCII characters that weren't supported
|
||||||
|
by the target encoding, but that would require reimplementing iconv
|
||||||
|
with HTML awareness, something I will not do.</p>
|
||||||
|
|
||||||
|
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
||||||
|
not being sarcastic here: some people couldn't care less about other languages)</p>
|
||||||
|
|
||||||
<h2 id="migrate">Migrate to UTF-8</h2>
|
<h2 id="migrate">Migrate to UTF-8</h2>
|
||||||
|
|
||||||
<h3 id="migrate-editor">Text editor</h3>
|
<p>So, you've decided to bite the bullet, and want to migrate to UTF-8.
|
||||||
|
Note that this is not for the faint-hearted, and you should expect
|
||||||
|
the process to take longer than you think it will take.</p>
|
||||||
|
|
||||||
|
<p>The general idea is that you convert all existing text to UTF-8,
|
||||||
|
and then you set all the headers and META tags we discussed earlier
|
||||||
|
to UTF-8. There are many ways going about doing this: you could
|
||||||
|
write a conversion script that runs through the database and re-encodes
|
||||||
|
everything as UTF-8 or you could do the conversion on the fly when someone
|
||||||
|
reads the page. The details depend on your system, but I will cover
|
||||||
|
some of the more subtle points of migration that may trip you up.</p>
|
||||||
|
|
||||||
<h3 id="migrate-db">Configuring your database</h3>
|
<h3 id="migrate-db">Configuring your database</h3>
|
||||||
|
|
||||||
<h3 id="migrate-convert">Convert old text</h3>
|
<p>Most modern databases, the most prominent open-source ones being MySQL
|
||||||
|
4.1+ and PostgreSQL, support character encodings. If you're switching
|
||||||
|
to UTF-8, logically speaking, you'd want to make sure your database
|
||||||
|
knows about the change too. There are some caveats though:</p>
|
||||||
|
|
||||||
|
<h4 id="migrate-db-legit">Legit method</h4>
|
||||||
|
|
||||||
|
<p>Standardization in terms of SQL syntax for specifying character
|
||||||
|
encodings is notoriously spotty. Refer to your respective database's
|
||||||
|
documentation on how to do this properly.</p>
|
||||||
|
|
||||||
|
<p>For <a href="http://dev.mysql.com/doc/refman/5.0/en/charset-conversion.html">MySQL</a>, <code>ALTER</code> will magically perform the
|
||||||
|
character encoding conversion for you. However, you have
|
||||||
|
to make sure that the text inside the column is what it says it is:
|
||||||
|
if you had put Shift-JIS in an ISO 8859-1 column, MySQL will irreversibly mangle
|
||||||
|
the text when you try to convert it to UTF-8. You'll have to convert
|
||||||
|
it to a binary field, convert it to a Shift-JIS field (the real encoding),
|
||||||
|
and then finally to UTF-8. Many a website had pages irreversibly mangled
|
||||||
|
because they didn't realize that they'd been deluding themselves about
|
||||||
|
the character encoding all along; don't become the next victim.</p>
|
||||||
|
|
||||||
|
<p>For <a href="http://www.postgresql.org/docs/8.2/static/multibyte.html">PostgreSQL</a>, there appears to be no direct way to change the
|
||||||
|
encoding of a database (as of 8.2). You will have to dump the data, and then reimport
|
||||||
|
it into a new table. Make sure that your client encoding is set properly:
|
||||||
|
this is how PostgreSQL knows to perform an encoding conversion.</p>
|
||||||
|
|
||||||
|
<p>Many times, you will be also asked about the "collation" of
|
||||||
|
the new column. Collation is how a DBMS sorts text, like ordering
|
||||||
|
B, C and A into A, B and C (the problem gets surprisingly complicated
|
||||||
|
when you get to languages like Thai and Japanese). If in doubt,
|
||||||
|
going with the default setting is usually a safe bet.</p>
|
||||||
|
|
||||||
|
<p>Once the conversion is all said and done, you still have to remember
|
||||||
|
to set the client encoding (your encoding) properly on each database
|
||||||
|
connection using <code>SET NAMES</code> (which is standard SQL and is
|
||||||
|
usually supported).</p>
|
||||||
|
|
||||||
|
<h4 id="migrate-db-binary">Binary</h4>
|
||||||
|
|
||||||
|
<p>Due to the abovementioned compatibility issues, a more interoperable
|
||||||
|
way of storing UTF-8 text is to stuff it in a binary datatype.
|
||||||
|
<code>CHAR</code> becomes <code>BINARY</code>, <code>VARCHAR</code> becomes
|
||||||
|
<code>VARBINARY</code> and <code>TEXT</code> becomes <code>BLOB</code>.
|
||||||
|
Doing so can save you some huge headaches:</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>The syntax for binary data types is very portable,</li>
|
||||||
|
<li>MySQL 4.0 has <em>no</em> support for character encodings, so
|
||||||
|
if you want to support it you <em>have</em> to use binary,</li>
|
||||||
|
<li>MySQL, as of 5.1, has no support for four byte UTF-8 characters,
|
||||||
|
which represent characters beyond the basic multilingual
|
||||||
|
plane, and</li>
|
||||||
|
<li>You will never have to worry about your DBMS being too smart
|
||||||
|
and attempting to convert your text when you don't want it to.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>MediaWiki, a very prominent international application, uses binary fields
|
||||||
|
for storing their data because of point three.</p>
|
||||||
|
|
||||||
|
<p>There are drawbacks, of course:</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Database tools like PHPMyAdmin won't be able to offer you inline
|
||||||
|
text editing, since it is declared as binary,</li>
|
||||||
|
<li>It's not semantically correct: it's really text not binary
|
||||||
|
(lying to the database),</li>
|
||||||
|
<li>Unless you use the not-very-portable wizardry mentioned above,
|
||||||
|
you have to change the encoding yourself (usually, you'd do
|
||||||
|
it on the fly), and</li>
|
||||||
|
<li>You will not have collation.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Choose based on your circumstances.</p>
|
||||||
|
|
||||||
|
<h3 id="migrate-editor">Text editor</h3>
|
||||||
|
|
||||||
|
<p>For more flat-file oriented systems, you will often be tasked with
|
||||||
|
converting reams of existing text and HTML files into UTF-8, as well as
|
||||||
|
making sure that all new files uploaded are properly encoded. Once again,
|
||||||
|
I can only point vaguely in the right direction for converting your
|
||||||
|
existing files: make sure you back up, make sure you use
|
||||||
|
<a href="http://php.net/ref.iconv">iconv</a>(), and
|
||||||
|
make sure you know what the original character encoding of the files
|
||||||
|
is (or are, depending on the tidiness of your system).</p>
|
||||||
|
|
||||||
|
<p>However, I can proffer more specific advice on the subject of
|
||||||
|
text editors. Many text editors have notoriously spotty Unicode support.
|
||||||
|
To find out how your editor is doing, you can check out <a
|
||||||
|
href="http://www.alanwood.net/unicode/utilities_editors.html">this list</a>
|
||||||
|
or <a href="http://en.wikipedia.org/wiki/Comparison_of_text_editors#Encoding_support">Wikipedia's list.</a>
|
||||||
|
I personally use Notepad++, which works like a charm when it comes to UTF-8.
|
||||||
|
Usually, you will have to <strong>explicitly</strong> tell the editor through some dialogue
|
||||||
|
(usually Save as or Format) what encoding you want it to use. An editor
|
||||||
|
will often offer "Unicode" as a method of saving, which is
|
||||||
|
ambiguous. Make sure you know whether or not they really mean UTF-8
|
||||||
|
or UTF-16 (which is another flavor of Unicode).</p>
|
||||||
|
|
||||||
|
<p>The two things to look out for are whether or not the editor
|
||||||
|
supports <strong>font mixing</strong> (multiple
|
||||||
|
fonts in one document) and whether or not it adds a <strong>BOM</strong>.
|
||||||
|
Font mixing is important because fonts rarely have support for every
|
||||||
|
language known to mankind: in order to be flexible, an editor must
|
||||||
|
be able to take a little from here and a little from there, otherwise
|
||||||
|
all your Chinese characters will come as nice boxes. We'll discuss
|
||||||
|
BOM below.</p>
|
||||||
|
|
||||||
<h3 id="migrate-bom">Byte Order Mark (headers already sent!)</h3>
|
<h3 id="migrate-bom">Byte Order Mark (headers already sent!)</h3>
|
||||||
|
|
||||||
|
<p>The BOM, or <a href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte
|
||||||
|
Order Mark</a>, is a magical, invisible character placed at
|
||||||
|
the beginning of UTF-8 files to tell people what the encoding is and
|
||||||
|
what the endianness of the text is. It is also unnecessary.</p>
|
||||||
|
|
||||||
|
<p>Because it's invisible, it often
|
||||||
|
catches people by surprise when it starts doing things it shouldn't
|
||||||
|
be doing. For example, this PHP file:</p>
|
||||||
|
|
||||||
|
<pre><strong>BOM</strong><?php
|
||||||
|
header('Location: index.php');
|
||||||
|
?></pre>
|
||||||
|
|
||||||
|
<p>...will fail with the all too familiar <strong>Headers already sent</strong>
|
||||||
|
PHP error. And because the BOM is invisible, this culprit will go unnoticed.
|
||||||
|
My suggestion is to only use ASCII in PHP pages, but if you must, make
|
||||||
|
sure the page is saved WITHOUT the BOM.</p>
|
||||||
|
|
||||||
|
<blockquote class="aside">
|
||||||
|
<p>The headers the error is referring to are <strong>HTTP headers</strong>,
|
||||||
|
which are sent to the browser before any HTML to tell it various
|
||||||
|
information. The moment any regular text (and yes, a BOM counts as
|
||||||
|
ordinary text) is output, the headers must be sent, and you are
|
||||||
|
not allowed to send anymore. Thus, the error.</p>
|
||||||
|
</blockquote>
|
||||||
|
|
||||||
|
<p>If you are reading in text files to insert into the middle of another
|
||||||
|
page, it is strongly advised (but not strictly necessary) that you replace out the UTF-8 byte
|
||||||
|
sequence for BOM <code>"\xEF\xBB\xBF"</code> before inserting it in,
|
||||||
|
via:</p>
|
||||||
|
|
||||||
|
<pre>$text = str_replace("\xEF\xBB\xBF", '', $text);</pre>
|
||||||
|
|
||||||
|
<h3 id="migrate-fonts">Fonts</h3>
|
||||||
|
|
||||||
|
<p>Generally speaking, people who are having trouble with fonts fall
|
||||||
|
into two categories:</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Those who want to
|
||||||
|
use an extremely obscure language for which there is very little
|
||||||
|
support even among native speakers of the language, and</li>
|
||||||
|
<li>Those where the primary language of the text is
|
||||||
|
well-supported but there are occasional characters
|
||||||
|
that aren't supported.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Yes, there's always a chance where an English user happens across
|
||||||
|
a Sinhalese website and doesn't have the right font. But an English user
|
||||||
|
who happens not to have the right fonts probably has no business reading Sinhalese
|
||||||
|
anyway. So we'll deal with the other two edge cases.</p>
|
||||||
|
|
||||||
|
<h4 id="migrate-fonts-obscure">Obscure scripts</h4>
|
||||||
|
|
||||||
|
<p>If you run a Bengali website, you may get comments from users who
|
||||||
|
would like to read your website but get heaps of question marks or
|
||||||
|
other meaningless characters. Fixing this problem requires the
|
||||||
|
installation of a font or language pack which is often highly
|
||||||
|
dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_help">Here is an example</a>
|
||||||
|
of such a help file for the Bengali language, I am sure there are
|
||||||
|
others out there too. You just have to point users to the appropriate
|
||||||
|
help file.</p>
|
||||||
|
|
||||||
|
<h4 id="migrate-fonts-occasional">Occasional use</h4>
|
||||||
|
|
||||||
|
<p>A prime example of when you'll see some very obscure Unicode
|
||||||
|
characters embedded in what otherwise would be very bland ASCII are
|
||||||
|
letters of the
|
||||||
|
<a href="http://en.wikipedia.org/wiki/International_Phonetic_Alphabet">International
|
||||||
|
Phonetic Alphabet (IPA)</a>, used to designate pronunciations in a very standard
|
||||||
|
manner (you probably see them all the time in your dictionary). Your
|
||||||
|
average font probably won't have support for all of the IPA characters
|
||||||
|
like ʘ (bilabial click) or ʒ (voiced postalveolar fricative).
|
||||||
|
So what's a poor browser to do? Font mix! Smart browsers like Mozilla Firefox
|
||||||
|
and Internet Explorer 7 will borrow glyphs from other fonts in order
|
||||||
|
to make sure that all the characters display properly.</p>
|
||||||
|
|
||||||
|
<p>But what happens when the browser isn't smart and happens to be the
|
||||||
|
most widely used browser in the entire world? Microsoft IE 6
|
||||||
|
is not smart enough to borrow from other fonts when a character isn't
|
||||||
|
present, so more often than not you'll be slapped with a nice big �.
|
||||||
|
To get things to work, MSIE 6 needs a little nudge. You could configure it
|
||||||
|
to use a different font to render the text, but you can achieve the same
|
||||||
|
effect by selectively changing the font for blocks of special characters
|
||||||
|
to known good Unicode fonts.</p>
|
||||||
|
|
||||||
|
<p>Fortunately, the folks over at Wikipedia have already done all the
|
||||||
|
heavy lifting for you. Get the CSS from the horse's mouth here:
|
||||||
|
<a href="http://en.wikipedia.org/wiki/MediaWiki:Common.css">Common.css</a>,
|
||||||
|
and search for ".IPA". There is also a smattering of
|
||||||
|
other classes you can use for other purposes, check out
|
||||||
|
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters#Displaying_Special_Characters">this page</a>
|
||||||
|
for more details. For you lazy ones, this should work:</p>
|
||||||
|
|
||||||
|
<pre>.Unicode {
|
||||||
|
font-family: Code2000, "TITUS Cyberbit Basic", "Doulos SIL",
|
||||||
|
"Chrysanthi Unicode", "Bitstream Cyberbit",
|
||||||
|
"Bitstream CyberBase", Thryomanes, Gentium, GentiumAlt,
|
||||||
|
"Lucida Grande", "Arial Unicode MS", "Microsoft Sans Serif",
|
||||||
|
"Lucida Sans Unicode";
|
||||||
|
font-family /**/:inherit; /* resets fonts for everyone but IE6 */
|
||||||
|
}</pre>
|
||||||
|
|
||||||
|
<p>The standard usage goes along the lines of <code><span class="Unicode">Crazy
|
||||||
|
Unicode stuff here</span></code>. Characters in the
|
||||||
|
<a href="http://en.wikipedia.org/wiki/Windows_Glyph_List_4">Windows Glyph List</a>
|
||||||
|
usually don't need to be fixed, but for anything else you probably
|
||||||
|
want to play it safe. Unless, of course, you don't care about IE6
|
||||||
|
users.</p>
|
||||||
|
|
||||||
<h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>
|
<h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>
|
||||||
|
|
||||||
|
<p>When people claim that PHP6 will solve all our Unicode problems, they're
|
||||||
|
misinformed. It will not fix any of the abovementioned troubles. It will,
|
||||||
|
however, fix the problem we are about to discuss: processing UTF-8 text
|
||||||
|
in PHP.</p>
|
||||||
|
|
||||||
|
<p>PHP (as of PHP5) is blithely unaware of the existence of UTF-8 (with a few
|
||||||
|
notable exceptions). Sometimes, this will cause problems, other times,
|
||||||
|
this won't. So far, we've avoided discussing the architecture of
|
||||||
|
UTF-8, so, we must first ask, what is UTF-8? Yes, it supports Unicode,
|
||||||
|
and yes, it is variable width. Other traits:</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Every character's byte sequence is unique and will never be found
|
||||||
|
inside the byte sequence of another character,</li>
|
||||||
|
<li>UTF-8 may use up to four bytes to encode a character,</li>
|
||||||
|
<li>UTF-8 text must be checked for well-formedness,</li>
|
||||||
|
<li>Pure ASCII is also valid UTF-8, and</li>
|
||||||
|
<li>Binary sorting will sort UTF-8 in the same order as Unicode.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Each of these traits affects different domains of text processing
|
||||||
|
in different ways. It is beyond the scope of this document to explain
|
||||||
|
what precisely these implications are. PHPWact provides
|
||||||
|
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
||||||
|
on what to expect from each function, although coverage is spotty in
|
||||||
|
some areas. Their more general notes on
|
||||||
|
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
||||||
|
are also worth looking at for information on UTF-8. Some rules of thumb
|
||||||
|
when dealing with Unicode text:</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Do not EVER use functions that:<ul>
|
||||||
|
<li>...convert case (strtolower, strtoupper, ucfirst, ucwords)</li>
|
||||||
|
<li>...claim to be case-insensitive (str_ireplace, stristr, strcasecmp)</li>
|
||||||
|
</ul></li>
|
||||||
|
<li>Think twice before using functions that:<ul>
|
||||||
|
<li>...count characters (strlen will return bytes, not characters;
|
||||||
|
str_split and wordwrap may corrupt)</li>
|
||||||
|
<li>...entity-ize things (UTF-8 doesn't need entities)</li>
|
||||||
|
<li>...do very complex string processing (*printf)</li>
|
||||||
|
</ul></li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>...and always think in bytes, not characters. If you use strpos()
|
||||||
|
to find the position of a character, it will be in bytes, but this
|
||||||
|
usually won't matter since substr() also operates with byte indices!</p>
|
||||||
|
|
||||||
|
<p>You'll also need to make sure your UTF-8 is well-formed and will
|
||||||
|
probably need replacements for some of these functions. I recommend
|
||||||
|
using Harry Fuecks' <a href="http://phputf8.sourceforge.net/">PHP
|
||||||
|
UTF-8</a> library, rather than using mbstring directly. HTML Purifier
|
||||||
|
also defines a few useful UTF-8 compatible functions: check out
|
||||||
|
<code>Encoder.php</code> in the <code>/library/HTMLPurifier/</code>
|
||||||
|
directory.</p>
|
||||||
|
|
||||||
<h2 id="externallinks">Further Reading</h2>
|
<h2 id="externallinks">Further Reading</h2>
|
||||||
|
|
||||||
|
<p>Well, that's it. Hopefully this document has served as a very
|
||||||
|
practical springboard into knowledge of how UTF-8 works. You may have
|
||||||
|
decided that you don't want to migrate yet: that's fine, just know
|
||||||
|
what will happen to your output and what bug reports you may receive.</p>
|
||||||
|
|
||||||
<p>Many other developers have already discussed the subject of Unicode,
|
<p>Many other developers have already discussed the subject of Unicode,
|
||||||
UTF-8 and internationalization, and I would like to defer to them for
|
UTF-8 and internationalization, and I would like to defer to them for
|
||||||
a more in-depth look into character sets and encodings.</p>
|
a more in-depth look into character sets and encodings.</p>
|
||||||
|
6
docs/fixquotes.htc
Normal file
6
docs/fixquotes.htc
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<public:attach event="oncontentready" onevent="init();" />
|
||||||
|
<script>
|
||||||
|
function init() {
|
||||||
|
element.innerHTML = '“'+element.innerHTML+'”';
|
||||||
|
}
|
||||||
|
</script>
|
@ -31,7 +31,7 @@ information for casual developers using HTML Purifier.</p>
|
|||||||
<dt><a href="enduser-slow.html">Speeding up HTML Purifier</a></dt>
|
<dt><a href="enduser-slow.html">Speeding up HTML Purifier</a></dt>
|
||||||
<dd>Explains how to speed up HTML Purifier through caching or inbound filtering.</dd>
|
<dd>Explains how to speed up HTML Purifier through caching or inbound filtering.</dd>
|
||||||
|
|
||||||
<dt><a href="enduser-utf8.html">UTF-8</a></dt>
|
<dt><a href="enduser-utf8.html">UTF-8: The Secret of Character Encoding</a></dt>
|
||||||
<dd>Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.</dd>
|
<dd>Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.</dd>
|
||||||
|
|
||||||
</dl>
|
</dl>
|
||||||
@ -54,6 +54,10 @@ conventions.</p>
|
|||||||
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
||||||
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
||||||
|
|
||||||
|
<dt><a href="dev-advanced-api.html">Advanced API</a></dt>
|
||||||
|
<dd>Functional specification for HTML Purifier's advanced API for defining
|
||||||
|
custom filtering behavior.</dd>
|
||||||
|
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
<h2>Proposals</h2>
|
<h2>Proposals</h2>
|
||||||
|
@ -7,7 +7,7 @@ value is used for. This means decentralized configuration declarations that
|
|||||||
are nevertheless error checking and a centralized configuration object.
|
are nevertheless error checking and a centralized configuration object.
|
||||||
|
|
||||||
Directives are divided into namespaces, indicating the major portion of
|
Directives are divided into namespaces, indicating the major portion of
|
||||||
functionality they cover (although there may be overlaps. Please consult
|
functionality they cover (although there may be overlaps). Please consult
|
||||||
the documentation in ConfigDef for more information on these namespaces.
|
the documentation in ConfigDef for more information on these namespaces.
|
||||||
|
|
||||||
Since configuration is dependant on context, internal classes require a
|
Since configuration is dependant on context, internal classes require a
|
||||||
@ -36,4 +36,5 @@ the definition, you'd have to force reconstruction.
|
|||||||
|
|
||||||
In practice, the pulling directives from the config object are
|
In practice, the pulling directives from the config object are
|
||||||
solely need-based, and the flex points are littered throughout the
|
solely need-based, and the flex points are littered throughout the
|
||||||
setup() function. Some sort of refactoring is likely in order.
|
setup() function. Some sort of refactoring is likely in order. See
|
||||||
|
ref-xhtml-1.1.txt for more info.
|
||||||
|
@ -1,42 +1,6 @@
|
|||||||
We are going to model our I18N/L10N off of MediaWiki's system. Their's is
|
We are going to model our I18N/L10N off of MediaWiki's system. Their's is
|
||||||
obviously quite complicated, so we're going to simplify it a bit for our needs.
|
obviously quite complicated, so we're going to simplify it a bit for our needs.
|
||||||
|
|
||||||
== Structure ==
|
|
||||||
|
|
||||||
First, you have a Language object. This object contains all the localisable
|
|
||||||
message strings, as well as other important language-specific settings and
|
|
||||||
custom behavior (uppercasing, lowercasing, printing dates, formatting
|
|
||||||
numbers, etc.)
|
|
||||||
|
|
||||||
The object is constructed from two sources: subclassed versions of itself
|
|
||||||
(classes) and Message files (messages).
|
|
||||||
|
|
||||||
== General use ==
|
|
||||||
|
|
||||||
You load a language object by calling the Language::factory() function.
|
|
||||||
This function the class file for the object (taking in account fallback
|
|
||||||
languages by using the fallback langauge's object but overloading the
|
|
||||||
language key) and returns that object. Nothing else happens.
|
|
||||||
|
|
||||||
When a message/etc is requested, a lazy load initializor is called. Now the
|
|
||||||
real work starts. We're first going to take the scenario that the language
|
|
||||||
is not cached. The system loads the Messages file by:
|
|
||||||
|
|
||||||
require( $filename );
|
|
||||||
$cache = compact( self::$mLocalisationKeys );
|
|
||||||
|
|
||||||
...where self::$mLocalisationKeys is the name of variables that could be used
|
|
||||||
in the localization file. This lets you use things like:
|
|
||||||
|
|
||||||
$fallback = false;
|
|
||||||
$rtl = false;
|
|
||||||
|
|
||||||
...and easily siphon them into arrays.
|
|
||||||
|
|
||||||
Then, we load the $fallback language (if not set, English) to fill in the gaps in
|
|
||||||
the messages. There is specialized behavior for certain keys, as they can be
|
|
||||||
mergeable maps, lists or alias lists (not sure what the last one is).
|
|
||||||
|
|
||||||
== Caching ==
|
== Caching ==
|
||||||
|
|
||||||
MediaWiki has lots of caching mechanisms built in, which make the code somewhat
|
MediaWiki has lots of caching mechanisms built in, which make the code somewhat
|
||||||
|
@ -32,6 +32,6 @@ A tag's attribute 'target' (for selecting frames) cut
|
|||||||
current behavior: no substitute, just delete when in strict, allow in loose
|
current behavior: no substitute, just delete when in strict, allow in loose
|
||||||
Attribute 'name' deprecated in favor of 'id'
|
Attribute 'name' deprecated in favor of 'id'
|
||||||
current behavior: dropped silently
|
current behavior: dropped silently
|
||||||
projected behavior: create proper AttrTransform (currently not allowed at all)
|
projected behavior: create proper AttrTransform
|
||||||
[done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
|
[done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
|
||||||
current behavior: disallow as usual
|
current behavior: disallow as usual
|
||||||
|
@ -1,21 +1,187 @@
|
|||||||
|
|
||||||
Getting XHTML 1.1 Working
|
XHTML 1.1 and HTML Purifier
|
||||||
|
|
||||||
It's quite simple, according to <http://www.w3.org/TR/xhtml11/changes.html>
|
|
||||||
|
|
||||||
|
Todo for XHTML 1.1 support <http://www.w3.org/TR/xhtml11/changes.html>
|
||||||
1. Scratch lang entirely in favor of xml:lang
|
1. Scratch lang entirely in favor of xml:lang
|
||||||
2. Scratch name entirely in favor of id (partially-done)
|
2. Scratch name entirely in favor of id (partially-done)
|
||||||
3. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
3. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
||||||
|
|
||||||
...but that's only an informative section. More things to do:
|
HTML Purifier uses the modularization of XHTML
|
||||||
|
<http://www.w3.org/TR/xhtml-modularization/> to organize the internals
|
||||||
|
of HTMLDefinition into a more manageable and extensible fashion. Rather
|
||||||
|
than have one super-object, HTMLDefinition is split into HTMLModules,
|
||||||
|
each of which are responsible for defining elements, their attributes,
|
||||||
|
and other properties (for more in-depth coverage, see
|
||||||
|
/library/HTMLPurifier/HTMLModule.php's docblock comments).
|
||||||
|
|
||||||
1. Scratch style attribute (it's deprecated)
|
The modules that W3C defines and we support are:
|
||||||
2. Be module-aware (this might entail intelligent grouping in the definition
|
|
||||||
and allowing users to specifically remove certain modules (see 5))
|
* 5.1. Attribute Collections (technically not a module)
|
||||||
3. Cross-reference minimal content models with existing DTDs and determine
|
* 5.2. Core Modules
|
||||||
changes (todo)
|
o 5.2.2. Text Module
|
||||||
4. Watch out for the Legacy Module
|
o 5.2.3. Hypertext Module
|
||||||
<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_legacymodule>
|
o 5.2.4. List Module
|
||||||
5. Let users specify their own custom modules
|
* 5.4. Text Extension Modules
|
||||||
6. Study Modularization document
|
o 5.4.1. Presentation Module
|
||||||
<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/>
|
o 5.4.2. Edit Module
|
||||||
|
o 5.4.3. Bi-directional Text Module
|
||||||
|
* 5.6. Table Modules
|
||||||
|
o 5.6.2. Tables Module
|
||||||
|
* 5.7. Image Module
|
||||||
|
* 5.18. Style Attribute Module
|
||||||
|
|
||||||
|
Modules that we don't support but could support are:
|
||||||
|
|
||||||
|
* 5.6. Table Modules
|
||||||
|
o 5.6.1. Basic Tables Module [?]
|
||||||
|
* 5.8. Client-side Image Map Module [?]
|
||||||
|
* 5.9. Server-side Image Map Module [?]
|
||||||
|
* 5.12. Target Module [?]
|
||||||
|
* 5.21. Name Identification Module [deprecated]
|
||||||
|
* 5.22. Legacy Module [deprecated]
|
||||||
|
|
||||||
|
These modules will not be implemented due to their dangerousness or
|
||||||
|
inapplicability as an XHTML fragment:
|
||||||
|
|
||||||
|
* 5.2. Core Modules
|
||||||
|
o 5.2.1. Structure Module
|
||||||
|
* 5.3. Applet Module
|
||||||
|
* 5.5. Forms Modules
|
||||||
|
o 5.5.1. Basic Forms Module
|
||||||
|
o 5.5.2. Forms Module
|
||||||
|
* 5.10. Object Module
|
||||||
|
* 5.11. Frames Module
|
||||||
|
* 5.13. Iframe Module
|
||||||
|
* 5.14. Intrinsic Events Module
|
||||||
|
* 5.15. Metainformation Module
|
||||||
|
* 5.16. Scripting Module
|
||||||
|
* 5.17. Style Sheet Module
|
||||||
|
* 5.19. Link Module
|
||||||
|
* 5.20. Base Module
|
||||||
|
|
||||||
|
We will not be using W3C's XML Schemas or DTDs directly due to the lack
|
||||||
|
of robust tools for handling them (the main problem is that all the
|
||||||
|
current parsers are usually PHP 5 only and solely-validating, not
|
||||||
|
correcting).
|
||||||
|
|
||||||
|
The abstraction of the HTMLDefinition creation process will also
|
||||||
|
contribute to a need for a caching system. Cache invalidation would be
|
||||||
|
difficult, but could be done by comparing the HTML and Attr config
|
||||||
|
namespaces with a copy that was packaged along with the serialized
|
||||||
|
HTMLDefinition object.
|
||||||
|
|
||||||
|
== General Use-Case ==
|
||||||
|
|
||||||
|
The outwards API of HTMLDefinition has been largely preserved, not
|
||||||
|
only for backwards-compatibility but also by design. Instead,
|
||||||
|
HTMLDefinition can be retrieved "raw", in which it loads a structure
|
||||||
|
that closely resembles the modules of XHTML 1.1. This structure is very
|
||||||
|
dynamic, making it easy to make cascading changes to global content
|
||||||
|
sets or remove elements in bulk.
|
||||||
|
|
||||||
|
However, once HTML Purifier needs the actual definition, it retrieves
|
||||||
|
a finalized version of HTMLDefinition. The finalized definition involves
|
||||||
|
processing the modules into a form that is optimized for multiple
|
||||||
|
calls. This final version is immutable and, even if editable, would
|
||||||
|
be extremely hard to change.
|
||||||
|
|
||||||
|
So, some code taking advantage of the XHTML modularization may look
|
||||||
|
like this:
|
||||||
|
|
||||||
|
<?php
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$def =& $config->getHTMLDefinition(true); // reference to raw
|
||||||
|
unset($def->modules['Hypertext']); // rm ''a'' link
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
|
$purifier->purify($html); // now the definition is finalized
|
||||||
|
?>
|
||||||
|
|
||||||
|
== Inclusions ==
|
||||||
|
|
||||||
|
One of the nice features of HTMLDefinition is that piggy-backing off
|
||||||
|
of global attribute and content sets is extremely easy to do.
|
||||||
|
|
||||||
|
=== Attributes ===
|
||||||
|
|
||||||
|
HTMLModule->elements[$element]->attr stores attribute information for the
|
||||||
|
specific attributes of $element. This is quite close to the final
|
||||||
|
API that HTML Purifier interfaces with, but there's an important
|
||||||
|
extra feature: attr may also contain an array with a member index zero.
|
||||||
|
|
||||||
|
<?php
|
||||||
|
HTMLModule->elements[$element]->attr[0] = array('AttrSet');
|
||||||
|
?>
|
||||||
|
|
||||||
|
Rather than map the attribute key 0 to an array (which should be
|
||||||
|
an AttrDef), it defines a number of attribute collections that should
|
||||||
|
be merged into this element's attribute array.
|
||||||
|
|
||||||
|
Furthermore, the value of an attribute key, attribute value pair need
|
||||||
|
not be a fully fledged AttrDef object. They can also be a string, which
|
||||||
|
signifies an AttrDef that is looked up from a centralized registry
|
||||||
|
AttrTypes. This allows more concise attribute definitions that look
|
||||||
|
more like W3C's declarations, as well as offering a centralized point
|
||||||
|
for modifying the behavior of one attribute type. And, of course, the
|
||||||
|
old method of manually instantiating an AttrDef still works.
|
||||||
|
|
||||||
|
=== Attribute Collections ===
|
||||||
|
|
||||||
|
Attribute collections are stored and processed in the AttrCollections
|
||||||
|
object, which is responsible for performing the inclusions signified
|
||||||
|
by the 0 index. These attribute collections, too, are mutable, by
|
||||||
|
using HTMLModule->attr_collections. You may add new attributes
|
||||||
|
to a collection or define an entirely new collection for your module's
|
||||||
|
use. Inclusions can also be cumulative.
|
||||||
|
|
||||||
|
Attribute collections allow us to get rid of so called "global attributes"
|
||||||
|
(which actually aren't so global).
|
||||||
|
|
||||||
|
=== Content Models and ChildDef ===
|
||||||
|
|
||||||
|
An implementation of the above-mentioned attributes and attribute
|
||||||
|
collections was applied to the ChildDef system. HTML Purifier uses
|
||||||
|
a proprietary system called ChildDef for performance and flexibility
|
||||||
|
reasons, but this does not line up very well with W3C's notion of
|
||||||
|
regexps for defining the allowed children of an element.
|
||||||
|
|
||||||
|
HTMLPurifier->elements[$element]->content_model and
|
||||||
|
HTMLPurifier->elements[$element]->content_model_type store information
|
||||||
|
about the final ChildDef that will be stored in
|
||||||
|
HTMLPurifier->elements[$element]->child (we use a different variable
|
||||||
|
because the two forms are sufficiently different).
|
||||||
|
|
||||||
|
$content_model is an abstract, string representation of the internal
|
||||||
|
state of ChildDef, while $content_model_type is a string identifier
|
||||||
|
of which ChildDef subclass to instantiate. $content_model is processed
|
||||||
|
by substituting all content set identifiers (capitalized element names)
|
||||||
|
with their contents. It is then parsed and passed into the appropriate
|
||||||
|
ChildDef class, as defined by the ContentSets->getChildDef() or the
|
||||||
|
custom fallback HTMLModule->getChildDef() for custom child definitions
|
||||||
|
not in the core.
|
||||||
|
|
||||||
|
You'll need to use these facilities if you plan on referencing a content
|
||||||
|
set like "Inline" or "Block", and using them is recommended even if you're
|
||||||
|
not due to their conciseness.
|
||||||
|
|
||||||
|
A few notes on $content_model: its structure can be as complicated
|
||||||
|
as you want, but the pipe symbol (|) is reserved for defining possible
|
||||||
|
choices, due to the content sets implementation. For example, a content
|
||||||
|
model that looks like:
|
||||||
|
|
||||||
|
"Inline -> Block -> a"
|
||||||
|
|
||||||
|
...when the Inline content set is defined as "span | b" and the Block
|
||||||
|
content set is defined as "div | blockquote", will expand into:
|
||||||
|
|
||||||
|
"span | b -> div | blockquote -> a"
|
||||||
|
|
||||||
|
The custom HTMLModule->getChildDef() function will need to be able to
|
||||||
|
then feed this information to ChildDef in a usable manner.
|
||||||
|
|
||||||
|
=== Content Sets ===
|
||||||
|
|
||||||
|
Content sets can be altered using HTMLModule->content_sets, an associative
|
||||||
|
array of content set names to content set contents. If the content set
|
||||||
|
already exists, your values are appended on to it (great for, say,
|
||||||
|
registering the font tag as an inline element), otherwise it is
|
||||||
|
created. They are substituted into content_model.
|
@ -42,3 +42,27 @@ blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
|||||||
|
|
||||||
/* Contains, without exception, $Id$, for SVN version info. */
|
/* Contains, without exception, $Id$, for SVN version info. */
|
||||||
#version {text-align:right; font-style:italic; margin:2em 0;}
|
#version {text-align:right; font-style:italic; margin:2em 0;}
|
||||||
|
|
||||||
|
#toc ol ol {list-style-type:lower-roman;}
|
||||||
|
#toc ol {list-style-type:decimal;}
|
||||||
|
#toc {list-style-type:upper-alpha;}
|
||||||
|
|
||||||
|
q {
|
||||||
|
behavior: url(fixquotes.htc); /* IE fix */
|
||||||
|
quotes: '\201C' '\201D' '\2018' '\2019';
|
||||||
|
}
|
||||||
|
q:before {
|
||||||
|
content: open-quote;
|
||||||
|
}
|
||||||
|
q:after {
|
||||||
|
content: close-quote;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Marks off implementation details interesting only to the person writing
|
||||||
|
the class described in the spec. */
|
||||||
|
.technical {margin-left:2em; }
|
||||||
|
.technical:before {content:"Technical note: "; font-weight:bold; color:#061; }
|
||||||
|
|
||||||
|
/* Marks off sections that are lacking. */
|
||||||
|
.fixme {margin-left:2em; }
|
||||||
|
.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
|
||||||
|
100
library/HTMLPurifier/AttrCollections.php
Normal file
100
library/HTMLPurifier/AttrCollections.php
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrTypes.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Defines common attribute collections that modules reference
|
||||||
|
*/
|
||||||
|
|
||||||
|
class HTMLPurifier_AttrCollections
{

    /**
     * Associative array of attribute collections, indexed by name.
     * Each collection maps attribute names to their definitions; the
     * special integer key 0, when present, holds a list of names of
     * other collections to be merged into this one.
     * @note Technically, the composition of these is more complicated,
     *       but we bypass it using our own excludes property
     */
    var $info = array();

    /**
     * Performs all expansions on internal data for use by other inclusions
     * It also collects all attribute collection extensions from
     * modules
     * @param $attr_types HTMLPurifier_AttrTypes instance
     * @param $modules Hash array of HTMLPurifier_HTMLModule members
     */
    function HTMLPurifier_AttrCollections($attr_types, $modules) {
        $info =& $this->info;
        // load extensions from the modules
        foreach ($modules as $module) {
            foreach ($module->attr_collections as $coll_i => $coll) {
                foreach ($coll as $attr_i => $attr) {
                    if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
                        // an inclusion list already exists for this
                        // collection: append to it rather than overwrite
                        $info[$coll_i][$attr_i] = array_merge(
                            $info[$coll_i][$attr_i], $attr);
                        continue;
                    }
                    // later modules override earlier attribute definitions
                    $info[$coll_i][$attr_i] = $attr;
                }
            }
        }
        // perform internal expansions and inclusions
        foreach ($info as $name => $attr) {
            // merge attribute collections that include others
            $this->performInclusions($info[$name]);
            // replace string identifiers with actual attribute objects
            $this->expandIdentifiers($info[$name], $attr_types);
        }
    }

    /**
     * Takes a reference to an attribute associative array and performs
     * all inclusions specified by the zero index. Inclusions are followed
     * transitively: if an included collection itself still carries a zero
     * index, the collections it names are merged as well. Attributes that
     * are already set in $attr are never overwritten. The zero index is
     * removed from $attr once processing is complete.
     * @param &$attr Reference to attribute array
     */
    function performInclusions(&$attr) {
        if (!isset($attr[0])) return;
        $merge = $attr[0];
        $seen  = array(); // guards against circular or duplicate inclusions
        // loop through all the inclusions; $merge may grow while we iterate
        for ($i = 0; isset($merge[$i]); $i++) {
            $name = $merge[$i];
            if (isset($seen[$name])) continue;
            $seen[$name] = true;
            // nothing to copy from a collection that was never defined
            if (!isset($this->info[$name])) continue;
            // foreach attribute of the inclusion, copy it over
            foreach ($this->info[$name] as $key => $value) {
                if (isset($attr[$key])) continue; // also catches more inclusions
                $attr[$key] = $value;
            }
            if (isset($this->info[$name][0])) {
                // recursion: queue up the inclusions of the inclusion
                $merge = array_merge($merge, $this->info[$name][0]);
            }
        }
        unset($attr[0]);
    }

    /**
     * Expands all string identifiers in an attribute array by replacing
     * them with the appropriate values inside HTMLPurifier_AttrTypes.
     * Entries whose value is exactly false are removed from the array;
     * other non-string values are left untouched. A string that is not a
     * key of $attr_types->info triggers an E_USER_ERROR and the entry is
     * removed.
     * @param &$attr Reference to attribute array
     * @param $attr_types HTMLPurifier_AttrTypes instance
     */
    function expandIdentifiers(&$attr, $attr_types) {
        foreach ($attr as $def_i => $def) {
            // the zero index is an inclusion list, not an attribute
            if ($def_i === 0) continue;
            // must be tested before is_string(), since false is not a string
            if ($def === false) {
                unset($attr[$def_i]);
                continue;
            }
            if (!is_string($def)) continue;
            if (isset($attr_types->info[$def])) {
                $attr[$def_i] = $attr_types->info[$def];
            } else {
                trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
                unset($attr[$def_i]);
            }
        }
    }

}
|
||||||
|
|
||||||
|
?>
|
@ -7,7 +7,7 @@ require_once 'HTMLPurifier/CSSDefinition.php';
|
|||||||
* Validates shorthand CSS property background.
|
* Validates shorthand CSS property background.
|
||||||
* @warning Does not support url tokens that have internal spaces.
|
* @warning Does not support url tokens that have internal spaces.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Background extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -16,7 +16,7 @@ class HTMLPurifier_AttrDef_Background extends HTMLPurifier_AttrDef
|
|||||||
*/
|
*/
|
||||||
var $info;
|
var $info;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_Background($config) {
|
function HTMLPurifier_AttrDef_CSS_Background($config) {
|
||||||
$def = $config->getCSSDefinition();
|
$def = $config->getCSSDefinition();
|
||||||
$this->info['background-color'] = $def->info['background-color'];
|
$this->info['background-color'] = $def->info['background-color'];
|
||||||
$this->info['background-image'] = $def->info['background-image'];
|
$this->info['background-image'] = $def->info['background-image'];
|
@ -1,8 +1,8 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/CSSLength.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Percentage.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||||
|
|
||||||
/* W3C says:
|
/* W3C says:
|
||||||
[ // adjective and number must be in correct order, even if
|
[ // adjective and number must be in correct order, even if
|
||||||
@ -45,15 +45,15 @@ require_once 'HTMLPurifier/AttrDef/Percentage.php';
|
|||||||
/**
|
/**
|
||||||
* Validates the value of background-position.
|
* Validates the value of background-position.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_BackgroundPosition extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
var $length;
|
var $length;
|
||||||
var $percentage;
|
var $percentage;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_BackgroundPosition() {
|
function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
|
||||||
$this->length = new HTMLPurifier_AttrDef_CSSLength();
|
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
|
||||||
$this->percentage = new HTMLPurifier_AttrDef_Percentage();
|
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
/**
|
/**
|
||||||
* Validates the border property as defined by CSS.
|
* Validates the border property as defined by CSS.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Border extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -13,7 +13,7 @@ class HTMLPurifier_AttrDef_Border extends HTMLPurifier_AttrDef
|
|||||||
*/
|
*/
|
||||||
var $info = array();
|
var $info = array();
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_Border($config) {
|
function HTMLPurifier_AttrDef_CSS_Border($config) {
|
||||||
$def = $config->getCSSDefinition();
|
$def = $config->getCSSDefinition();
|
||||||
$this->info['border-width'] = $def->info['border-width'];
|
$this->info['border-width'] = $def->info['border-width'];
|
||||||
$this->info['border-style'] = $def->info['border-style'];
|
$this->info['border-style'] = $def->info['border-style'];
|
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
/**
|
/**
|
||||||
* Validates Color as defined by CSS.
|
* Validates Color as defined by CSS.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Color extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
@ -9,7 +9,7 @@
|
|||||||
* especially useful for CSS values, which often are a choice between
|
* especially useful for CSS values, which often are a choice between
|
||||||
* an enumerated set of predefined values or a flexible data type.
|
* an enumerated set of predefined values or a flexible data type.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -21,7 +21,7 @@ class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
|
|||||||
/**
|
/**
|
||||||
* @param $defs List of HTMLPurifier_AttrDef objects
|
* @param $defs List of HTMLPurifier_AttrDef objects
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_Composite($defs) {
|
function HTMLPurifier_AttrDef_CSS_Composite($defs) {
|
||||||
$this->defs = $defs;
|
$this->defs = $defs;
|
||||||
}
|
}
|
||||||
|
|
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
/**
|
/**
|
||||||
* Validates shorthand CSS property font.
|
* Validates shorthand CSS property font.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Font extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -30,7 +30,7 @@ class HTMLPurifier_AttrDef_Font extends HTMLPurifier_AttrDef
|
|||||||
'status-bar' => true
|
'status-bar' => true
|
||||||
);
|
);
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_Font($config) {
|
function HTMLPurifier_AttrDef_CSS_Font($config) {
|
||||||
$def = $config->getCSSDefinition();
|
$def = $config->getCSSDefinition();
|
||||||
$this->info['font-style'] = $def->info['font-style'];
|
$this->info['font-style'] = $def->info['font-style'];
|
||||||
$this->info['font-variant'] = $def->info['font-variant'];
|
$this->info['font-variant'] = $def->info['font-variant'];
|
@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
/**
|
/**
|
||||||
* Validates a font family list according to CSS spec
|
* Validates a font family list according to CSS spec
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_FontFamily extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
@ -1,13 +1,12 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Number.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a Length as defined by CSS.
|
* Represents a Length as defined by CSS.
|
||||||
* @warning Be sure not to confuse this with HTMLPurifier_AttrDef_Length!
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -26,8 +25,8 @@ class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
|||||||
* @param $non_negative Bool indication whether or not negative values are
|
* @param $non_negative Bool indication whether or not negative values are
|
||||||
* allowed.
|
* allowed.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_CSSLength($non_negative = false) {
|
function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
|
||||||
$this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
|
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($length, $config, &$context) {
|
function validate($length, $config, &$context) {
|
@ -6,16 +6,16 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
* Validates shorthand CSS property list-style.
|
* Validates shorthand CSS property list-style.
|
||||||
* @warning Does not support url tokens that have internal spaces.
|
* @warning Does not support url tokens that have internal spaces.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_ListStyle extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Local copy of component validators.
|
* Local copy of component validators.
|
||||||
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
|
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
|
||||||
*/
|
*/
|
||||||
var $info;
|
var $info;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_ListStyle($config) {
|
function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
|
||||||
$def = $config->getCSSDefinition();
|
$def = $config->getCSSDefinition();
|
||||||
$this->info['list-style-type'] = $def->info['list-style-type'];
|
$this->info['list-style-type'] = $def->info['list-style-type'];
|
||||||
$this->info['list-style-position'] = $def->info['list-style-position'];
|
$this->info['list-style-position'] = $def->info['list-style-position'];
|
@ -13,7 +13,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
* can only be used alone: it will never manifest as part of a multi
|
* can only be used alone: it will never manifest as part of a multi
|
||||||
* shorthand declaration. Thus, this class does not allow inherit.
|
* shorthand declaration. Thus, this class does not allow inherit.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -30,7 +30,7 @@ class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
|
|||||||
* @param $single HTMLPurifier_AttrDef to multiply
|
* @param $single HTMLPurifier_AttrDef to multiply
|
||||||
* @param $max Max number of values allowed (usually four)
|
* @param $max Max number of values allowed (usually four)
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_Multiple($single, $max = 4) {
|
function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
|
||||||
$this->single = $single;
|
$this->single = $single;
|
||||||
$this->max = $max;
|
$this->max = $max;
|
||||||
}
|
}
|
@ -3,7 +3,7 @@
|
|||||||
/**
|
/**
|
||||||
* Validates a number as defined by the CSS spec.
|
* Validates a number as defined by the CSS spec.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
|
|||||||
/**
|
/**
|
||||||
* @param $non_negative Bool indicating whether negatives are forbidden
|
* @param $non_negative Bool indicating whether negatives are forbidden
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_Number($non_negative = false) {
|
function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
|
||||||
$this->non_negative = $non_negative;
|
$this->non_negative = $non_negative;
|
||||||
}
|
}
|
||||||
|
|
@ -1,24 +1,24 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Number.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a Percentage as defined by the CSS spec.
|
* Validates a Percentage as defined by the CSS spec.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Percentage extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instance of HTMLPurifier_AttrDef_Number to defer number validation
|
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
|
||||||
*/
|
*/
|
||||||
var $number_def;
|
var $number_def;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param Bool indicating whether to forbid negative values
|
* @param Bool indicating whether to forbid negative values
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_Percentage($non_negative = false) {
|
function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
|
||||||
$this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
|
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
* @note This class could be generalized into a version that acts sort of
|
* @note This class could be generalized into a version that acts sort of
|
||||||
* like Enum except you can compound the allowed values.
|
* like Enum except you can compound the allowed values.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_TextDecoration extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
@ -4,17 +4,17 @@ require_once 'HTMLPurifier/AttrDef/URI.php';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a URI in CSS syntax, which uses url('http://example.com')
|
* Validates a URI in CSS syntax, which uses url('http://example.com')
|
||||||
* @note While theoretically speaking we a URI in a CSS document could
|
* @note While theoretically speaking a URI in a CSS document could
|
||||||
* be non-embedded, as of CSS2 there is no such usage so we're
|
* be non-embedded, as of CSS2 there is no such usage so we're
|
||||||
* generalizing it. This may need to be changed in the future.
|
* generalizing it. This may need to be changed in the future.
|
||||||
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
|
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
|
||||||
* the separator, you cannot put a literal semicolon in
|
* the separator, you cannot put a literal semicolon in
|
||||||
* in the URI. Try percent encoding it, in that case.
|
* in the URI. Try percent encoding it, in that case.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_CSSURI extends HTMLPurifier_AttrDef_URI
|
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||||
{
|
{
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_CSSURI() {
|
function HTMLPurifier_AttrDef_CSS_URI() {
|
||||||
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
|
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
|
||||||
}
|
}
|
||||||
|
|
@ -25,8 +25,8 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
|||||||
* @param $case_sensitive Bool indicating whether or not case sensitive
|
* @param $case_sensitive Bool indicating whether or not case sensitive
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_Enum(
|
function HTMLPurifier_AttrDef_Enum(
|
||||||
$valid_values = array(), $case_sensitive = false) {
|
$valid_values = array(), $case_sensitive = false
|
||||||
|
) {
|
||||||
$this->valid_values = array_flip($valid_values);
|
$this->valid_values = array_flip($valid_values);
|
||||||
$this->case_sensitive = $case_sensitive;
|
$this->case_sensitive = $case_sensitive;
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,22 @@
|
|||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Attr', 'EnableID', false, 'bool',
|
||||||
|
'Allows the ID attribute in HTML. This is disabled by default '.
|
||||||
|
'due to the fact that without proper configuration user input can '.
|
||||||
|
'easily break the validation of a webpage by specifying an ID that is '.
|
||||||
|
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
||||||
|
'the wind, enable this directive, but I strongly recommend you also '.
|
||||||
|
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
||||||
|
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
||||||
|
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
||||||
|
'versions.'
|
||||||
|
);
|
||||||
|
HTMLPurifier_ConfigSchema::defineAlias(
|
||||||
|
'HTML', 'EnableAttrID', 'Attr', 'EnableID'
|
||||||
|
);
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'IDPrefix', '', 'string',
|
'Attr', 'IDPrefix', '', 'string',
|
||||||
'String to prefix to IDs. If you have no idea what IDs your pages '.
|
'String to prefix to IDs. If you have no idea what IDs your pages '.
|
||||||
@ -36,11 +52,16 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
* blacklist. If you're hacking around, make sure you use load()!
|
* blacklist. If you're hacking around, make sure you use load()!
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// ref functionality disabled, since we also have to verify
|
||||||
|
// whether or not the ID it refers to exists
|
||||||
|
|
||||||
function validate($id, $config, &$context) {
|
function validate($id, $config, &$context) {
|
||||||
|
|
||||||
|
if (!$config->get('Attr', 'EnableID')) return false;
|
||||||
|
|
||||||
$id = trim($id); // trim it first
|
$id = trim($id); // trim it first
|
||||||
|
|
||||||
if ($id === '') return false;
|
if ($id === '') return false;
|
||||||
@ -55,8 +76,10 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
|||||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||||
}
|
}
|
||||||
|
|
||||||
$id_accumulator =& $context->get('IDAccumulator');
|
//if (!$this->ref) {
|
||||||
if (isset($id_accumulator->ids[$id])) return false;
|
$id_accumulator =& $context->get('IDAccumulator');
|
||||||
|
if (isset($id_accumulator->ids[$id])) return false;
|
||||||
|
//}
|
||||||
|
|
||||||
// we purposely avoid using regex, hopefully this is faster
|
// we purposely avoid using regex, hopefully this is faster
|
||||||
|
|
||||||
@ -71,7 +94,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
|||||||
$result = ($trim === '');
|
$result = ($trim === '');
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($result) $id_accumulator->add($id);
|
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
||||||
|
|
||||||
// if no change was made to the ID, return the result
|
// if no change was made to the ID, return the result
|
||||||
// else, return the new id if stripping whitespace made it
|
// else, return the new id if stripping whitespace made it
|
@ -1,18 +1,16 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Pixels.php';
|
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates the HTML type length (not to be confused with CSS's length).
|
* Validates the HTML type length (not to be confused with CSS's length).
|
||||||
*
|
*
|
||||||
* This accepts integer pixels or percentages as lengths for certain
|
* This accepts integer pixels or percentages as lengths for certain
|
||||||
* HTML attributes. Don't use this for CSS: that's
|
* HTML attributes.
|
||||||
* HTMLPurifier_AttrDef_CSSLength which requires prefixes and allows a lot
|
|
||||||
* more different types.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_Length extends HTMLPurifier_AttrDef_Pixels
|
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
|
||||||
{
|
{
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
@ -1,7 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Length.php';
|
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a MultiLength as defined by the HTML spec.
|
* Validates a MultiLength as defined by the HTML spec.
|
||||||
@ -9,7 +9,7 @@ require_once 'HTMLPurifier/AttrDef/Length.php';
|
|||||||
* A multilength is either a integer (pixel count), a percentage, or
|
* A multilength is either a integer (pixel count), a percentage, or
|
||||||
* a relative number.
|
* a relative number.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
|
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
|
||||||
{
|
{
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
||||||
@ -27,12 +27,14 @@ class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
|
|||||||
|
|
||||||
$int = substr($string, 0, $length - 1);
|
$int = substr($string, 0, $length - 1);
|
||||||
|
|
||||||
|
if ($int == '') return '*';
|
||||||
if (!is_numeric($int)) return false;
|
if (!is_numeric($int)) return false;
|
||||||
|
|
||||||
$int = (int) $int;
|
$int = (int) $int;
|
||||||
|
|
||||||
if ($int < 0) return '0*';
|
if ($int < 0) return false;
|
||||||
|
if ($int == 0) return '0';
|
||||||
|
if ($int == 1) return '*';
|
||||||
return ((string) $int) . '*';
|
return ((string) $int) . '*';
|
||||||
|
|
||||||
}
|
}
|
@ -4,9 +4,13 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates the contents of the global HTML attribute class.
|
* Validates contents based on NMTOKENS attribute type.
|
||||||
|
* @note The only current use for this is the class attribute in HTML
|
||||||
|
* @note Could have some functionality factored out into Nmtoken class
|
||||||
|
* @warning We cannot assume this class will be used only for 'class'
|
||||||
|
* attributes. Not sure how to hook in magic behavior, then.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
||||||
@ -31,10 +35,10 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
if (empty($matches[1])) return false;
|
if (empty($matches[1])) return false;
|
||||||
|
|
||||||
// reconstruct class string
|
// reconstruct string
|
||||||
$new_string = '';
|
$new_string = '';
|
||||||
foreach ($matches[1] as $class_names) {
|
foreach ($matches[1] as $token) {
|
||||||
$new_string .= $class_names . ' ';
|
$new_string .= $token . ' ';
|
||||||
}
|
}
|
||||||
$new_string = rtrim($new_string);
|
$new_string = rtrim($new_string);
|
||||||
|
|
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
/**
|
/**
|
||||||
* Validates an integer representation of pixels according to the HTML spec.
|
* Validates an integer representation of pixels according to the HTML spec.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Pixels extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
@ -46,7 +46,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
// process second subtag : $subtags[1]
|
// process second subtag : $subtags[1]
|
||||||
$length = strlen($subtags[1]);
|
$length = strlen($subtags[1]);
|
||||||
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||||
return $new_string;
|
return $new_string;
|
||||||
}
|
}
|
||||||
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/URIScheme.php';
|
require_once 'HTMLPurifier/URIScheme.php';
|
||||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
@ -77,6 +77,14 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'This directive has been available since 1.3.0.'
|
'This directive has been available since 1.3.0.'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', 'Disable', false, 'bool',
|
||||||
|
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||||
|
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||||
|
'This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a URI as defined by RFC 3986.
|
* Validates a URI as defined by RFC 3986.
|
||||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||||
@ -92,7 +100,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
|
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
||||||
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||||
$this->embeds_resource = (bool) $embeds_resource;
|
$this->embeds_resource = (bool) $embeds_resource;
|
||||||
}
|
}
|
||||||
@ -102,6 +110,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
// We'll write stack-based parsers later, for now, use regexps to
|
// We'll write stack-based parsers later, for now, use regexps to
|
||||||
// get things working as fast as possible (irony)
|
// get things working as fast as possible (irony)
|
||||||
|
|
||||||
|
if ($config->get('URI', 'Disable')) return false;
|
||||||
|
|
||||||
// parse as CDATA
|
// parse as CDATA
|
||||||
$uri = $this->parseCDATA($uri);
|
$uri = $this->parseCDATA($uri);
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_Email extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
@ -1,12 +1,12 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Email.php';
|
require_once 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Primitive email validation class based on the regexp found at
|
* Primitive email validation class based on the regexp found at
|
||||||
* http://www.regular-expressions.info/email.html
|
* http://www.regular-expressions.info/email.html
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Email_SimpleCheck extends HTMLPurifier_AttrDef_Email
|
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
|
||||||
{
|
{
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
@ -1,28 +1,28 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
require_once 'HTMLPurifier/AttrDef/URI/IPv6.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
|
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instance of HTMLPurifier_AttrDef_IPv4 sub-validator
|
* Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
|
||||||
*/
|
*/
|
||||||
var $ipv4;
|
var $ipv4;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instance of HTMLPurifier_AttrDef_IPv6 sub-validator
|
* Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
|
||||||
*/
|
*/
|
||||||
var $ipv6;
|
var $ipv6;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_Host() {
|
function HTMLPurifier_AttrDef_URI_Host() {
|
||||||
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
$this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
|
||||||
$this->ipv6 = new HTMLPurifier_AttrDef_IPv6();
|
$this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
@ -6,7 +6,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
* Validates an IPv4 address
|
* Validates an IPv4 address
|
||||||
* @author Feyd @ forums.devnetwork.net (public domain)
|
* @author Feyd @ forums.devnetwork.net (public domain)
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
|
|||||||
*/
|
*/
|
||||||
var $ip4;
|
var $ip4;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_IPv4() {
|
function HTMLPurifier_AttrDef_URI_IPv4() {
|
||||||
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
||||||
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
||||||
}
|
}
|
@ -1,6 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates an IPv6 address.
|
* Validates an IPv6 address.
|
||||||
@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
|||||||
* @note This function requires brackets to have been removed from address
|
* @note This function requires brackets to have been removed from address
|
||||||
* in URI.
|
* in URI.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_IPv6 extends HTMLPurifier_AttrDef_IPv4
|
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
||||||
{
|
{
|
||||||
|
|
||||||
function validate($aIP, $config, &$context) {
|
function validate($aIP, $config, &$context) {
|
41
library/HTMLPurifier/AttrTypes.php
Normal file
41
library/HTMLPurifier/AttrTypes.php
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/Text.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_AttrTypes
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Lookup array of attribute string identifiers to concrete implementations
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $info = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs the info array
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_AttrTypes() {
|
||||||
|
$this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
|
||||||
|
$this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
|
||||||
|
$this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
|
||||||
|
$this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
|
||||||
|
$this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
|
||||||
|
$this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
|
||||||
|
$this->info['Text'] = new HTMLPurifier_AttrDef_Text();
|
||||||
|
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
|
||||||
|
|
||||||
|
// number is really a positive integer (one or more digits)
|
||||||
|
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
@ -1,19 +1,19 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Color.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Composite.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/CSSLength.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Percentage.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Multiple.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/TextDecoration.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/FontFamily.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Font.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Border.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/ListStyle.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/CSSURI.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/BackgroundPosition.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Background.php';
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines allowed CSS attributes and what their values are.
|
* Defines allowed CSS attributes and what their values are.
|
||||||
@ -43,7 +43,7 @@ class HTMLPurifier_CSSDefinition
|
|||||||
array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
|
array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
|
||||||
'groove', 'ridge', 'inset', 'outset'), false);
|
'groove', 'ridge', 'inset', 'outset'), false);
|
||||||
|
|
||||||
$this->info['border-style'] = new HTMLPurifier_AttrDef_Multiple($border_style);
|
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
|
||||||
|
|
||||||
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
|
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
|
||||||
array('none', 'left', 'right', 'both'), false);
|
array('none', 'left', 'right', 'both'), false);
|
||||||
@ -54,10 +54,10 @@ class HTMLPurifier_CSSDefinition
|
|||||||
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
|
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
|
||||||
array('normal', 'small-caps'), false);
|
array('normal', 'small-caps'), false);
|
||||||
|
|
||||||
$uri_or_none = new HTMLPurifier_AttrDef_Composite(
|
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
|
||||||
array(
|
array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('none')),
|
new HTMLPurifier_AttrDef_Enum(array('none')),
|
||||||
new HTMLPurifier_AttrDef_CSSURI()
|
new HTMLPurifier_AttrDef_CSS_URI()
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -68,11 +68,11 @@ class HTMLPurifier_CSSDefinition
|
|||||||
'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
|
'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
|
||||||
$this->info['list-style-image'] = $uri_or_none;
|
$this->info['list-style-image'] = $uri_or_none;
|
||||||
|
|
||||||
$this->info['list-style'] = new HTMLPurifier_AttrDef_ListStyle($config);
|
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
|
||||||
|
|
||||||
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
|
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
|
||||||
array('capitalize', 'uppercase', 'lowercase', 'none'), false);
|
array('capitalize', 'uppercase', 'lowercase', 'none'), false);
|
||||||
$this->info['color'] = new HTMLPurifier_AttrDef_Color();
|
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||||
|
|
||||||
$this->info['background-image'] = $uri_or_none;
|
$this->info['background-image'] = $uri_or_none;
|
||||||
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
|
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
|
||||||
@ -81,96 +81,96 @@ class HTMLPurifier_CSSDefinition
|
|||||||
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
|
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
|
||||||
array('scroll', 'fixed')
|
array('scroll', 'fixed')
|
||||||
);
|
);
|
||||||
$this->info['background-position'] = new HTMLPurifier_AttrDef_BackgroundPosition();
|
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
|
||||||
|
|
||||||
$border_color =
|
$border_color =
|
||||||
$this->info['border-top-color'] =
|
$this->info['border-top-color'] =
|
||||||
$this->info['border-bottom-color'] =
|
$this->info['border-bottom-color'] =
|
||||||
$this->info['border-left-color'] =
|
$this->info['border-left-color'] =
|
||||||
$this->info['border-right-color'] =
|
$this->info['border-right-color'] =
|
||||||
$this->info['background-color'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('transparent')),
|
new HTMLPurifier_AttrDef_Enum(array('transparent')),
|
||||||
new HTMLPurifier_AttrDef_Color()
|
new HTMLPurifier_AttrDef_CSS_Color()
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['background'] = new HTMLPurifier_AttrDef_Background($config);
|
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
|
||||||
|
|
||||||
$this->info['border-color'] = new HTMLPurifier_AttrDef_Multiple($border_color);
|
$this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
|
||||||
|
|
||||||
$border_width =
|
$border_width =
|
||||||
$this->info['border-top-width'] =
|
$this->info['border-top-width'] =
|
||||||
$this->info['border-bottom-width'] =
|
$this->info['border-bottom-width'] =
|
||||||
$this->info['border-left-width'] =
|
$this->info['border-left-width'] =
|
||||||
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
|
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
|
||||||
new HTMLPurifier_AttrDef_CSSLength(true) //disallow negative
|
new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['border-width'] = new HTMLPurifier_AttrDef_Multiple($border_width);
|
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
|
||||||
|
|
||||||
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||||
new HTMLPurifier_AttrDef_CSSLength()
|
new HTMLPurifier_AttrDef_CSS_Length()
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||||
new HTMLPurifier_AttrDef_CSSLength()
|
new HTMLPurifier_AttrDef_CSS_Length()
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['font-size'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
|
new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
|
||||||
'small', 'medium', 'large', 'x-large', 'xx-large',
|
'small', 'medium', 'large', 'x-large', 'xx-large',
|
||||||
'larger', 'smaller')),
|
'larger', 'smaller')),
|
||||||
new HTMLPurifier_AttrDef_Percentage(),
|
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||||
new HTMLPurifier_AttrDef_CSSLength()
|
new HTMLPurifier_AttrDef_CSS_Length()
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['line-height'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||||
new HTMLPurifier_AttrDef_Number(true), // no negatives
|
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
|
||||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||||
new HTMLPurifier_AttrDef_Percentage(true)
|
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||||
));
|
));
|
||||||
|
|
||||||
$margin =
|
$margin =
|
||||||
$this->info['margin-top'] =
|
$this->info['margin-top'] =
|
||||||
$this->info['margin-bottom'] =
|
$this->info['margin-bottom'] =
|
||||||
$this->info['margin-left'] =
|
$this->info['margin-left'] =
|
||||||
$this->info['margin-right'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_CSSLength(),
|
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||||
new HTMLPurifier_AttrDef_Percentage(),
|
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['margin'] = new HTMLPurifier_AttrDef_Multiple($margin);
|
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
|
||||||
|
|
||||||
// non-negative
|
// non-negative
|
||||||
$padding =
|
$padding =
|
||||||
$this->info['padding-top'] =
|
$this->info['padding-top'] =
|
||||||
$this->info['padding-bottom'] =
|
$this->info['padding-bottom'] =
|
||||||
$this->info['padding-left'] =
|
$this->info['padding-left'] =
|
||||||
$this->info['padding-right'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||||
new HTMLPurifier_AttrDef_Percentage(true)
|
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['padding'] = new HTMLPurifier_AttrDef_Multiple($padding);
|
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
|
||||||
|
|
||||||
$this->info['text-indent'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_CSSLength(),
|
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||||
new HTMLPurifier_AttrDef_Percentage()
|
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['width'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||||
new HTMLPurifier_AttrDef_Percentage(true),
|
new HTMLPurifier_AttrDef_CSS_Percentage(true),
|
||||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_TextDecoration();
|
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
|
||||||
|
|
||||||
$this->info['font-family'] = new HTMLPurifier_AttrDef_FontFamily();
|
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
|
||||||
|
|
||||||
// this could use specialized code
|
// this could use specialized code
|
||||||
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
|
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
|
||||||
@ -179,14 +179,14 @@ class HTMLPurifier_CSSDefinition
|
|||||||
|
|
||||||
// MUST be called after other font properties, as it references
|
// MUST be called after other font properties, as it references
|
||||||
// a CSSDefinition object
|
// a CSSDefinition object
|
||||||
$this->info['font'] = new HTMLPurifier_AttrDef_Font($config);
|
$this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
|
||||||
|
|
||||||
// same here
|
// same here
|
||||||
$this->info['border'] =
|
$this->info['border'] =
|
||||||
$this->info['border-bottom'] =
|
$this->info['border-bottom'] =
|
||||||
$this->info['border-top'] =
|
$this->info['border-top'] =
|
||||||
$this->info['border-left'] =
|
$this->info['border-left'] =
|
||||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_Border($config);
|
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
||||||
|
|
||||||
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||||
'collapse', 'seperate'));
|
'collapse', 'seperate'));
|
||||||
@ -197,11 +197,11 @@ class HTMLPurifier_CSSDefinition
|
|||||||
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
|
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||||
'auto', 'fixed'));
|
'auto', 'fixed'));
|
||||||
|
|
||||||
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_Composite(array(
|
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
|
new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
|
||||||
'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
|
'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
|
||||||
new HTMLPurifier_AttrDef_CSSLength(),
|
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||||
new HTMLPurifier_AttrDef_Percentage()
|
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||||
));
|
));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -38,22 +38,13 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
|
|
||||||
function validateChildren($tokens_of_children, $config, &$context) {
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
$parent_type = $context->get('ParentType');
|
if ($context->get('IsInline') === false) {
|
||||||
switch ($parent_type) {
|
return $this->block->validateChildren(
|
||||||
case 'unknown':
|
$tokens_of_children, $config, $context);
|
||||||
case 'inline':
|
} else {
|
||||||
$result = $this->inline->validateChildren(
|
return $this->inline->validateChildren(
|
||||||
$tokens_of_children, $config, $context);
|
$tokens_of_children, $config, $context);
|
||||||
break;
|
|
||||||
case 'block':
|
|
||||||
$result = $this->block->validateChildren(
|
|
||||||
$tokens_of_children, $config, $context);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
trigger_error('Invalid context', E_USER_ERROR);
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
return $result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,10 +20,13 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
|||||||
$elements = str_replace(' ', '', $elements);
|
$elements = str_replace(' ', '', $elements);
|
||||||
$elements = explode('|', $elements);
|
$elements = explode('|', $elements);
|
||||||
}
|
}
|
||||||
$elements = array_flip($elements);
|
$keys = array_keys($elements);
|
||||||
foreach ($elements as $i => $x) {
|
if ($keys == array_keys($keys)) {
|
||||||
$elements[$i] = true;
|
$elements = array_flip($elements);
|
||||||
if (empty($i)) unset($elements[$i]);
|
foreach ($elements as $i => $x) {
|
||||||
|
$elements[$i] = true;
|
||||||
|
if (empty($i)) unset($elements[$i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
$this->elements = $elements;
|
$this->elements = $elements;
|
||||||
$this->gen = new HTMLPurifier_Generator();
|
$this->gen = new HTMLPurifier_Generator();
|
||||||
|
@ -4,27 +4,31 @@ require_once 'HTMLPurifier/ChildDef/Required.php';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes the contents of blockquote when in strict and reformats for validation.
|
* Takes the contents of blockquote when in strict and reformats for validation.
|
||||||
*
|
|
||||||
* From XHTML 1.0 Transitional to Strict, there is a notable change where
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ChildDef_StrictBlockquote
|
class HTMLPurifier_ChildDef_StrictBlockquote
|
||||||
extends HTMLPurifier_ChildDef_Required
|
extends HTMLPurifier_ChildDef_Required
|
||||||
{
|
{
|
||||||
|
var $real_elements;
|
||||||
|
var $fake_elements;
|
||||||
var $allow_empty = true;
|
var $allow_empty = true;
|
||||||
var $type = 'strictblockquote';
|
var $type = 'strictblockquote';
|
||||||
var $init = false;
|
var $init = false;
|
||||||
function HTMLPurifier_ChildDef_StrictBlockquote() {}
|
|
||||||
function validateChildren($tokens_of_children, $config, &$context) {
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
|
|
||||||
$def = $config->getHTMLDefinition();
|
$def = $config->getHTMLDefinition();
|
||||||
if (!$this->init) {
|
if (!$this->init) {
|
||||||
// allow all inline elements
|
// allow all inline elements
|
||||||
$this->elements = $def->info_flow_elements;
|
$this->real_elements = $this->elements;
|
||||||
$this->elements['#PCDATA'] = true;
|
$this->fake_elements = $def->info_content_sets['Flow'];
|
||||||
|
$this->fake_elements['#PCDATA'] = true;
|
||||||
$this->init = true;
|
$this->init = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// trick the parent class into thinking it allows more
|
||||||
|
$this->elements = $this->fake_elements;
|
||||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||||
|
$this->elements = $this->real_elements;
|
||||||
|
|
||||||
if ($result === false) return array();
|
if ($result === false) return array();
|
||||||
if ($result === true) $result = $tokens_of_children;
|
if ($result === true) $result = $tokens_of_children;
|
||||||
|
|
||||||
@ -40,8 +44,10 @@ extends HTMLPurifier_ChildDef_Required
|
|||||||
// ifs are nested for readability
|
// ifs are nested for readability
|
||||||
if (!$is_inline) {
|
if (!$is_inline) {
|
||||||
if (!$depth) {
|
if (!$depth) {
|
||||||
if (($token->type == 'text') ||
|
if (
|
||||||
($def->info[$token->name]->type == 'inline')) {
|
$token->type == 'text' ||
|
||||||
|
!isset($this->elements[$token->name])
|
||||||
|
) {
|
||||||
$is_inline = true;
|
$is_inline = true;
|
||||||
$ret[] = $block_wrap_start;
|
$ret[] = $block_wrap_start;
|
||||||
}
|
}
|
||||||
@ -50,7 +56,7 @@ extends HTMLPurifier_ChildDef_Required
|
|||||||
if (!$depth) {
|
if (!$depth) {
|
||||||
// starting tokens have been inline text / empty
|
// starting tokens have been inline text / empty
|
||||||
if ($token->type == 'start' || $token->type == 'empty') {
|
if ($token->type == 'start' || $token->type == 'empty') {
|
||||||
if ($def->info[$token->name]->type == 'block') {
|
if (isset($this->elements[$token->name])) {
|
||||||
// ended
|
// ended
|
||||||
$ret[] = $block_wrap_end;
|
$ret[] = $block_wrap_end;
|
||||||
$is_inline = false;
|
$is_inline = false;
|
||||||
|
@ -149,23 +149,36 @@ class HTMLPurifier_Config
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$this->conf[$namespace][$key] = $value;
|
$this->conf[$namespace][$key] = $value;
|
||||||
|
if ($namespace == 'HTML' || $namespace == 'Attr') {
|
||||||
|
// reset HTML definition if relevant attributes changed
|
||||||
|
$this->html_definition = null;
|
||||||
|
}
|
||||||
|
if ($namespace == 'CSS') {
|
||||||
|
$this->css_definition = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves a copy of the HTML definition.
|
* Retrieves reference to the HTML definition.
|
||||||
|
* @param $raw Return a copy that has not been setup yet. Must be
|
||||||
|
* called before it's been setup, otherwise won't work.
|
||||||
*/
|
*/
|
||||||
function getHTMLDefinition() {
|
function &getHTMLDefinition($raw = false) {
|
||||||
if ($this->html_definition === null) {
|
if (
|
||||||
$this->html_definition = new HTMLPurifier_HTMLDefinition();
|
empty($this->html_definition) || // hasn't ever been setup
|
||||||
$this->html_definition->setup($this);
|
($raw && $this->html_definition->setup) // requesting new one
|
||||||
|
) {
|
||||||
|
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
|
||||||
|
if ($raw) return $this->html_definition; // no setup!
|
||||||
}
|
}
|
||||||
|
if (!$this->html_definition->setup) $this->html_definition->setup();
|
||||||
return $this->html_definition;
|
return $this->html_definition;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves a copy of the CSS definition
|
* Retrieves reference to the CSS definition
|
||||||
*/
|
*/
|
||||||
function getCSSDefinition() {
|
function &getCSSDefinition() {
|
||||||
if ($this->css_definition === null) {
|
if ($this->css_definition === null) {
|
||||||
$this->css_definition = new HTMLPurifier_CSSDefinition();
|
$this->css_definition = new HTMLPurifier_CSSDefinition();
|
||||||
$this->css_definition->setup($this);
|
$this->css_definition->setup($this);
|
||||||
|
10
library/HTMLPurifier/ConfigDef.php
Normal file
10
library/HTMLPurifier/ConfigDef.php
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Common ancestor of the configuration definition structures.
 */
class HTMLPurifier_ConfigDef
{

    /**
     * Identifier for the kind of definition this object is. The base
     * class carries no kind, hence boolean false.
     */
    var $class = false;

}
|
||||||
|
|
||||||
|
?>
|
74
library/HTMLPurifier/ConfigDef/Directive.php
Normal file
74
library/HTMLPurifier/ConfigDef/Directive.php
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ConfigDef.php';
|
||||||
|
|
||||||
|
/**
 * Structure object holding the definition of one configuration directive.
 * @note Default values are not stored in this structure.
 */
class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
{

    /**
     * Identifies this definition as a directive.
     */
    var $class = 'directive';

    /**
     * Allowed type of the directive. Values are:
     *      - string
     *      - istring (case insensitive string)
     *      - int
     *      - float
     *      - bool
     *      - lookup (array of value => true)
     *      - list (regular numbered index array)
     *      - hash (array of key => value)
     *      - mixed (anything goes)
     */
    var $type = 'mixed';

    /**
     * Plaintext descriptions of the directive, organized by file and then
     * by line number, so several descriptions may coexist.
     */
    var $descriptions = array();

    /**
     * Whether null is an acceptable value. Has no effect for mixed type.
     */
    var $allow_null = false;

    /**
     * Lookup table of permitted values, or boolean true when every value
     * is permitted.
     */
    var $allowed = true;

    /**
     * Hash of value aliases, i.e. values that are treated as equivalent.
     */
    var $aliases = array();

    /**
     * Optionally overrides the default member values; any argument left
     * as null keeps the corresponding default declared above.
     */
    function HTMLPurifier_ConfigDef_Directive(
        $type = null,
        $descriptions = null,
        $allow_null = null,
        $allowed = null,
        $aliases = null
    ) {
        if (!is_null($type)) {
            $this->type = $type;
        }
        if (!is_null($descriptions)) {
            $this->descriptions = $descriptions;
        }
        if (!is_null($allow_null)) {
            $this->allow_null = $allow_null;
        }
        if (!is_null($allowed)) {
            $this->allowed = $allowed;
        }
        if (!is_null($aliases)) {
            $this->aliases = $aliases;
        }
    }

    /**
     * Records a description under the given file and line; an existing
     * entry for the same file and line is overwritten.
     * @param $file File the description was declared in
     * @param $line Line number of the declaration
     * @param $description Text of the description
     */
    function addDescription($file, $line, $description) {
        if (!isset($this->descriptions[$file])) {
            // first description seen for this file
            $this->descriptions[$file] = array();
        }
        $this->descriptions[$file][$line] = $description;
    }

}
|
||||||
|
|
||||||
|
?>
|
27
library/HTMLPurifier/ConfigDef/DirectiveAlias.php
Normal file
27
library/HTMLPurifier/ConfigDef/DirectiveAlias.php
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ConfigDef.php';
|
||||||
|
|
||||||
|
/**
 * Structure object that describes an alias pointing at another directive.
 */
class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
{

    /**
     * Identifies this definition as an alias.
     */
    var $class = 'alias';

    /**
     * Namespace of the directive being aliased to.
     */
    var $namespace;

    /**
     * Name of the directive being aliased to.
     */
    var $name;

    /**
     * @param $namespace Namespace of the target directive
     * @param $name Name of the target directive
     */
    function HTMLPurifier_ConfigDef_DirectiveAlias($namespace, $name) {
        $this->name      = $name;
        $this->namespace = $namespace;
    }

}
|
||||||
|
|
||||||
|
?>
|
23
library/HTMLPurifier/ConfigDef/Namespace.php
Normal file
23
library/HTMLPurifier/ConfigDef/Namespace.php
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ConfigDef.php';
|
||||||
|
|
||||||
|
/**
 * Structure object describing a configuration namespace.
 */
class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef
{

    /**
     * Identifies this definition as a namespace.
     */
    var $class = 'namespace';

    /**
     * String description of what kinds of directives go in this namespace.
     */
    var $description;

    /**
     * @param $description Description of the namespace, null if none given
     */
    function HTMLPurifier_ConfigDef_Namespace($description = null) {
        $this->description = $description;
    }

}
|
||||||
|
|
||||||
|
?>
|
@ -1,6 +1,10 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Error.php';
|
require_once 'HTMLPurifier/Error.php';
|
||||||
|
require_once 'HTMLPurifier/ConfigDef.php';
|
||||||
|
require_once 'HTMLPurifier/ConfigDef/Namespace.php';
|
||||||
|
require_once 'HTMLPurifier/ConfigDef/Directive.php';
|
||||||
|
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration definition, defines directives and their defaults.
|
* Configuration definition, defines directives and their defaults.
|
||||||
@ -138,7 +142,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$def->info[$namespace][$name] =
|
$def->info[$namespace][$name] =
|
||||||
new HTMLPurifier_ConfigEntity_Directive();
|
new HTMLPurifier_ConfigDef_Directive();
|
||||||
$def->info[$namespace][$name]->type = $type;
|
$def->info[$namespace][$name]->type = $type;
|
||||||
$def->info[$namespace][$name]->allow_null = $allow_null;
|
$def->info[$namespace][$name]->allow_null = $allow_null;
|
||||||
$def->defaults[$namespace][$name] = $default;
|
$def->defaults[$namespace][$name] = $default;
|
||||||
@ -172,7 +176,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$def->info[$namespace] = array();
|
$def->info[$namespace] = array();
|
||||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
|
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
|
||||||
$def->info_namespace[$namespace]->description = $description;
|
$def->info_namespace[$namespace]->description = $description;
|
||||||
$def->defaults[$namespace] = array();
|
$def->defaults[$namespace] = array();
|
||||||
}
|
}
|
||||||
@ -284,7 +288,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$def->info[$namespace][$name] =
|
$def->info[$namespace][$name] =
|
||||||
new HTMLPurifier_ConfigEntity_DirectiveAlias(
|
new HTMLPurifier_ConfigDef_DirectiveAlias(
|
||||||
$new_namespace, $new_name);
|
$new_namespace, $new_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -379,120 +383,4 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Base class for configuration entity
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ConfigEntity {
|
|
||||||
var $class = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Structure object describing of a namespace
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {
|
|
||||||
|
|
||||||
function HTMLPurifier_ConfigEntity_Namespace($description = null) {
|
|
||||||
$this->description = $description;
|
|
||||||
}
|
|
||||||
|
|
||||||
var $class = 'namespace';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* String description of what kinds of directives go in this namespace.
|
|
||||||
*/
|
|
||||||
var $description;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Structure object containing definition of a directive.
|
|
||||||
* @note This structure does not contain default values
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
|
|
||||||
{
|
|
||||||
|
|
||||||
var $class = 'directive';
|
|
||||||
|
|
||||||
function HTMLPurifier_ConfigEntity_Directive(
|
|
||||||
$type = null,
|
|
||||||
$descriptions = null,
|
|
||||||
$allow_null = null,
|
|
||||||
$allowed = null,
|
|
||||||
$aliases = null
|
|
||||||
) {
|
|
||||||
if ( $type !== null) $this->type = $type;
|
|
||||||
if ($descriptions !== null) $this->descriptions = $descriptions;
|
|
||||||
if ( $allow_null !== null) $this->allow_null = $allow_null;
|
|
||||||
if ( $allowed !== null) $this->allowed = $allowed;
|
|
||||||
if ( $aliases !== null) $this->aliases = $aliases;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allowed type of the directive. Values are:
|
|
||||||
* - string
|
|
||||||
* - istring (case insensitive string)
|
|
||||||
* - int
|
|
||||||
* - float
|
|
||||||
* - bool
|
|
||||||
* - lookup (array of value => true)
|
|
||||||
* - list (regular numbered index array)
|
|
||||||
* - hash (array of key => value)
|
|
||||||
* - mixed (anything goes)
|
|
||||||
*/
|
|
||||||
var $type = 'mixed';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Plaintext descriptions of the configuration entity is. Organized by
|
|
||||||
* file and line number, so multiple descriptions are allowed.
|
|
||||||
*/
|
|
||||||
var $descriptions = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Is null allowed? Has no effect for mixed type.
|
|
||||||
* @bool
|
|
||||||
*/
|
|
||||||
var $allow_null = false;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lookup table of allowed values of the element, bool true if all allowed.
|
|
||||||
*/
|
|
||||||
var $allowed = true;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Hash of value aliases, i.e. values that are equivalent.
|
|
||||||
*/
|
|
||||||
var $aliases = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds a description to the array
|
|
||||||
*/
|
|
||||||
function addDescription($file, $line, $description) {
|
|
||||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
|
||||||
$this->descriptions[$file][$line] = $description;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Structure object describing a directive alias
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ConfigEntity_DirectiveAlias extends HTMLPurifier_ConfigEntity
|
|
||||||
{
|
|
||||||
var $class = 'alias';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Namespace being aliased to
|
|
||||||
*/
|
|
||||||
var $namespace;
|
|
||||||
/**
|
|
||||||
* Directive being aliased to
|
|
||||||
*/
|
|
||||||
var $name;
|
|
||||||
|
|
||||||
function HTMLPurifier_ConfigEntity_DirectiveAlias($namespace, $name) {
|
|
||||||
$this->namespace = $namespace;
|
|
||||||
$this->name = $name;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
148
library/HTMLPurifier/ContentSets.php
Normal file
148
library/HTMLPurifier/ContentSets.php
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// common defs that we'll support by default
|
||||||
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||||
|
|
||||||
|
class HTMLPurifier_ContentSets
{

    /**
     * Content set definitions as pipe-separated strings, indexed by name.
     * @public
     */
    var $info = array();

    /**
     * Content set lookups (element => true), indexed by name.
     * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
     * @public
     */
    var $lookup = array();

    /**
     * Names of the defined content sets (keys of info), kept in sync.
     */
    var $keys = array();

    /**
     * Expanded content set strings (values of info), kept in sync.
     */
    var $values = array();

    /**
     * Collects the content sets declared by the modules, expands content
     * set identifiers that appear inside other content sets, and fills
     * the keys, values and lookup member variables.
     * @param $modules List of HTMLPurifier_HTMLModule (a single module is
     *                 wrapped into a list)
     */
    function HTMLPurifier_ContentSets($modules) {
        if (!is_array($modules)) {
            $modules = array($modules);
        }

        // Gather the sets from every module. A set declared by several
        // modules is extended, never overridden.
        foreach ($modules as $module) {
            foreach ($module->content_sets as $name => $members) {
                if (!isset($this->info[$name])) {
                    $this->info[$name] = $members;
                    continue;
                }
                $this->info[$name] .= ' | ' . $members;
            }
        }

        // Expand set names occurring inside other sets. This is a single
        // pass, so infinite recursion cannot happen.
        $this->keys = array_keys($this->info);
        foreach ($this->info as $name => $members) {
            // replacement values are re-read on each iteration because
            // earlier iterations may already have expanded them
            $replacements = array_values($this->info);
            $this->info[$name] = str_replace($this->keys, $replacements, $members);
        }
        $this->values = array_values($this->info);

        // Build the element => true lookup for every set.
        foreach ($this->keys as $name) {
            $this->lookup[$name] = $this->convertToLookup($this->info[$name]);
        }
    }

    /**
     * Expands the content model of an element definition and assigns the
     * matching ChildDef to it. Does nothing if a ChildDef is already set.
     * @param $def HTMLPurifier_ElementDef reference
     * @param $module Module that defined the ElementDef
     */
    function generateChildDef(&$def, $module) {
        if (empty($def->child)) {
            $model = $def->content_model;
            if (is_string($model)) {
                // substitute content set names with their members
                $def->content_model = str_replace($this->keys, $this->values, $model);
            }
            $def->child = $this->getChildDef($def, $module);
        }
    }

    /**
     * Creates a ChildDef from the content_model and content_model_type
     * member variables of an HTMLPurifier_ElementDef.
     * @note Types not handled here are passed on to the module, provided
     *       it flags defines_child_def, so that it can supply custom
     *       HTMLPurifier_ChildDef subclasses.
     * @param $def HTMLPurifier_ElementDef to have ChildDef extracted
     * @param $module Module that defined the ElementDef
     * @return HTMLPurifier_ChildDef corresponding to ElementDef, or false
     *         (after raising E_USER_ERROR) if none could be determined
     */
    function getChildDef($def, $module) {
        $model = $def->content_model;

        if (is_object($model)) {
            // tolerated, but the object belongs in ->child
            trigger_error(
                'Literal object child definitions should be stored in '.
                'ElementDef->child not ElementDef->content_model',
                E_USER_NOTICE
            );
            return $model;
        }

        switch ($def->content_model_type) {
            case 'required':
                return new HTMLPurifier_ChildDef_Required($model);
            case 'optional':
                return new HTMLPurifier_ChildDef_Optional($model);
            case 'empty':
                return new HTMLPurifier_ChildDef_Empty();
            case 'custom':
                return new HTMLPurifier_ChildDef_Custom($model);
        }

        // not a built-in type: ask the module, skipping the call when the
        // module does not define child definitions at all
        if ($module->defines_child_def) {
            $from_module = $module->getChildDef($def);
            if ($from_module !== false) {
                return $from_module;
            }
        }

        trigger_error(
            'Could not determine which ChildDef class to instantiate',
            E_USER_ERROR
        );
        return false;
    }

    /**
     * Turns a pipe-separated string of element names into a lookup array.
     * Spaces are stripped before splitting.
     * @param $string List of elements
     * @return Lookup array of elements (name => true)
     */
    function convertToLookup($string) {
        $lookup = array();
        $names  = explode('|', str_replace(' ', '', $string));
        foreach ($names as $name) {
            $lookup[$name] = true;
        }
        return $lookup;
    }

}
|
||||||
|
|
||||||
|
?>
|
122
library/HTMLPurifier/ElementDef.php
Normal file
122
library/HTMLPurifier/ElementDef.php
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Structure that stores an HTML element definition. Used by
 * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
 */
class HTMLPurifier_ElementDef
{

    /**
     * Does the definition work by itself, or is it created solely
     * for the purpose of merging into another definition?
     */
    var $standalone = true;

    /**
     * Associative array of attribute name to HTMLPurifier_AttrDef
     * @note Before being processed by HTMLPurifier_AttrCollections
     *       when modules are finalized during
     *       HTMLPurifier_HTMLDefinition->setup(), this array may also
     *       contain an array at index 0 that indicates which attribute
     *       collections to load into the full array. It may also
     *       contain string identifiers in lieu of HTMLPurifier_AttrDef,
     *       see HTMLPurifier_AttrTypes on how they are expanded during
     *       HTMLPurifier_HTMLDefinition->setup() processing.
     * @public
     */
    var $attr = array();

    /**
     * Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
     * @public
     */
    var $attr_transform_pre = array();

    /**
     * Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
     * @public
     */
    var $attr_transform_post = array();

    /**
     * HTMLPurifier_ChildDef of this tag.
     * @public
     */
    var $child;

    /**
     * Abstract string representation of internal ChildDef rules. See
     * HTMLPurifier_ContentSets for how this is parsed and then transformed
     * into an HTMLPurifier_ChildDef.
     * @public
     */
    var $content_model;

    /**
     * Value of $child->type, used to determine which ChildDef to use,
     * used in combination with $content_model.
     * @public
     */
    var $content_model_type;

    /**
     * Lookup table of tags that close this tag. Used during parsing
     * to make sure we don't attempt to nest unclosed tags.
     * @public
     */
    var $auto_close = array();

    /**
     * Does the element have a content model (#PCDATA | Inline)*? This
     * is important for chameleon ins and del processing in
     * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
     * have to worry about this one.
     * @public
     */
    var $descendants_are_inline;

    /**
     * Lookup table of tags excluded from all descendants of this tag.
     * @public
     */
    var $excludes = array();

    /**
     * Merges the values of another element definition into this one.
     * Values from the new element def take precedence if a value is
     * not mergeable.
     * @param $def HTMLPurifier_ElementDef whose values are merged in;
     *             it is not modified
     */
    function mergeIn($def) {

        // later keys take precedence
        foreach ($def->attr as $k => $v) {
            // Strict comparison is required: with a loose comparison a
            // non-numeric string key such as 'href' equals 0 on PHP
            // versions before 8 and would be mistaken for the include list.
            if ($k === 0) {
                // merge in the includes
                // sorry, no way to override an include
                foreach ($v as $v2) {
                    // append to THIS definition, not to the one being read
                    $this->attr[0][] = $v2;
                }
                continue;
            }
            $this->attr[$k] = $v;
        }
        foreach ($def->attr_transform_pre as $k => $v) {
            $this->attr_transform_pre[$k] = $v;
        }
        foreach ($def->attr_transform_post as $k => $v) {
            $this->attr_transform_post[$k] = $v;
        }
        foreach ($def->auto_close as $k => $v) {
            $this->auto_close[$k] = $v;
        }
        foreach ($def->excludes as $k => $v) {
            $this->excludes[$k] = $v;
        }

        if (!is_null($def->child)) {
            $this->child = $def->child;
        }
        if (!empty($def->content_model)) {
            // content models are alternatives, so they are joined by a pipe
            $this->content_model .= ' | ' . $def->content_model;
        }
        if (!empty($def->content_model_type)) {
            $this->content_model_type = $def->content_model_type;
        }
        if (!is_null($def->descendants_are_inline)) {
            $this->descendants_are_inline = $def->descendants_are_inline;
        }

    }

}
|
||||||
|
|
||||||
|
?>
|
@ -9,7 +9,7 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
|
|||||||
|
|
||||||
function preFilter($html, $config, &$context) {
|
function preFilter($html, $config, &$context) {
|
||||||
$pre_regex = '#<object[^>]+>.+?'.
|
$pre_regex = '#<object[^>]+>.+?'.
|
||||||
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#';
|
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
|
||||||
$pre_replace = '<span class="youtube-embed">\1</span>';
|
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||||
return preg_replace($pre_regex, $pre_replace, $html);
|
return preg_replace($pre_regex, $pre_replace, $html);
|
||||||
}
|
}
|
||||||
|
@ -1,46 +1,12 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
// components
|
||||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
require_once 'HTMLPurifier/HTMLModuleManager.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/ID.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Class.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Text.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Pixels.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Length.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/MultiLength.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
|
||||||
require_once 'HTMLPurifier/AttrDef/CSS.php';
|
|
||||||
require_once 'HTMLPurifier/AttrTransform.php';
|
|
||||||
require_once 'HTMLPurifier/AttrTransform/Lang.php';
|
|
||||||
require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
|
|
||||||
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
|
||||||
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
|
||||||
require_once 'HTMLPurifier/ChildDef.php';
|
|
||||||
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
|
|
||||||
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
|
||||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
|
||||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
|
||||||
require_once 'HTMLPurifier/ChildDef/Table.php';
|
|
||||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
|
||||||
require_once 'HTMLPurifier/Token.php';
|
|
||||||
require_once 'HTMLPurifier/TagTransform.php';
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
// this definition and its modules MUST NOT define configuration directives
|
||||||
'HTML', 'EnableAttrID', false, 'bool',
|
// outside of the HTML or Attr namespaces
|
||||||
'Allows the ID attribute in HTML. This is disabled by default '.
|
|
||||||
'due to the fact that without proper configuration user input can '.
|
|
||||||
'easily break the validation of a webpage by specifying an ID that is '.
|
|
||||||
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
|
||||||
'the wind, enable this directive, but I strongly recommend you also '.
|
|
||||||
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
|
||||||
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
|
||||||
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
|
||||||
'versions.'
|
|
||||||
);
|
|
||||||
|
|
||||||
|
// will be superseded by more accurate doctype declaration schemes
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'HTML', 'Strict', false, 'bool',
|
'HTML', 'Strict', false, 'bool',
|
||||||
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
|
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
|
||||||
@ -91,33 +57,31 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'IDs at all. This directive has been available since 1.3.0.'
|
'IDs at all. This directive has been available since 1.3.0.'
|
||||||
);
|
);
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'Attr', 'DisableURI', false, 'bool',
|
|
||||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
|
||||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
|
||||||
'This directive has been available since 1.3.0.'
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines the purified HTML type with large amounts of objects.
|
* Definition of the purified HTML that describes allowed children,
|
||||||
|
* attributes, and many other things.
|
||||||
*
|
*
|
||||||
* The main function of this object is its $info array, which is an
|
* Conventions:
|
||||||
* associative array of all the child and attribute definitions for
|
|
||||||
* each allowed element. It also contains special use information (always
|
|
||||||
* prefixed by info) for intelligent tag closing and global attributes.
|
|
||||||
*
|
*
|
||||||
* For optimization, the definition generation may be moved to
|
* All member variables that are prefixed with info
|
||||||
* a maintenance script and stipulate that definition be created
|
* (including the main $info array) are used by HTML Purifier internals
|
||||||
* by a factory method that unserializes a serialized version of Definition.
|
* and should not be directly edited when customizing the HTMLDefinition.
|
||||||
* Customization would entail copying the maintenance script, making the
|
* They can usually be set via configuration directives or custom
|
||||||
* necessary changes, generating the serialized object, and then hooking it
|
* modules.
|
||||||
* in via the factory method. We would also offer a LiveDefinition for
|
*
|
||||||
* automatic recompilation, suggesting that we would have a DefinitionGenerator.
|
* On the other hand, member variables without the info prefix are used
|
||||||
|
* internally by the HTMLDefinition and MUST NOT be used by other HTML
|
||||||
|
* Purifier internals. Many of them, however, are public, and may be
|
||||||
|
* edited by userspace code to tweak the behavior of HTMLDefinition.
|
||||||
|
*
|
||||||
|
* HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
|
||||||
|
* rule: in the interest of comprehensiveness, it will sniff everything.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_HTMLDefinition
|
class HTMLPurifier_HTMLDefinition
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/** FULLY-PUBLIC VARIABLES */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Associative array of element names to HTMLPurifier_ElementDef
|
* Associative array of element names to HTMLPurifier_ElementDef
|
||||||
* @public
|
* @public
|
||||||
@ -157,423 +121,111 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
var $info_tag_transform = array();
|
var $info_tag_transform = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of HTMLPurifier_AttrTransform to be performed before validation.
|
* Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
var $info_attr_transform_pre = array();
|
var $info_attr_transform_pre = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of HTMLPurifier_AttrTransform to be performed after validation/
|
* Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
var $info_attr_transform_post = array();
|
var $info_attr_transform_post = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lookup table of flow elements
|
* Nested lookup array of content set name (Block, Inline) to
|
||||||
|
* element name to whether or not it belongs in that content set.
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
var $info_flow_elements = array();
|
var $info_content_sets = array();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** PUBLIC BUT INTERNAL VARIABLES */
|
||||||
|
|
||||||
|
var $setup = false; /**< Has setup() been called yet? */
|
||||||
|
var $config; /**< Temporary instance of HTMLPurifier_Config */
|
||||||
|
|
||||||
|
var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Boolean is a strict definition?
|
* Performs low-cost, preliminary initialization.
|
||||||
* @public
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
*/
|
*/
|
||||||
var $strict;
|
function HTMLPurifier_HTMLDefinition(&$config) {
|
||||||
|
$this->config =& $config;
|
||||||
|
$this->manager = new HTMLPurifier_HTMLModuleManager();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the definition, the meat of the class.
|
* Processes internals into form usable by HTMLPurifier internals.
|
||||||
|
* Modifying the definition after calling this function should not
|
||||||
|
* be done.
|
||||||
*/
|
*/
|
||||||
function setup($config) {
|
function setup() {
|
||||||
|
|
||||||
// some cached config values
|
// multiple call guard
|
||||||
$this->strict = $config->get('HTML', 'Strict');
|
if ($this->setup) {return;} else {$this->setup = true;}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
$this->processModules();
|
||||||
// info[] : initializes the definition objects
|
$this->setupConfigStuff();
|
||||||
|
|
||||||
// if you attempt to define rules later on for a tag not in this array
|
unset($this->config);
|
||||||
// PHP will create an stdclass
|
unset($this->manager);
|
||||||
|
|
||||||
$allowed_tags =
|
}
|
||||||
array(
|
|
||||||
'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
|
|
||||||
'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
|
|
||||||
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small',
|
|
||||||
'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
|
|
||||||
'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
|
|
||||||
'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
|
|
||||||
'colgroup', 'col', 'td', 'th', 'tr'
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!$this->strict) {
|
/**
|
||||||
$allowed_tags[] = 'u';
|
* Extract out the information from the manager
|
||||||
$allowed_tags[] = 's';
|
*/
|
||||||
$allowed_tags[] = 'strike';
|
function processModules() {
|
||||||
|
|
||||||
|
$this->manager->setup($this->config);
|
||||||
|
|
||||||
|
foreach ($this->manager->activeModules as $module) {
|
||||||
|
foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v;
|
||||||
|
foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v;
|
||||||
|
foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($allowed_tags as $tag) {
|
$this->info = $this->manager->getElements($this->config);
|
||||||
$this->info[$tag] = new HTMLPurifier_ElementDef();
|
$this->info_content_sets = $this->manager->contentSets->lookup;
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
}
|
||||||
// info[]->child : defines allowed children for elements
|
|
||||||
|
|
||||||
// emulates the structure of the DTD
|
/**
|
||||||
// however, these are condensed, with bad stuff taken out
|
* Sets up stuff based on config. We need a better way of doing this.
|
||||||
// screening process was done by hand
|
*/
|
||||||
|
function setupConfigStuff() {
|
||||||
|
|
||||||
// entities: prefixed with e_ and _ replaces . from DTD
|
$block_wrapper = $this->config->get('HTML', 'BlockWrapper');
|
||||||
// double underlines are entities we made up
|
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
|
||||||
|
|
||||||
// we don't use an array because that complicates interpolation
|
|
||||||
// strings are used instead of arrays because if you use arrays,
|
|
||||||
// you have to do some hideous manipulation with array_merge()
|
|
||||||
|
|
||||||
// todo: determine whether or not having allowed children
|
|
||||||
// that aren't allowed globally affects security (it shouldn't)
|
|
||||||
// if above works out, extend children definitions to include all
|
|
||||||
// possible elements (allowed elements will dictate which ones
|
|
||||||
// get dropped
|
|
||||||
|
|
||||||
$e_special_extra = 'img';
|
|
||||||
$e_special_basic = 'br | span | bdo';
|
|
||||||
$e_special = "$e_special_basic | $e_special_extra";
|
|
||||||
$e_fontstyle_extra = 'big | small';
|
|
||||||
$e_fontstyle_basic = 'tt | i | b | u | s | strike';
|
|
||||||
$e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra";
|
|
||||||
$e_phrase_extra = 'sub | sup';
|
|
||||||
$e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
|
|
||||||
' | cite | abbr | acronym';
|
|
||||||
$e_phrase = "$e_phrase_basic | $e_phrase_extra";
|
|
||||||
$e_misc_inline = 'ins | del';
|
|
||||||
$e_misc = "$e_misc_inline";
|
|
||||||
$e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
|
|
||||||
// pseudo-property we created for convenience, see later on
|
|
||||||
$e__inline = "#PCDATA | $e_inline | $e_misc_inline";
|
|
||||||
// note the casing
|
|
||||||
$e_Inline = new HTMLPurifier_ChildDef_Optional($e__inline);
|
|
||||||
$e_heading = 'h1|h2|h3|h4|h5|h6';
|
|
||||||
$e_lists = 'ul | ol | dl';
|
|
||||||
$e_blocktext = 'pre | hr | blockquote | address';
|
|
||||||
$e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
|
|
||||||
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
|
|
||||||
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
|
|
||||||
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
|
||||||
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
|
|
||||||
" | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
|
|
||||||
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
|
||||||
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
|
|
||||||
" | $e_misc_inline");
|
|
||||||
$e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
|
||||||
$e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
|
||||||
|
|
||||||
$this->info['ins']->child =
|
|
||||||
$this->info['del']->child =
|
|
||||||
new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
|
|
||||||
|
|
||||||
$this->info['dd']->child =
|
|
||||||
$this->info['li']->child =
|
|
||||||
$this->info['div']->child = $e_Flow;
|
|
||||||
|
|
||||||
if ($this->strict) {
|
|
||||||
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
|
|
||||||
} else {
|
|
||||||
$this->info['blockquote']->child = $e_Flow;
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->info['caption']->child =
|
|
||||||
$this->info['em']->child =
|
|
||||||
$this->info['strong']->child =
|
|
||||||
$this->info['dfn']->child =
|
|
||||||
$this->info['code']->child =
|
|
||||||
$this->info['samp']->child =
|
|
||||||
$this->info['kbd']->child =
|
|
||||||
$this->info['var']->child =
|
|
||||||
$this->info['cite']->child =
|
|
||||||
$this->info['abbr']->child =
|
|
||||||
$this->info['acronym']->child =
|
|
||||||
$this->info['q']->child =
|
|
||||||
$this->info['sub']->child =
|
|
||||||
$this->info['tt']->child =
|
|
||||||
$this->info['sup']->child =
|
|
||||||
$this->info['i']->child =
|
|
||||||
$this->info['b']->child =
|
|
||||||
$this->info['big']->child =
|
|
||||||
$this->info['small']->child=
|
|
||||||
$this->info['bdo']->child =
|
|
||||||
$this->info['span']->child =
|
|
||||||
$this->info['dt']->child =
|
|
||||||
$this->info['p']->child =
|
|
||||||
$this->info['h1']->child =
|
|
||||||
$this->info['h2']->child =
|
|
||||||
$this->info['h3']->child =
|
|
||||||
$this->info['h4']->child =
|
|
||||||
$this->info['h5']->child =
|
|
||||||
$this->info['h6']->child = $e_Inline;
|
|
||||||
|
|
||||||
if (!$this->strict) {
|
|
||||||
$this->info['u']->child =
|
|
||||||
$this->info['s']->child =
|
|
||||||
$this->info['strike']->child = $e_Inline;
|
|
||||||
}
|
|
||||||
|
|
||||||
// the only three required definitions, besides custom table code
|
|
||||||
$this->info['ol']->child =
|
|
||||||
$this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li');
|
|
||||||
|
|
||||||
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
|
|
||||||
|
|
||||||
if ($this->strict) {
|
|
||||||
$this->info['address']->child = $e_Inline;
|
|
||||||
} else {
|
|
||||||
$this->info['address']->child =
|
|
||||||
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
|
||||||
" | $e_misc_inline");
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->info['img']->child =
|
|
||||||
$this->info['br']->child =
|
|
||||||
$this->info['hr']->child = new HTMLPurifier_ChildDef_Empty();
|
|
||||||
|
|
||||||
$this->info['pre']->child = $e_pre_content;
|
|
||||||
|
|
||||||
$this->info['a']->child = $e_a_content;
|
|
||||||
|
|
||||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
|
||||||
|
|
||||||
// not a real entity, watch the double underscore
|
|
||||||
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
|
||||||
$this->info['thead']->child = $e__row;
|
|
||||||
$this->info['tfoot']->child = $e__row;
|
|
||||||
$this->info['tbody']->child = $e__row;
|
|
||||||
$this->info['colgroup']->child = new HTMLPurifier_ChildDef_Optional('col');
|
|
||||||
$this->info['col']->child = new HTMLPurifier_ChildDef_Empty();
|
|
||||||
$this->info['tr']->child = new HTMLPurifier_ChildDef_Required('th | td');
|
|
||||||
$this->info['th']->child = $e_Flow;
|
|
||||||
$this->info['td']->child = $e_Flow;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// info[]->type : defines the type of the element (block or inline)
|
|
||||||
|
|
||||||
// reuses $e_Inline and $e_Block
|
|
||||||
foreach ($e_Inline->elements as $name => $bool) {
|
|
||||||
if ($name == '#PCDATA') continue;
|
|
||||||
if (!isset($this->info[$name])) continue;
|
|
||||||
$this->info[$name]->type = 'inline';
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ($e_Block->elements as $name => $bool) {
|
|
||||||
if (!isset($this->info[$name])) continue;
|
|
||||||
$this->info[$name]->type = 'block';
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ($e_Flow->elements as $name => $bool) {
|
|
||||||
$this->info_flow_elements[$name] = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// info[]->excludes : defines elements that aren't allowed in here
|
|
||||||
|
|
||||||
// make sure you test using isset() and not !empty()
|
|
||||||
|
|
||||||
$this->info['a']->excludes = array('a' => true);
|
|
||||||
$this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
|
|
||||||
// technically useless, but good to be indepth
|
|
||||||
'object', 'applet', 'font', 'basefont'));
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// info[]->attr : defines allowed attributes for elements
|
|
||||||
|
|
||||||
// this doesn't include REQUIRED declarations, those are handled
|
|
||||||
// by the transform classes. It will, however, do simple and slightly
|
|
||||||
// complex attribute value substitution
|
|
||||||
|
|
||||||
// the question of varying allowed attributes is more entangling.
|
|
||||||
|
|
||||||
$e_Text = new HTMLPurifier_AttrDef_Text();
|
|
||||||
|
|
||||||
// attrs, included in almost every single one except for a few,
|
|
||||||
// which manually override these in their local definitions
|
|
||||||
$this->info_global_attr = array(
|
|
||||||
// core attrs
|
|
||||||
'class' => new HTMLPurifier_AttrDef_Class(),
|
|
||||||
'title' => $e_Text,
|
|
||||||
'style' => new HTMLPurifier_AttrDef_CSS(),
|
|
||||||
// i18n
|
|
||||||
'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
|
|
||||||
'lang' => new HTMLPurifier_AttrDef_Lang(),
|
|
||||||
'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
|
|
||||||
);
|
|
||||||
|
|
||||||
if ($config->get('HTML', 'EnableAttrID')) {
|
|
||||||
$this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID();
|
|
||||||
}
|
|
||||||
|
|
||||||
// required attribute stipulation handled in attribute transformation
|
|
||||||
$this->info['bdo']->attr = array(); // nothing else
|
|
||||||
|
|
||||||
$this->info['br']->attr['dir'] = false;
|
|
||||||
$this->info['br']->attr['lang'] = false;
|
|
||||||
$this->info['br']->attr['xml:lang'] = false;
|
|
||||||
|
|
||||||
$this->info['td']->attr['abbr'] = $e_Text;
|
|
||||||
$this->info['th']->attr['abbr'] = $e_Text;
|
|
||||||
|
|
||||||
$this->setAttrForTableElements('align', new HTMLPurifier_AttrDef_Enum(
|
|
||||||
array('left', 'center', 'right', 'justify', 'char'), false));
|
|
||||||
|
|
||||||
$this->setAttrForTableElements('valign', new HTMLPurifier_AttrDef_Enum(
|
|
||||||
array('top', 'middle', 'bottom', 'baseline'), false));
|
|
||||||
|
|
||||||
$this->info['img']->attr['alt'] = $e_Text;
|
|
||||||
|
|
||||||
$e_TFrame = new HTMLPurifier_AttrDef_Enum(array('void', 'above',
|
|
||||||
'below', 'hsides', 'lhs', 'rhs', 'vsides', 'box', 'border'), false);
|
|
||||||
$this->info['table']->attr['frame'] = $e_TFrame;
|
|
||||||
|
|
||||||
$e_TRules = new HTMLPurifier_AttrDef_Enum(array('none', 'groups',
|
|
||||||
'rows', 'cols', 'all'), false);
|
|
||||||
$this->info['table']->attr['rules'] = $e_TRules;
|
|
||||||
|
|
||||||
$this->info['table']->attr['summary'] = $e_Text;
|
|
||||||
|
|
||||||
$this->info['table']->attr['border'] =
|
|
||||||
new HTMLPurifier_AttrDef_Pixels();
|
|
||||||
|
|
||||||
$e_Length = new HTMLPurifier_AttrDef_Length();
|
|
||||||
$this->info['table']->attr['cellpadding'] =
|
|
||||||
$this->info['table']->attr['cellspacing'] =
|
|
||||||
$this->info['table']->attr['width'] =
|
|
||||||
$this->info['img']->attr['height'] =
|
|
||||||
$this->info['img']->attr['width'] = $e_Length;
|
|
||||||
$this->setAttrForTableElements('charoff', $e_Length);
|
|
||||||
|
|
||||||
$e_MultiLength = new HTMLPurifier_AttrDef_MultiLength();
|
|
||||||
$this->info['col']->attr['width'] =
|
|
||||||
$this->info['colgroup']->attr['width'] = $e_MultiLength;
|
|
||||||
|
|
||||||
$e__NumberSpan = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
|
||||||
$this->info['colgroup']->attr['span'] =
|
|
||||||
$this->info['col']->attr['span'] =
|
|
||||||
$this->info['td']->attr['rowspan'] =
|
|
||||||
$this->info['th']->attr['rowspan'] =
|
|
||||||
$this->info['td']->attr['colspan'] =
|
|
||||||
$this->info['th']->attr['colspan'] = $e__NumberSpan;
|
|
||||||
|
|
||||||
if (!$config->get('Attr', 'DisableURI')) {
|
|
||||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
|
||||||
$this->info['a']->attr['href'] =
|
|
||||||
$this->info['img']->attr['longdesc'] =
|
|
||||||
$this->info['del']->attr['cite'] =
|
|
||||||
$this->info['ins']->attr['cite'] =
|
|
||||||
$this->info['blockquote']->attr['cite'] =
|
|
||||||
$this->info['q']->attr['cite'] = $e_URI;
|
|
||||||
|
|
||||||
// URI that causes HTTP request
|
|
||||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!$this->strict) {
|
|
||||||
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
|
|
||||||
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// info_tag_transform : transformations of tags
|
|
||||||
|
|
||||||
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
|
|
||||||
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
|
|
||||||
$this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
|
|
||||||
$this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// info[]->auto_close : tags that automatically close another
|
|
||||||
|
|
||||||
// todo: determine whether or not SGML-like modeling based on
|
|
||||||
// mandatory/optional end tags would be a better policy
|
|
||||||
|
|
||||||
// make sure you test using isset() not !empty()
|
|
||||||
|
|
||||||
// these are all block elements: blocks aren't allowed in P
|
|
||||||
$this->info['p']->auto_close = array_flip(array(
|
|
||||||
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
|
|
||||||
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
|
|
||||||
'table', 'ul'
|
|
||||||
));
|
|
||||||
|
|
||||||
$this->info['li']->auto_close = array('li' => true);
|
|
||||||
|
|
||||||
// we need TABLE and heading mismatch code
|
|
||||||
// we may need to make this more flexible for heading mismatch,
|
|
||||||
// or we can just create another info
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// info[]->attr_transform_* : attribute transformations in elements
|
|
||||||
// pre is applied before any validation is done, post is done after
|
|
||||||
|
|
||||||
$this->info['h1']->attr_transform_pre[] =
|
|
||||||
$this->info['h2']->attr_transform_pre[] =
|
|
||||||
$this->info['h3']->attr_transform_pre[] =
|
|
||||||
$this->info['h4']->attr_transform_pre[] =
|
|
||||||
$this->info['h5']->attr_transform_pre[] =
|
|
||||||
$this->info['h6']->attr_transform_pre[] =
|
|
||||||
$this->info['p'] ->attr_transform_pre[] =
|
|
||||||
new HTMLPurifier_AttrTransform_TextAlign();
|
|
||||||
|
|
||||||
$this->info['bdo']->attr_transform_post[] =
|
|
||||||
new HTMLPurifier_AttrTransform_BdoDir();
|
|
||||||
|
|
||||||
$this->info['img']->attr_transform_post[] =
|
|
||||||
new HTMLPurifier_AttrTransform_ImgRequired();
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// info_attr_transform_* : global attribute transformation that is
|
|
||||||
// unconditionally called. Good for transformations that have complex
|
|
||||||
// start conditions
|
|
||||||
// pre is applied before any validation is done, post is done after
|
|
||||||
|
|
||||||
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
|
|
||||||
|
|
||||||
// protect against stdclasses floating around
|
|
||||||
foreach ($this->info as $key => $obj) {
|
|
||||||
if ($obj instanceof stdClass) {
|
|
||||||
unset($this->info[$key]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// info_block_wrapper : wraps inline elements in block context
|
|
||||||
|
|
||||||
$block_wrapper = $config->get('HTML', 'BlockWrapper');
|
|
||||||
if (isset($e_Block->elements[$block_wrapper])) {
|
|
||||||
$this->info_block_wrapper = $block_wrapper;
|
$this->info_block_wrapper = $block_wrapper;
|
||||||
} else {
|
} else {
|
||||||
trigger_error('Cannot use non-block element as block wrapper.',
|
trigger_error('Cannot use non-block element as block wrapper.',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
$parent = $this->config->get('HTML', 'Parent');
|
||||||
// info_parent : parent element of the HTML fragment
|
$def = $this->manager->getElement($parent, $this->config);
|
||||||
|
if ($def) {
|
||||||
$parent = $config->get('HTML', 'Parent');
|
|
||||||
if (isset($this->info[$parent])) {
|
|
||||||
$this->info_parent = $parent;
|
$this->info_parent = $parent;
|
||||||
|
$this->info_parent_def = $def;
|
||||||
} else {
|
} else {
|
||||||
trigger_error('Cannot use unrecognized element as parent.',
|
trigger_error('Cannot use unrecognized element as parent.',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
|
$this->info_parent_def = $this->manager->getElement(
|
||||||
|
$this->info_parent, $this->config);
|
||||||
}
|
}
|
||||||
$this->info_parent_def = $this->info[$this->info_parent];
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
// setup allowed elements, SubtractiveWhitelist module
|
||||||
// %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
|
$allowed_elements = $this->config->get('HTML', 'AllowedElements');
|
||||||
|
|
||||||
$allowed_elements = $config->get('HTML', 'AllowedElements');
|
|
||||||
if (is_array($allowed_elements)) {
|
if (is_array($allowed_elements)) {
|
||||||
foreach ($this->info as $name => $d) {
|
foreach ($this->info as $name => $d) {
|
||||||
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
|
$allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
|
||||||
if (is_array($allowed_attributes)) {
|
if (is_array($allowed_attributes)) {
|
||||||
foreach ($this->info_global_attr as $attr_key => $info) {
|
foreach ($this->info_global_attr as $attr_key => $info) {
|
||||||
if (!isset($allowed_attributes["*.$attr_key"])) {
|
if (!isset($allowed_attributes["*.$attr_key"])) {
|
||||||
@ -582,74 +234,16 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
}
|
}
|
||||||
foreach ($this->info as $tag => $info) {
|
foreach ($this->info as $tag => $info) {
|
||||||
foreach ($info->attr as $attr => $attr_info) {
|
foreach ($info->attr as $attr => $attr_info) {
|
||||||
if (!isset($allowed_attributes["$tag.$attr"])) {
|
if (!isset($allowed_attributes["$tag.$attr"]) &&
|
||||||
|
!isset($allowed_attributes["*.$attr"])) {
|
||||||
unset($this->info[$tag]->attr[$attr]);
|
unset($this->info[$tag]->attr[$attr]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function setAttrForTableElements($attr, $def) {
|
|
||||||
$this->info['col']->attr[$attr] =
|
|
||||||
$this->info['colgroup']->attr[$attr] =
|
|
||||||
$this->info['tbody']->attr[$attr] =
|
|
||||||
$this->info['td']->attr[$attr] =
|
|
||||||
$this->info['tfoot']->attr[$attr] =
|
|
||||||
$this->info['th']->attr[$attr] =
|
|
||||||
$this->info['thead']->attr[$attr] =
|
|
||||||
$this->info['tr']->attr[$attr] = $def;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Structure that stores an element definition.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ElementDef
|
|
||||||
{
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Associative array of attribute name to HTMLPurifier_AttrDef
|
|
||||||
* @public
|
|
||||||
*/
|
|
||||||
var $attr = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* List of tag's HTMLPurifier_AttrTransform to be done before validation
|
|
||||||
* @public
|
|
||||||
*/
|
|
||||||
var $attr_transform_pre = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* List of tag's HTMLPurifier_AttrTransform to be done after validation
|
|
||||||
* @public
|
|
||||||
*/
|
|
||||||
var $attr_transform_post = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lookup table of tags that close this tag.
|
|
||||||
* @public
|
|
||||||
*/
|
|
||||||
var $auto_close = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HTMLPurifier_ChildDef of this tag.
|
|
||||||
* @public
|
|
||||||
*/
|
|
||||||
var $child;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Type of the tag: inline or block or unknown?
|
|
||||||
* @public
|
|
||||||
*/
|
|
||||||
var $type = 'unknown';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lookup table of tags excluded from all descendants of this tag.
|
|
||||||
* @public
|
|
||||||
*/
|
|
||||||
var $excludes = array();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
125
library/HTMLPurifier/HTMLModule.php
Normal file
125
library/HTMLPurifier/HTMLModule.php
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents an XHTML 1.1 module, with information on elements, tags
|
||||||
|
* and attributes.
|
||||||
|
* @note Even though this is technically XHTML 1.1, it is also used for
|
||||||
|
* regular HTML parsing. We are using modularization as a convenient
|
||||||
|
* way to represent the internals of HTMLDefinition, and our
|
||||||
|
* implementation is by no means conforming and does not directly
|
||||||
|
* use the normative DTDs or XML schemas.
|
||||||
|
* @note The public variables in a module should almost directly
|
||||||
|
* correspond to the variables in HTMLPurifier_HTMLDefinition.
|
||||||
|
* However, the prefix info carries no special meaning in these
|
||||||
|
* objects (include it anyway if that's the correspondence though).
|
||||||
|
*/
|
||||||
|
|
||||||
|
class HTMLPurifier_HTMLModule
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Short unique string identifier of the module
|
||||||
|
*/
|
||||||
|
var $name;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dynamically set integer that specifies when the module was loaded in.
|
||||||
|
*/
|
||||||
|
var $order;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Informally, a list of elements this module changes. Not used in
|
||||||
|
* any significant way.
|
||||||
|
* @protected
|
||||||
|
*/
|
||||||
|
var $elements = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associative array of element names to element definitions.
|
||||||
|
* Some definitions may be incomplete, to be merged in later
|
||||||
|
* with the full definition.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $info = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associative array of content set names to content set additions.
|
||||||
|
* This is commonly used to, say, add an A element to the Inline
|
||||||
|
* content set. This corresponds to an internal variable $content_sets
|
||||||
|
* and NOT info_content_sets member variable of HTMLDefinition.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $content_sets = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associative array of attribute collection names to attribute
|
||||||
|
* collection additions. More rarely used for adding attributes to
|
||||||
|
* the global collections. Example is the StyleAttribute module adding
|
||||||
|
* the style attribute to the Core. Corresponds to HTMLDefinition's
|
||||||
|
* attr_collections->info, since the object's data is only info,
|
||||||
|
* with extra behavior associated with it.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $attr_collections = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $info_tag_transform = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List of HTMLPurifier_AttrTransform to be performed before validation.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $info_attr_transform_pre = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List of HTMLPurifier_AttrTransform to be performed after validation.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $info_attr_transform_post = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Boolean flag that indicates whether or not getChildDef is implemented.
|
||||||
|
* For optimization reasons: may save a call to a function. Be sure
|
||||||
|
* to set it if you do implement getChildDef(), otherwise it will have
|
||||||
|
* no effect!
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $defines_child_def = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves a proper HTMLPurifier_ChildDef subclass based on
|
||||||
|
* content_model and content_model_type member variables of
|
||||||
|
* the HTMLPurifier_ElementDef class. There is a similar function
|
||||||
|
* in HTMLPurifier_HTMLDefinition.
|
||||||
|
* @param $def HTMLPurifier_ElementDef instance
|
||||||
|
* @return HTMLPurifier_ChildDef subclass
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
function getChildDef($def) {return false;}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hook method that lets module perform arbitrary operations on
|
||||||
|
* HTMLPurifier_HTMLDefinition before the module gets processed.
|
||||||
|
* @param $definition Reference to HTMLDefinition being setup
|
||||||
|
*/
|
||||||
|
function preProcess(&$definition) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hook method that lets module perform arbitrary operations
|
||||||
|
* on HTMLPurifier_HTMLDefinition after the module gets processed.
|
||||||
|
* @param $definition Reference to HTMLDefinition being setup
|
||||||
|
*/
|
||||||
|
function postProcess(&$definition) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hook method that is called when a module gets registered to
|
||||||
|
* the definition.
|
||||||
|
* @param $definition Reference to HTMLDefinition being setup
|
||||||
|
*/
|
||||||
|
function setup(&$definition) {}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
43
library/HTMLPurifier/HTMLModule/Bdo.php
Normal file
43
library/HTMLPurifier/HTMLModule/Bdo.php
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Bi-directional Text Module. Supplies the bdo element, which
 * declares the directionality of its content. Text Extension Module.
 */
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
{
    
    var $name = 'Bdo';
    var $elements = array('bdo');
    var $info = array();
    var $content_sets = array('Inline' => 'bdo');
    var $attr_collections = array(
        // placeholder only; the constructor installs the real definition
        'I18N' => array('dir' => false)
    );
    
    /**
     * Creates the dir attribute definition, registers it in the I18N
     * collection, and builds the definition of the bdo element.
     */
    function HTMLPurifier_HTMLModule_Bdo() {
        // the same ltr/rtl enumeration is used for I18N and for bdo itself
        $direction = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
        $this->attr_collections['I18N']['dir'] = $direction;
        
        $bdo = new HTMLPurifier_ElementDef();
        // The Abstract Module specification has the attribute inclusions
        // wrong for bdo: bdo allows xml:lang too (and we'll toss in lang
        // for good measure, though it is not allowed for XHTML 1.1, this
        // will be managed with a global attribute transform)
        $bdo->attr = array(
            0 => array('Core', 'Lang'),
            'dir' => $direction // required
        );
        $bdo->content_model_type = 'optional';
        $bdo->content_model = '#PCDATA | Inline';
        // dir is required, so this transform provides fallback behavior
        // for when it is missing
        $bdo->attr_transform_post['required-dir'] =
            new HTMLPurifier_AttrTransform_BdoDir();
        
        $this->info['bdo'] = $bdo;
    }
    
}
|
||||||
|
|
||||||
|
?>
|
31
library/HTMLPurifier/HTMLModule/CommonAttributes.php
Normal file
31
library/HTMLPurifier/HTMLModule/CommonAttributes.php
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Module that declares the Core, Lang, I18N and Common attribute
 * collections. It defines no elements of its own.
 */
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
    var $name = 'CommonAttributes';
    
    var $attr_collections = array(
        'Core' => array(
            0 => array('Style'),
            // 'xml:space' => false,
            'class' => 'NMTOKENS',
            'id' => 'ID',
            'title' => 'CDATA',
        ),
        // placeholder, the constructor supplies the real definition
        'Lang' => array('xml:lang' => false),
        // proprietary, for xml:lang/lang
        'I18N' => array(0 => array('Lang')),
        'Common' => array(0 => array('Core', 'I18N'))
    );
    
    /**
     * Replaces the xml:lang placeholder with a language attribute
     * definition object.
     */
    function HTMLPurifier_HTMLModule_CommonAttributes() {
        $language = new HTMLPurifier_AttrDef_Lang();
        $this->attr_collections['Lang']['xml:lang'] = $language;
    }
}
|
||||||
|
|
||||||
|
?>
|
46
library/HTMLPurifier/HTMLModule/Edit.php
Normal file
46
library/HTMLPurifier/HTMLModule/Edit.php
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Edit Module, provides the editing-related elements del and
 * ins. Text Extension Module.
 */
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
{
    
    var $name = 'Edit';
    var $elements = array('del', 'ins');
    var $info = array();
    var $content_sets = array('Inline' => 'del | ins');
    
    /**
     * Gives del and ins identical definitions that use the chameleon
     * content model type.
     */
    function HTMLPurifier_HTMLModule_Edit() {
        foreach ($this->elements as $tag) {
            $def = new HTMLPurifier_ElementDef();
            $def->attr = array(
                0 => array('Common'),
                'cite' => 'URI',
                // 'datetime' => 'Datetime' // Datetime not implemented
            );
            // HTML 4.01 specifies that ins/del must not contain block
            // elements when used in an inline context, chameleon is
            // a complicated workaround to achieve this effect
            $def->content_model_type = 'chameleon';
            // Inline context ! Block context (the exclamation mark is the
            // separator that getChildDef() splits on)
            $def->content_model = '#PCDATA | Inline ! #PCDATA | Flow';
            $this->info[$tag] = $def;
        }
    }
    
    var $defines_child_def = true;
    
    /**
     * Builds the child definition for elements of type chameleon.
     * @param $def Element definition whose content_model and
     *             content_model_type are inspected
     * @return HTMLPurifier_ChildDef_Chameleon built from the two halves
     *         of the content model, or false for any other model type
     */
    function getChildDef($def) {
        if ($def->content_model_type == 'chameleon') {
            $halves = explode('!', $def->content_model);
            return new HTMLPurifier_ChildDef_Chameleon($halves[0], $halves[1]);
        }
        return false;
    }
    
}
|
||||||
|
|
||||||
|
?>
|
36
library/HTMLPurifier/HTMLModule/Hypertext.php
Normal file
36
library/HTMLPurifier/HTMLModule/Hypertext.php
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Hypertext Module, provides the a element used for hypertext
 * links. Core Module.
 */
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
{
    
    var $name = 'Hypertext';
    var $elements = array('a');
    var $info = array();
    var $content_sets = array('Inline' => 'a');
    
    /**
     * Builds the definition of the a element.
     */
    function HTMLPurifier_HTMLModule_Hypertext() {
        $anchor = new HTMLPurifier_ElementDef();
        // only href is enabled; the remaining attributes stay commented out
        $anchor->attr = array(
            0 => array('Common'),
            // 'accesskey' => 'Character',
            // 'charset' => 'Charset',
            'href' => 'URI',
            //'hreflang' => 'LanguageCode',
            //'rel' => 'LinkTypes',
            //'rev' => 'LinkTypes',
            //'tabindex' => 'Number',
            //'type' => 'ContentType',
        );
        $anchor->content_model_type = 'optional';
        $anchor->content_model = '#PCDATA | Inline';
        // a is listed in its own excludes
        $anchor->excludes = array('a' => true);
        
        $this->info['a'] = $anchor;
    }
    
}
|
||||||
|
|
||||||
|
?>
|
38
library/HTMLPurifier/HTMLModule/Image.php
Normal file
38
library/HTMLPurifier/HTMLModule/Image.php
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||||
|
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Image Module, provides the img element for basic image
 * embedding.
 * @note There is specialized code for removing empty images in
 *       HTMLPurifier_Strategy_RemoveForeignElements
 */
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
{
    
    var $name = 'Image';
    var $elements = array('img');
    var $info = array();
    var $content_sets = array('Inline' => 'img');
    
    /**
     * Builds the definition of the img element.
     */
    function HTMLPurifier_HTMLModule_Image() {
        $image = new HTMLPurifier_ElementDef();
        $image->content_model_type = 'empty';
        $image->attr = array(
            0 => array('Common'),
            'alt' => 'Text',
            'height' => 'Length',
            'longdesc' => 'URI',
            'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
            'width' => 'Length'
        );
        // appended without an explicit key
        $image->attr_transform_post[] =
            new HTMLPurifier_AttrTransform_ImgRequired();
        
        $this->info['img'] = $image;
    }
    
}
|
||||||
|
|
||||||
|
?>
|
60
library/HTMLPurifier/HTMLModule/Legacy.php
Normal file
60
library/HTMLPurifier/HTMLModule/Legacy.php
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Legacy module, covering elements that were previously
 * deprecated.
 * 
 * @note Not all legacy elements have been implemented yet, which
 *       is a bit of a reverse problem as compared to browsers! In
 *       addition, this legacy module may implement a bit more than
 *       mandated by XHTML 1.1.
 * 
 * This module can be used in combination with TransformToStrict in order
 * to transform as many deprecated elements as possible, but retain
 * questionably deprecated elements that do not have good alternatives
 * as well as transform elements that don't have an implementation.
 * See docs/ref-strictness.txt for more details.
 */

class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
{
    
    // incomplete
    
    var $name = 'Legacy';
    var $elements = array('u', 's', 'strike');
    var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote');
    
    /**
     * Defines the new legacy elements and registers non-standalone
     * definitions that modify li, ol, address and blockquote.
     */
    function HTMLPurifier_HTMLModule_Legacy() {
        
        // new elements: u, s, strike (add conditionals here as more
        // elements get added)
        foreach ($this->elements as $tag) {
            $def = new HTMLPurifier_ElementDef();
            $def->content_model = 'Inline | #PCDATA';
            $def->content_model_type = 'optional';
            $def->attr[0] = array('Common');
            $this->info[$tag] = $def;
        }
        
        // modifications to old elements, each flagged as not standalone
        foreach ($this->non_standalone_elements as $tag) {
            $def = new HTMLPurifier_ElementDef();
            $def->standalone = false;
            switch ($tag) {
                case 'li':
                    $def->attr['value'] = new HTMLPurifier_AttrDef_Integer();
                    break;
                case 'ol':
                    $def->attr['start'] = new HTMLPurifier_AttrDef_Integer();
                    break;
                case 'address':
                    $def->content_model = 'Inline | #PCDATA | p';
                    $def->content_model_type = 'optional';
                    $def->child = false;
                    break;
                case 'blockquote':
                    $def->content_model = 'Flow | #PCDATA';
                    $def->content_model_type = 'optional';
                    $def->child = false;
                    break;
            }
            $this->info[$tag] = $def;
        }
        
    }
    
}
|
||||||
|
|
||||||
|
?>
|
46
library/HTMLPurifier/HTMLModule/List.php
Normal file
46
library/HTMLPurifier/HTMLModule/List.php
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 List Module, provides the list-oriented elements
 * dl, dt, dd, ol, ul and li. Core Module.
 */
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
{
    
    var $name = 'List';
    var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li');
    var $info = array();
    // According to the abstract schema, the List content set is a fully formed
    // one or more expr, but it invariably occurs in an optional declaration
    // so we're not going to do that subtlety. It might cause trouble
    // if a user defines "List" and expects that multiple lists are
    // allowed to be specified, but then again, that's not very intuitive.
    // Furthermore, the actual XML Schema may disagree. Regardless,
    // we don't have support for such nested expressions without using
    // the incredibly inefficient and draconic Custom ChildDef.
    var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List');
    
    /**
     * Builds one definition per list element; every element accepts the
     * Common attribute collection and differs only in its content model.
     */
    function HTMLPurifier_HTMLModule_List() {
        foreach ($this->elements as $tag) {
            $def = new HTMLPurifier_ElementDef();
            $def->attr = array(0 => array('Common'));
            switch ($tag) {
                case 'li':
                    $def->content_model = '#PCDATA | Flow';
                    $def->content_model_type = 'optional';
                    // this could be a LOT more robust
                    $def->auto_close = array('li' => true);
                    break;
                case 'dd':
                    $def->content_model = '#PCDATA | Flow';
                    $def->content_model_type = 'optional';
                    break;
                case 'ol':
                case 'ul':
                    $def->content_model = 'li';
                    $def->content_model_type = 'required';
                    break;
                case 'dt':
                    $def->content_model = '#PCDATA | Inline';
                    $def->content_model_type = 'optional';
                    break;
                case 'dl':
                    $def->content_model = 'dt | dd';
                    $def->content_model_type = 'required';
                    break;
            }
            $this->info[$tag] = $def;
        }
    }
    
}
|
||||||
|
|
||||||
|
?>
|
41
library/HTMLPurifier/HTMLModule/Presentation.php
Normal file
41
library/HTMLPurifier/HTMLModule/Presentation.php
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Presentation Module, provides simple presentation-related
 * markup. Text Extension Module.
 * @note The official XML Schema and DTD specs further divide this into
 *       two modules:
 *          - Block Presentation (hr)
 *          - Inline Presentation (b, big, i, small, sub, sup, tt)
 *       We have chosen not to heed this distinction, as content_sets
 *       provides satisfactory disambiguation.
 */
class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
{
    
    var $name = 'Presentation';
    var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt');
    var $info = array();
    var $content_sets = array(
        'Block' => 'hr',
        'Inline' => 'b | big | i | small | sub | sup | tt'
    );
    
    /**
     * Builds one definition per element: hr is empty, every other
     * element optionally holds character data and inline content.
     */
    function HTMLPurifier_HTMLModule_Presentation() {
        foreach ($this->elements as $tag) {
            $def = new HTMLPurifier_ElementDef();
            $def->attr = array(0 => array('Common'));
            if ($tag != 'hr') {
                $def->content_model = '#PCDATA | Inline';
                $def->content_model_type = 'optional';
            } else {
                // hr gets no content model string, only the empty type
                $def->content_model_type = 'empty';
            }
            $this->info[$tag] = $def;
        }
    }
    
}
|
||||||
|
|
||||||
|
?>
|
27
library/HTMLPurifier/HTMLModule/StyleAttribute.php
Normal file
27
library/HTMLPurifier/HTMLModule/StyleAttribute.php
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS.php';
|
||||||
|
|
||||||
|
/**
 * Style Attribute module: declares the Style attribute collection, which
 * holds the style attribute, and includes that collection in Core.
 * It defines no elements.
 */
class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
{
    
    var $name = 'StyleAttribute';
    // The inclusion routine differs from the Abstract Modules but
    // is in line with the DTD and XML Schemas.
    var $attr_collections = array(
        // placeholder, the constructor supplies the real definition
        'Style' => array('style' => false),
        'Core' => array(0 => array('Style'))
    );
    
    /**
     * Replaces the style placeholder with a CSS attribute definition
     * object.
     */
    function HTMLPurifier_HTMLModule_StyleAttribute() {
        $css = new HTMLPurifier_AttrDef_CSS();
        $this->attr_collections['Style']['style'] = $css;
    }
    
}
|
||||||
|
|
||||||
|
?>
|
88
library/HTMLPurifier/HTMLModule/Tables.php
Normal file
88
library/HTMLPurifier/HTMLModule/Tables.php
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Table.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Tables Module, fully defines accessible table elements.
 */
class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
{
    
    var $name = 'Tables';
    var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col',
        'colgroup', 'tbody', 'thead', 'tfoot');
    var $info = array();
    var $content_sets = array('Block' => 'table');
    
    /**
     * Builds the attribute lists of every table element first, then
     * assigns the content models.
     */
    function HTMLPurifier_HTMLModule_Tables() {
        foreach ($this->elements as $tag) {
            $def = new HTMLPurifier_ElementDef();
            $def->attr = array(0 => array('Common'));
            
            if ($tag == 'table') {
                $def->attr['border'] = 'Pixels';
                $def->attr['cellpadding'] = 'Length';
                $def->attr['cellspacing'] = 'Length';
                $def->attr['frame'] = new HTMLPurifier_AttrDef_Enum(array(
                    'void', 'above', 'below', 'hsides', 'lhs', 'rhs',
                    'vsides', 'box', 'border'
                ), false);
                $def->attr['rules'] = new HTMLPurifier_AttrDef_Enum(array(
                    'none', 'groups', 'rows', 'cols', 'all'
                ), false);
                $def->attr['summary'] = 'Text';
                $def->attr['width'] = 'Length';
            } elseif ($tag != 'caption') {
                // caption keeps only the Common collection; everything
                // else reaching this branch gets the alignment attributes
                if ($tag == 'col' || $tag == 'colgroup') {
                    $def->attr['span'] = 'Number';
                    $def->attr['width'] = 'MultiLength';
                }
                if ($tag == 'td' || $tag == 'th') {
                    $def->attr['abbr'] = 'Text';
                    $def->attr['colspan'] = 'Number';
                    $def->attr['rowspan'] = 'Number';
                }
                $def->attr['align'] = new HTMLPurifier_AttrDef_Enum(array(
                    'left', 'center', 'right', 'justify', 'char'
                ), false);
                $def->attr['valign'] = new HTMLPurifier_AttrDef_Enum(array(
                    'top', 'middle', 'bottom', 'baseline'
                ), false);
                $def->attr['charoff'] = 'Length';
            }
            
            $this->info[$tag] = $def;
        }
        
        // element => array(content model, content model type)
        $models = array(
            'caption'  => array('#PCDATA | Inline', 'optional'),
            'td'       => array('#PCDATA | Flow', 'optional'),
            'th'       => array('#PCDATA | Flow', 'optional'),
            'tr'       => array('td | th', 'required'),
            'colgroup' => array('col', 'optional'),
            'tbody'    => array('tr', 'required'),
            'thead'    => array('tr', 'required'),
            'tfoot'    => array('tr', 'required')
        );
        foreach ($models as $tag => $model) {
            $this->info[$tag]->content_model = $model[0];
            $this->info[$tag]->content_model_type = $model[1];
        }
        
        // col has a type but no content model string
        $this->info['col']->content_model_type = 'empty';
        
        // Is done directly because it doesn't leverage substitution
        // mechanisms. True model is:
        // 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))'
        $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
        
    }
    
}
|
||||||
|
|
||||||
|
?>
|
78
library/HTMLPurifier/HTMLModule/Text.php
Normal file
78
library/HTMLPurifier/HTMLModule/Text.php
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Text Module, provides the basic text containers. Core Module.
 * @note In the normative XML Schema specification, this module
 *       is further abstracted into the following modules:
 *          - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6)
 *          - Block Structural (div, p)
 *          - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
 *          - Inline Structural (br, span)
 *       We have elected not to follow suit, but this may change.
 */
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
    
    var $name = 'Text';
    
    var $elements = array('abbr', 'acronym', 'address', 'blockquote',
        'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3',
        'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
        'var');
    
    var $info = array();
    
    var $content_sets = array(
        'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
        'Block' => 'address | blockquote | div | p | pre',
        'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
        'Flow' => 'Heading | Block | Inline'
    );
    
    /**
     * Builds one definition per element (attributes and content model
     * together), then adds the exclusions of pre and the auto-close list
     * of p.
     */
    function HTMLPurifier_HTMLModule_Text() {
        $common  = array(0 => array('Common'));
        $citable = array(0 => array('Common'), 'cite' => 'URI');
        
        foreach ($this->elements as $tag) {
            $def = new HTMLPurifier_ElementDef();
            switch ($tag) {
                case 'br':
                    // br takes Core only and gets no content model string
                    $def->attr = array(0 => array('Core'));
                    $def->content_model_type = 'empty';
                    break;
                case 'blockquote':
                    $def->attr = $citable;
                    $def->content_model = 'Heading | Block | List';
                    $def->content_model_type = 'optional';
                    break;
                case 'q':
                    $def->attr = $citable;
                    $def->content_model = '#PCDATA | Inline';
                    $def->content_model_type = 'optional';
                    break;
                case 'div':
                    $def->attr = $common;
                    $def->content_model = '#PCDATA | Flow';
                    $def->content_model_type = 'optional';
                    break;
                default:
                    $def->attr = $common;
                    $def->content_model = '#PCDATA | Inline';
                    $def->content_model_type = 'optional';
                    break;
            }
            $this->info[$tag] = $def;
        }
        
        // SGML permits exclusions for all descendants, but this is
        // not possible with DTDs or XML Schemas. W3C has elected to
        // use complicated compositions of content_models to simulate
        // exclusion for children, but we go the simpler, SGML-style
        // route of flat-out exclusions. Note that the Abstract Module
        // is blithely unaware of such distinctions.
        $excluded_from_pre = array(
            'img', 'big', 'small',
            'object', 'applet', 'font', 'basefont' // generally not allowed
        );
        $this->info['pre']->excludes = array_flip($excluded_from_pre);
        
        $closes_p = array(
            'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
            'table', 'ul'
        );
        $this->info['p']->auto_close = array_flip($closes_p);
    }
    
}
|
||||||
|
|
||||||
|
?>
|
86
library/HTMLPurifier/HTMLModule/TransformToStrict.php
Normal file
86
library/HTMLPurifier/HTMLModule/TransformToStrict.php
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/TagTransform/Simple.php';
|
||||||
|
require_once 'HTMLPurifier/TagTransform/Center.php';
|
||||||
|
require_once 'HTMLPurifier/TagTransform/Font.php';
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrTransform/Lang.php';
|
||||||
|
require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
|
||||||
|
|
||||||
|
/**
 * Proprietary module that transforms deprecated elements into Strict
 * HTML (see HTML 4.01 and XHTML 1.0) when possible.
 */

class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
{
    
    var $name = 'TransformToStrict';
    
    // we're actually modifying these elements, not defining them
    var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote');
    
    // placeholders, see constructor for definitions
    var $info_tag_transform = array(
        'font'  => false,
        'menu'  => false,
        'dir'   => false,
        'center'=> false
    );
    
    // placeholder
    var $attr_collections = array(
        'Lang' => array('lang' => false)
    );
    
    // placeholder
    var $info_attr_transform_post = array(
        'lang' => false
    );
    
    /**
     * Fills in the placeholders above and registers non-standalone
     * definitions for the elements this module modifies.
     */
    function HTMLPurifier_HTMLModule_TransformToStrict() {
        
        // deprecated tag transforms
        $this->info_tag_transform['font']   = new HTMLPurifier_TagTransform_Font();
        $this->info_tag_transform['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
        $this->info_tag_transform['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
        $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
        
        foreach ($this->elements as $tag) {
            $def = new HTMLPurifier_ElementDef();
            $def->standalone = false;
            $this->info[$tag] = $def;
        }
        
        // deprecated attribute transforms: one TextAlign transform is
        // assigned to the headings and to p
        $text_align = new HTMLPurifier_AttrTransform_TextAlign();
        foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p') as $tag) {
            $this->info[$tag]->attr_transform_pre['align'] = $text_align;
        }
        
        // xml:lang <=> lang mirroring, implement in TransformToStrict,
        // this is overridden in TransformToXHTML11
        $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
        $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
        
        // this should not be applied to XHTML 1.0 Transitional, ONLY
        // XHTML 1.0 Strict. We may need three classes
        $this->info['blockquote']->content_model_type = 'strictblockquote';
        $this->info['blockquote']->child = false; // recalculate please!
        
    }
    
    var $defines_child_def = true;
    
    /**
     * Builds the child definition for elements of type strictblockquote.
     * @param $def Element definition whose content_model and
     *             content_model_type are inspected
     * @return HTMLPurifier_ChildDef_StrictBlockquote built from the
     *         content model, or false for any other model type
     */
    function getChildDef($def) {
        if ($def->content_model_type == 'strictblockquote') {
            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
        }
        return false;
    }
    
}
|
||||||
|
|
||||||
|
?>
|
30
library/HTMLPurifier/HTMLModule/TransformToXHTML11.php
Normal file
30
library/HTMLPurifier/HTMLModule/TransformToXHTML11.php
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Proprietary module that transforms XHTML 1.0 deprecated aspects into
 * XHTML 1.1 compliant ones, when possible. For maximum effectiveness,
 * HTMLPurifier_HTMLModule_TransformToStrict must also be loaded
 * (otherwise, elements that were deprecated from Transitional to Strict
 * will not be transformed).
 * 
 * XHTML 1.1 compliant documents are automatically XHTML 1.0 compliant too,
 * although they may not be as friendly to legacy browsers.
 */

class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule
{
    
    var $name = 'TransformToXHTML11';
    
    // false marks the lang attribute for removal from the Lang collection
    var $attr_collections = array(
        'Lang' => array('lang' => false)
    );
    
    // false marks the lang post-transform for removal
    var $info_attr_transform_post = array('lang' => false);
    
}
|
||||||
|
|
||||||
|
?>
|
558
library/HTMLPurifier/HTMLModuleManager.php
Normal file
558
library/HTMLPurifier/HTMLModuleManager.php
Normal file
@ -0,0 +1,558 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
require_once 'HTMLPurifier/ElementDef.php';
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ContentSets.php';
|
||||||
|
require_once 'HTMLPurifier/AttrTypes.php';
|
||||||
|
require_once 'HTMLPurifier/AttrCollections.php';
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||||
|
|
||||||
|
// W3C modules
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Text.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/List.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Presentation.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Edit.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Bdo.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Tables.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Image.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Legacy.php';
|
||||||
|
|
||||||
|
// proprietary modules
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php';
|
||||||
|
|
||||||
|
// Registers the %HTML.Doctype directive: no default value (null), and
// either a string or null is accepted.
HTMLPurifier_ConfigSchema::define(
    'HTML',
    'Doctype',
    null,
    'string/null',
    'Doctype to use, valid values are HTML 4.01 Transitional, HTML 4.01 '.
    'Strict, XHTML 1.0 Transitional, XHTML 1.0 Strict, XHTML 1.1. '.
    'Technically speaking this is not actually a doctype (as it does '.
    'not identify a corresponding DTD), but we are using this name '.
    'for sake of simplicity. This will override any older directives '.
    'like %Core.XHTML or %HTML.Strict.'
);
|
||||||
|
|
||||||
|
class HTMLPurifier_HTMLModuleManager
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Array of HTMLPurifier_Module instances, indexed by module's class name.
|
||||||
|
* All known modules, regardless of use, are in this array.
|
||||||
|
*/
|
||||||
|
var $modules = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String doctype we will validate against. See $validModules for use.
|
||||||
|
*
|
||||||
|
* @note
|
||||||
|
* There is a special doctype '*' that acts both as the "default"
|
||||||
|
* doctype if a customized system only defines one doctype and
|
||||||
|
* also a catch-all doctype that gets merged into all the other
|
||||||
|
* module collections. When possible, use a private collection to
|
||||||
|
* share modules between doctypes: this special doctype is to
|
||||||
|
* make life more convenient for users.
|
||||||
|
*/
|
||||||
|
var $doctype;
|
||||||
|
var $doctypeAliases = array(); /**< Lookup array of strings to real doctypes */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associative array: $collections[$type][$doctype] = list of modules.
|
||||||
|
* This is used to logically separate types of functionality so that
|
||||||
|
* based on the doctype and other configuration settings they may
|
||||||
|
* be easily switched on and off. Custom setups may not need
|
||||||
|
* to use this abstraction, opting to have only one big collection
|
||||||
|
* with one valid doctype.
|
||||||
|
*/
|
||||||
|
var $collections = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Modules that may be used in a valid doctype of this kind.
|
||||||
|
* Correctional and leniency modules should not be placed in this
|
||||||
|
* array unless the user said so: don't stuff every possible lenient
|
||||||
|
* module for this doctype in here.
|
||||||
|
*/
|
||||||
|
var $validModules = array();
|
||||||
|
var $validCollections = array(); /**< Collections to merge into $validModules */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Modules that we will allow in input, subset of $validModules. Single
|
||||||
|
* element definitions may result in us consulting validModules.
|
||||||
|
*/
|
||||||
|
var $activeModules = array();
|
||||||
|
var $activeCollections = array(); /**< Collections to merge into $activeModules */
|
||||||
|
|
||||||
|
var $counter = 0; /**< Designates next available integer order for modules. */
|
||||||
|
var $initialized = false; /**< Says whether initialize() was called */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specifies what doctype to siphon new modules from addModule() to,
|
||||||
|
* or false to disable the functionality. Must be used in conjunction
|
||||||
|
* with $autoCollection.
|
||||||
|
*/
|
||||||
|
var $autoDoctype = false;
|
||||||
|
/**
|
||||||
|
* Specifies what collection to siphon new modules from addModule() to,
|
||||||
|
* or false to disable the functionality. Must be used in conjunction
|
||||||
|
* with $autoDoctype.
|
||||||
|
*/
|
||||||
|
var $autoCollection = false;
|
||||||
|
|
||||||
|
/** Associative array of element name to defining modules (always array) */
|
||||||
|
var $elementLookup = array();
|
||||||
|
|
||||||
|
/** List of prefixes we should use for resolving small names */
|
||||||
|
var $prefixes = array('HTMLPurifier_HTMLModule_');
|
||||||
|
|
||||||
|
var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
|
||||||
|
var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */
|
||||||
|
var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param $blank If true, don't do any initializing
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_HTMLModuleManager($blank = false) {

    // attrTypes is created unconditionally, even for a blank manager.
    // It is the only internal object intended to be edited directly;
    // the remaining internals are manipulated through modules.
    $this->attrTypes = new HTMLPurifier_AttrTypes();

    // a blank manager skips registration of the default modules
    // and collections
    if ($blank) {
        return;
    }
    $this->initialize();

}
|
||||||
|
|
||||||
|
/**
 * Registers the stock modules and the stock collections ('Safe',
 * 'Unsafe', 'Lenient', 'Correctional', 'Extension'), marks which
 * collections are valid/active, and finally turns on auto-siphoning
 * of later addModule() calls into the 'Extension' collection under
 * the catch-all doctype '*'.
 */
function initialize() {

    $this->initialized = true;

    // Recognized (not yet active) modules, grouped by what they do.
    // The registration order is significant: addModule() hands out
    // increasing ->order values.
    $definers   = array(
        'CommonAttributes',
        'Text', 'Hypertext', 'List', 'Presentation',
        'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute'
    );
    $hybrids    = array('Legacy');                                   // define-redefine
    $redefiners = array('TransformToStrict', 'TransformToXHTML11');  // redefine
    foreach (array_merge($definers, $hybrids, $redefiners) as $module_name) {
        $this->addModule($module_name);
    }

    // A nested array at index 0 of a collection is an inclusion list
    // (see processCollections()).
    $include_common       = array('_Common');
    $include_strict       = array('XHTML 1.0 Strict');
    $include_transitional = array('XHTML 1.0 Transitional');

    // Safe modules for supported doctypes. These are included
    // in the valid and active module lists by default.
    $safe = array();
    $safe['_Common'] = $definers; // leading _ indicates private
    // HTML definitions defer to the XHTML definitions
    $safe['HTML 4.01 Transitional'] = array($include_transitional);
    $safe['HTML 4.01 Strict']       = array($include_strict);
    // XHTML definitions
    $safe['XHTML 1.0 Transitional'] = array($include_strict, 'Legacy');
    $safe['XHTML 1.0 Strict']       = array($include_common);
    $safe['XHTML 1.1']              = array($include_common);
    $this->collections['Safe'] = $safe;

    // Modules that specify elements that are unsafe from untrusted
    // third-parties. These should be registered in $validModules but
    // almost never $activeModules unless you really know what you're
    // doing.
    $this->collections['Unsafe'] = array();

    // Modules to import if lenient mode (attempt to convert everything
    // to a valid representation) is on. These must not be in
    // $validModules unless specified so.
    $lenient = array();
    $lenient['HTML 4.01 Strict'] = array($include_strict);
    $lenient['XHTML 1.0 Strict'] = array('TransformToStrict');
    $lenient['XHTML 1.1']        = array($include_strict, 'TransformToXHTML11');
    $this->collections['Lenient'] = $lenient;

    // Modules to import if correctional mode (correct everything that
    // is feasible to strict mode) is on. These must not be in
    // $validModules unless specified so.
    $correctional = array();
    $correctional['HTML 4.01 Transitional'] = array($include_transitional);
    $correctional['XHTML 1.0 Transitional'] = array('TransformToStrict'); // probably want a different one
    $this->collections['Correctional'] = $correctional;

    // User-space modules, custom code or whatever
    $this->collections['Extension'] = array();

    // Active versus valid collections. ORDER IS IMPORTANT!
    // Definition modules come first...
    $this->makeCollectionActive('Safe');
    $this->makeCollectionValid('Unsafe');
    // ...then redefinition modules.
    $this->makeCollectionActive('Lenient');
    $this->makeCollectionActive('Correctional');

    // From here on, addModule() also files each new module under
    // collections['Extension']['*'].
    $this->autoDoctype    = '*';
    $this->autoCollection = 'Extension';

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a module to the recognized module list. This does not
|
||||||
|
* do anything else: the module must be added to a corresponding
|
||||||
|
* collection to be "activated".
|
||||||
|
* @param $module Mixed: string module name, with or without
|
||||||
|
* HTMLPurifier_HTMLModule prefix, or instance of
|
||||||
|
* subclass of HTMLPurifier_HTMLModule.
|
||||||
|
*/
|
||||||
|
function addModule($module) {

    // A string is resolved to a class: first taken verbatim, then
    // with each registered prefix prepended in turn. Anything that is
    // not a string is used as the module object as-is.
    if (is_string($module)) {
        $class = $module;
        if (!class_exists($class)) {
            foreach ($this->prefixes as $prefix) {
                $class = $prefix . $module;
                if (class_exists($class)) {
                    break;
                }
            }
        }
        if (!class_exists($class)) {
            trigger_error($module . ' module does not exist',
                E_USER_ERROR);
            return;
        }
        $module = new $class();
    }

    // stamp the module with the next free ordering slot and index it
    // by its self-declared name
    $module->order = $this->counter;
    $this->counter++;
    $this->modules[$module->name] = $module;

    // auto-siphoning requires both an auto doctype and an auto collection
    if ($this->autoDoctype === false || $this->autoCollection === false) {
        return;
    }
    $this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name;

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes a collection active, while also making it valid if not
|
||||||
|
* already done so. See $activeModules for the semantics of "active".
|
||||||
|
* @param $collection_name Name of collection to activate
|
||||||
|
*/
|
||||||
|
function makeCollectionActive($collection_name) {

    // An active collection must also be valid; register it as valid
    // first if that has not happened yet. The lookup is strict so that
    // a collection name is never matched through type juggling.
    if (!in_array($collection_name, $this->validCollections, true)) {
        $this->makeCollectionValid($collection_name);
    }

    // Activation is idempotent. setup() re-activates the auto
    // collection on every call, and without this check each call
    // would append another duplicate entry to $activeCollections.
    if (!in_array($collection_name, $this->activeCollections, true)) {
        $this->activeCollections[] = $collection_name;
    }

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes a collection valid. See $validModules for the semantics of "valid"
|
||||||
|
*/
|
||||||
|
function makeCollectionValid($collection_name) {
    // Appends the name to the list of collections that setup() merges
    // into $validModules. No duplicate check is performed here.
    array_push($this->validCollections, $collection_name);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a class prefix that addModule() will use to resolve a
|
||||||
|
* string name to a concrete class
|
||||||
|
*/
|
||||||
|
function addPrefix($prefix) {
    // The prefix is stored as a string at the end of the list, so
    // addModule() tries it after all previously registered prefixes.
    $as_string = (string) $prefix;
    $this->prefixes[] = $as_string;
}
|
||||||
|
|
||||||
|
/**
 * Resolves the doctype from the configuration, expands all collections
 * into module instances, assembles the valid and active module lists and
 * builds the helper objects (element lookup table, content sets,
 * attribute collections) used by getElement().
 * @param $config Configuration object, handed to getDoctype()
 */
function setup($config) {

    // the auto collection (if any) takes part as an active collection
    if ($this->autoCollection !== false) {
        $this->makeCollectionActive($this->autoCollection);
    }

    // determine the doctype, translating it if an alias is registered
    $this->doctype = $this->getDoctype($config);
    if (isset($this->doctypeAliases[$this->doctype])) {
        $this->doctype = $this->doctypeAliases[$this->doctype];
    }

    // convert every collection from lists of module names to
    // module name => module instance maps; each one is passed by
    // reference and rewritten in place
    foreach (array_keys($this->collections) as $col_i) {
        $this->processCollections($this->collections[$col_i]);
    }

    $this->validModules  = $this->assembleModules($this->validCollections);
    $this->activeModules = $this->assembleModules($this->activeCollections);

    // element name => list of names of valid modules defining it
    foreach ($this->validModules as $module) {
        foreach ($module->info as $name => $unused_def) {
            if (!isset($this->elementLookup[$name])) {
                $this->elementLookup[$name] = array();
            }
            $this->elementLookup[$name][] = $module->name;
        }
    }

    // Note the different choice of module list for the two helpers.
    //
    // Content sets are built from the *valid* modules: content models
    // that contain non-allowed elements are harmless because
    // RemoveForeignElements will ensure they never get in anyway, and
    // there is usually no reason to restrict a content model beyond
    // what is mandated by the doctype. Note, however, that this means
    // redefinitions of content models can't be tossed into
    // validModules willy-nilly: that stuff is still regulated by
    // configuration.
    $this->contentSets = new HTMLPurifier_ContentSets(
        $this->validModules
    );

    // Attribute collections are built from the *active* modules: only
    // explicitly allowed modules may affect the global attribute
    // collections. This means there's a distinction between loading
    // the Bdo module and the bdo element: Bdo will enable the dir
    // attribute on all elements, while bdo will only define the bdo
    // element, which will not have an editable directionality. This
    // might catch people who are loading only elements by surprise,
    // so we should consider loading an entire module if all the
    // elements it defines are requested by the user, especially if it
    // affects the global attribute collections.
    $this->attrCollections = new HTMLPurifier_AttrCollections(
        $this->attrTypes,
        $this->activeModules
    );

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes a list of collections and merges together all the defined
|
||||||
|
* modules for the current doctype from those collections.
|
||||||
|
* @param $collections List of collection suffixes we should grab
|
||||||
|
* modules from (like 'Safe' or 'Lenient')
|
||||||
|
*/
|
||||||
|
function assembleModules($collections) {

    // Result is a union (+=) of per-collection module maps, so a
    // module contributed by an earlier collection is never replaced
    // by a later one with the same key.
    $modules = array();
    $numOfCollectionsUsed = 0;

    foreach ($collections as $name) {

        if (!isset($this->collections[$name])) {
            trigger_error("$name collection is undefined", E_USER_ERROR);
            continue;
        }
        $cols = $this->collections[$name];

        // a '*' entry inside the doctype's own module map is a marker
        // that suppresses the catch-all doctype for this collection
        $disable_global = false;

        if (isset($cols[$this->doctype])) {
            $own = $cols[$this->doctype];
            if (isset($own['*'])) {
                unset($own['*']);
                $disable_global = true;
            }
            $modules += $own;
            $numOfCollectionsUsed++;
        }

        // merge in the catch-all doctype, unless we *are* the
        // catch-all, there is none, or it was suppressed above
        if ($disable_global || $this->doctype === '*' || !isset($cols['*'])) {
            continue;
        }
        $modules += $cols['*'];

    }

    if ($numOfCollectionsUsed < 1) {
        // possible XSS injection if user-specified doctypes
        // are allowed
        trigger_error("Doctype {$this->doctype} does not exist, ".
            "check for typos (if you desire a doctype that allows ".
            "no elements, use an empty array collection)", E_USER_ERROR);
    }

    return $modules;

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes a collection and performs inclusions and substitutions for it.
|
||||||
|
* @param $cols Reference to collections class member variable
|
||||||
|
*/
|
||||||
|
function processCollections(&$cols) {

    // $cols is the set of collections (modified in place)
    // $col_i is the name (index) of a collection
    // $col is a collection/list of modules
    //
    // Three passes: (1) expand inclusion lists, (2) swap module names
    // for module instances and sort them by ->order, (3) drop the
    // private collections whose name starts with an underscore.

    // pass 1: perform inclusions
    foreach ($cols as $col_i => $col) {
        $seen = array();

        // an inclusion list is a non-empty array sitting at index 0;
        // collections without one need no work in this pass
        if (empty($col[0]) || !is_array($col[0])) {
            continue;
        }
        $seen[$col_i] = true; // recursion reporting
        $includes = $col[0];
        unset($cols[$col_i][0]); // remove inclusions value, recursion guard

        // $includes doubles as a work queue: nested inclusion lists
        // found below are appended to it while we walk it
        $i = 0;
        while (isset($includes[$i])) {
            $inc = $includes[$i];
            $i++;

            if (isset($seen[$inc])) {
                trigger_error(
                    "Circular inclusion detected in $col_i collection",
                    E_USER_ERROR
                );
                continue;
            }
            $seen[$inc] = true;

            if (!isset($cols[$inc])) {
                trigger_error(
                    "Collection $col_i tried to include undefined ".
                    "collection $inc", E_USER_ERROR);
                continue;
            }

            foreach ($cols[$inc] as $module) {
                if (is_array($module)) {
                    // another inclusion! queue its members
                    foreach ($module as $inc2) {
                        $includes[] = $inc2;
                    }
                } else {
                    // merge in the other collection's module
                    $cols[$col_i][] = $module;
                }
            }
        }
    }

    // pass 2: replace names with real modules, inverting each
    // collection from a list into an assoc array of
    // module name => module instance
    foreach ($cols as $col_i => $col) {
        $ignore_global = false;
        $order = array();

        foreach ($col as $module_i => $module) {
            // every original entry is removed; valid ones are
            // re-inserted below under the module's name
            unset($cols[$col_i][$module_i]);

            if (is_array($module)) {
                trigger_error("Illegal inclusion array at index".
                    " $module_i found collection $col_i, inclusion".
                    " arrays must be at start of collection (index 0)",
                    E_USER_ERROR);
            } elseif ($module_i === '*' && $module === false) {
                // '*' => false marker, restored after sorting
                $ignore_global = true;
            } elseif (!isset($this->modules[$module])) {
                trigger_error(
                    "Collection $col_i references undefined ".
                    "module $module",
                    E_USER_ERROR
                );
            } else {
                $module = $this->modules[$module];
                $cols[$col_i][$module->name] = $module;
                $order[$module->name] = $module->order;
            }
        }

        // sort the collection's modules by their registration order
        array_multisort(
            $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
        );
        if ($ignore_global) {
            $cols[$col_i]['*'] = false;
        }
    }

    // pass 3: delete pseudo-collections
    foreach ($cols as $col_i => $col) {
        if ($col_i[0] == '_') {
            unset($cols[$col_i]);
        }
    }

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the doctype from the configuration object
|
||||||
|
*/
|
||||||
|
function getDoctype($config) {

    // an explicitly configured doctype always wins
    $doctype = $config->get('HTML', 'Doctype');
    if ($doctype !== null) {
        return $doctype;
    }

    // Manager was never initialize()d: don't do HTML-oriented
    // backwards compatibility stuff, use either the auto-doctype or
    // the catch-all doctype.
    if (!$this->initialized) {
        return $this->autoDoctype ? $this->autoDoctype : '*';
    }

    // Backwards compatibility: derive the doctype name from the
    // Core.XHTML and HTML.Strict directives.
    $family  = $config->get('Core', 'XHTML')  ? 'XHTML 1.0' : 'HTML 4.01';
    $variant = $config->get('HTML', 'Strict') ? ' Strict'   : ' Transitional';
    return $family . $variant;

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves merged element definitions for all active elements.
|
||||||
|
* @note We may want to generate an elements array during setup
|
||||||
|
* and pass that on, because a specific combination of
|
||||||
|
* elements may trigger the loading of a module.
|
||||||
|
* @param $config Instance of HTMLPurifier_Config, for determining
|
||||||
|
* stray elements.
|
||||||
|
*/
|
||||||
|
function getElements($config) {

    // Map of element name => merged definition, covering every element
    // listed by an active module. An element listed by several
    // modules is resolved once per listing; the last result is kept.
    $definitions = array();

    foreach ($this->activeModules as $active_module) {
        foreach ($active_module->elements as $element_name) {
            $definitions[$element_name] =
                $this->getElement($element_name, $config);
        }
    }

    // standalone elements now loaded
    return $definitions;

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves a single merged element definition
|
||||||
|
* @param $name Name of element
|
||||||
|
* @param $config Instance of HTMLPurifier_Config, may not be necessary.
|
||||||
|
*/
|
||||||
|
function getElement($name, $config) {

    // unknown to every valid module: nothing to build
    if (!isset($this->elementLookup[$name])) {
        return false;
    }

    $def = false;
    $modules = $this->validModules;

    // walk the defining modules in lookup order, folding their
    // definitions of $name into a single definition
    foreach ($this->elementLookup[$name] as $module_name) {

        $module  = $modules[$module_name];
        $new_def = $module->info[$name];

        if ($def) {
            // we already have a base definition: merge on top of it
            $def->mergeIn($new_def);
        } elseif ($new_def->standalone) {
            // first standalone definition becomes the base
            $def = $new_def;
        } else {
            // could "save it for another day":
            // non-standalone definitions that don't have a standalone
            // to merge into could be deferred to the end
            continue;
        }

        // attribute value expansions
        $this->attrCollections->performInclusions($def->attr);
        $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

        // descendants_are_inline, for ChildDef_Chameleon: set for any
        // string content model mentioning 'Inline', except on ins/del
        $mentions_inline = is_string($def->content_model) &&
            strpos($def->content_model, 'Inline') !== false;
        if ($mentions_inline && $name != 'del' && $name != 'ins') {
            // this is for you, ins/del
            $def->descendants_are_inline = true;
        }

        $this->contentSets->generateChildDef($def, $module);

    }

    // false if no standalone definition was ever found
    return $def;

}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
56
library/HTMLPurifier/Language.php
Normal file
56
library/HTMLPurifier/Language.php
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/LanguageFactory.php';
|
||||||
|
|
||||||
|
class HTMLPurifier_Language
{

    /** ISO 639 language code of language. Prefers shortest possible version */
    var $code = 'en';

    /** Fallback language code, or false for none */
    var $fallback = false;

    /** Array of localizable messages, keyed by message identifier */
    var $messages = array();

    /**
     * Whether load() has already populated this object
     * @private
     */
    var $_loaded = false;

    /**
     * Populates this object from the language factory's cache: every
     * key listed in the factory's $keys is copied onto the property
     * of the same name.
     * @note This is a lazy loader; repeated calls are no-ops
     */
    function load() {
        if (!$this->_loaded) {
            $factory = HTMLPurifier_LanguageFactory::instance();
            $factory->loadLanguage($this->code);
            foreach ($factory->keys as $property) {
                $this->$property = $factory->cache[$this->code][$property];
            }
            $this->_loaded = true;
        }
    }

    /**
     * Looks up a localised message, loading the language on first use.
     * The message is returned verbatim, without any processing.
     * @param $key string identifier of message
     * @return string localised message, or empty string if undefined
     */
    function getMessage($key) {
        if (!$this->_loaded) {
            $this->load();
        }
        return isset($this->messages[$key]) ? $this->messages[$key] : '';
    }

}
|
||||||
|
|
||||||
|
?>
|
12
library/HTMLPurifier/Language/classes/en-x-test.php
Normal file
12
library/HTMLPurifier/Language/classes/en-x-test.php
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// private class for unit testing
|
||||||
|
|
||||||
|
class HTMLPurifier_Language_en_x_test extends HTMLPurifier_Language
{
    // Deliberately empty: this subclass adds nothing to
    // HTMLPurifier_Language and only exists for unit testing.
}
|
||||||
|
|
||||||
|
?>
|
11
library/HTMLPurifier/Language/messages/en-x-test.php
Normal file
11
library/HTMLPurifier/Language/messages/en-x-test.php
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// private language message file for unit testing purposes
|
||||||
|
|
||||||
|
$fallback = 'en';
|
||||||
|
|
||||||
|
$messages = array(
|
||||||
|
'htmlpurifier' => 'HTML Purifier X'
|
||||||
|
);
|
||||||
|
|
||||||
|
?>
|
12
library/HTMLPurifier/Language/messages/en.php
Normal file
12
library/HTMLPurifier/Language/messages/en.php
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
$fallback = false;
|
||||||
|
|
||||||
|
$messages = array(
|
||||||
|
|
||||||
|
'htmlpurifier' => 'HTML Purifier',
|
||||||
|
'pizza' => 'Pizza', // for unit testing purposes
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
|
?>
|
196
library/HTMLPurifier/LanguageFactory.php
Normal file
196
library/HTMLPurifier/LanguageFactory.php
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Language.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class responsible for generating HTMLPurifier_Language objects, managing
|
||||||
|
* caching and fallbacks.
|
||||||
|
* @note Thanks to MediaWiki for the general logic, although this version
|
||||||
|
* has been entirely rewritten
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_LanguageFactory
{

    /**
     * Cache of language code information used to load HTMLPurifier_Language objects
     * Structure is: $factory->cache[$language_code][$key] = $value
     * @value array map
     */
    var $cache;

    /**
     * Valid keys in the HTMLPurifier_Language object. Designates which
     * variables to slurp out of a message file.
     * @value array list
     */
    var $keys = array('fallback', 'messages');

    /**
     * Instance of HTMLPurifier_AttrDef_Lang to validate language codes
     * @value object HTMLPurifier_AttrDef_Lang
     */
    var $validator;

    /**
     * Cached copy of dirname(__FILE__), directory of current file without
     * trailing slash
     * @value string filename
     */
    var $dir;

    /**
     * Keys whose contents are a hash map and can be merged
     * @value array lookup
     */
    var $mergeable_keys_map = array('messages' => true);

    /**
     * Keys whose contents are a list and can be merged
     * @value array lookup
     */
    var $mergeable_keys_list = array();

    /**
     * Retrieve sole instance of the factory.
     * @static
     * @param $prototype Optional prototype to overload sole instance with,
     *                   or bool true to reset to default factory.
     */
    static function &instance($prototype = null) {
        static $instance = null;
        if ($prototype === true || ($prototype === null && $instance === null)) {
            // Explicit reset, or first use. The reset case has to be
            // tested before the prototype case: true is also "not
            // null" and would otherwise be stored as the instance.
            $instance = new HTMLPurifier_LanguageFactory();
            $instance->setup();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        }
        return $instance;
    }

    /**
     * Sets up the singleton, much like a constructor
     * @note Prevents people from getting this outside of the singleton
     */
    function setup() {
        $this->validator = new HTMLPurifier_AttrDef_Lang();
        $this->dir = dirname(__FILE__);
    }

    /**
     * Creates a language object, handles class fallbacks
     * @param $code string language code
     * @return HTMLPurifier_Language object (or subclass) whose ->code is
     *         the validated $code, even when the object's class was
     *         borrowed from a fallback language
     */
    function create($code) {

        $config = $context = false; // hope it doesn't use these!
        $code = $this->validator->validate($code, $config, $context);
        if ($code === false) $code = 'en'; // malformed code becomes English

        $pcode = str_replace('-', '_', $code); // make valid PHP classname
        static $depth = 0; // recursion protection
        $max_depth = 10;   // longest fallback chain we are willing to follow

        if ($code == 'en') {
            $class = 'HTMLPurifier_Language';
        } else {
            $class = 'HTMLPurifier_Language_' . $pcode;
            $file  = $this->dir . '/Language/classes/' . $code . '.php';
            // PHP5/APC deps bug workaround can go here
            // you can bypass the conditional include by loading the
            // file yourself
            if (file_exists($file) && !class_exists($class)) {
                include_once $file;
            }
        }

        if (!class_exists($class)) {
            // No dedicated class: borrow the class of the fallback
            // language. English is used when no fallback is recorded,
            // or when the chain is so long that it is presumably
            // circular.
            $fallback = $this->getFallbackFor($code);
            if (!$fallback || $depth >= $max_depth) {
                $fallback = 'en';
            }
            $depth++;
            $lang = $this->create($fallback);
            $depth--;
        } else {
            $lang = new $class;
        }
        $lang->code = $code;

        return $lang;

    }

    /**
     * Returns the fallback language for language
     * @note Loads the original language into cache
     * @param $code string language code
     */
    function getFallbackFor($code) {
        $this->loadLanguage($code);
        return $this->cache[$code]['fallback'];
    }

    /**
     * Loads language into the cache, handles message file and fallbacks
     * @param $code string language code
     */
    function loadLanguage($code) {

        // recursion guard: codes whose fallback chain is being
        // resolved right now (entries are removed again once the
        // chain has been loaded)
        static $languages_seen = array();

        // abort if we've already loaded it
        if (isset($this->cache[$code])) return;

        // generate filename
        $filename = $this->dir . '/Language/messages/' . $code . '.php';

        // default fallback : may be overwritten by the ensuing include
        $fallback = ($code != 'en') ? 'en' : false;

        // load primary localisation
        if (!file_exists($filename)) {
            // skip the include: will rely solely on fallback
            $cache = array();
        } else {
            // the message file assigns plain local variables named
            // after $this->keys, which compact() then collects
            include $filename;
            $cache = compact($this->keys);
        }

        // infinite recursion guard: we are already in the middle of
        // resolving this very language, so the chain is circular.
        // Break the cycle by going straight to English (which itself
        // then gets no fallback at all).
        if (!empty($fallback) && isset($languages_seen[$code])) {
            trigger_error('Circular fallback reference in language ' .
                $code, E_USER_ERROR);
            $fallback = ($code != 'en') ? 'en' : false;
        }

        // load fallback localisation
        if (!empty($fallback)) {

            $outermost = !isset($languages_seen[$code]);
            $languages_seen[$code] = true;

            // load the fallback recursively
            $this->loadLanguage($fallback);
            $fallback_cache = $this->cache[$fallback];

            // only the frame that set the marker clears it
            if ($outermost) unset($languages_seen[$code]);

            // merge fallback with current language
            foreach ( $this->keys as $key ) {
                if (isset($cache[$key]) && isset($fallback_cache[$key])) {
                    if (isset($this->mergeable_keys_map[$key])) {
                        // own entries win over the fallback's
                        $cache[$key] = $cache[$key] + $fallback_cache[$key];
                    } elseif (isset($this->mergeable_keys_list[$key])) {
                        $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
                    }
                } elseif (!isset($cache[$key])) {
                    // inherit from the fallback; never clobber a value
                    // the language defines itself
                    $cache[$key] = isset($fallback_cache[$key]) ? $fallback_cache[$key] : null;
                }
            }

        }

        // save to cache for later retrieval
        $this->cache[$code] = $cache;

        return;
    }

}
|
||||||
|
|
||||||
|
?>
|
@ -151,7 +151,8 @@ class HTMLPurifier_Lexer
|
|||||||
$lexer = $prototype;
|
$lexer = $prototype;
|
||||||
}
|
}
|
||||||
if (empty($lexer)) {
|
if (empty($lexer)) {
|
||||||
if (class_exists('DOMDocument')) { // check for DOM support
|
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
|
||||||
|
class_exists('DOMDocument')) { // check for DOM support
|
||||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||||
$lexer = new HTMLPurifier_Lexer_DOMLex();
|
$lexer = new HTMLPurifier_Lexer_DOMLex();
|
||||||
} else {
|
} else {
|
||||||
|
@ -21,7 +21,7 @@ require_once 'HTMLPurifier/TokenFactory.php';
|
|||||||
*
|
*
|
||||||
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
|
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
|
||||||
* If this is a huge problem, due to the fact that HTML is hand
|
* If this is a huge problem, due to the fact that HTML is hand
|
||||||
* edited and youa re unable to get a parser cache that caches the
|
* edited and you are unable to get a parser cache that caches the
|
||||||
* the output of HTML Purifier while keeping the original HTML lying
|
* the output of HTML Purifier while keeping the original HTML lying
|
||||||
* around, you may want to run Tidy on the resulting output or use
|
* around, you may want to run Tidy on the resulting output or use
|
||||||
* HTMLPurifier_DirectLex
|
* HTMLPurifier_DirectLex
|
||||||
@ -54,7 +54,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
|
|
||||||
$doc = new DOMDocument();
|
$doc = new DOMDocument();
|
||||||
$doc->encoding = 'UTF-8'; // technically does nothing, but whatever
|
$doc->encoding = 'UTF-8'; // technically does nothing, but whatever
|
||||||
@$doc->loadHTML($string); // mute all errors, handle it transparently
|
|
||||||
|
// DOM will toss errors if the HTML it's parsing has really big
|
||||||
|
// problems, so we're going to mute them. This can cause problems
|
||||||
|
// if a custom error handler that doesn't implement error_reporting
|
||||||
|
// is set, as noted by a Drupal plugin of HTML Purifier. Consider
|
||||||
|
// making our own error reporter to temporarily load in
|
||||||
|
@$doc->loadHTML($string);
|
||||||
|
|
||||||
$tokens = array();
|
$tokens = array();
|
||||||
$this->tokenizeDOM(
|
$this->tokenizeDOM(
|
||||||
|
@ -13,6 +13,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
function render($config) {
|
function render($config) {
|
||||||
$ret = '';
|
$ret = '';
|
||||||
$this->config =& $config;
|
$this->config =& $config;
|
||||||
|
|
||||||
$this->def = $config->getHTMLDefinition();
|
$this->def = $config->getHTMLDefinition();
|
||||||
$def =& $this->def;
|
$def =& $this->def;
|
||||||
|
|
||||||
@ -21,16 +22,14 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
$ret .= $this->element('caption', 'Environment');
|
$ret .= $this->element('caption', 'Environment');
|
||||||
|
|
||||||
$ret .= $this->row('Parent of fragment', $def->info_parent);
|
$ret .= $this->row('Parent of fragment', $def->info_parent);
|
||||||
$ret .= $this->row('Strict mode', $def->strict);
|
$ret .= $this->renderChildren($def->info_parent_def->child);
|
||||||
if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper);
|
$ret .= $this->row('Block wrap name', $def->info_block_wrapper);
|
||||||
|
|
||||||
$ret .= $this->start('tr');
|
$ret .= $this->start('tr');
|
||||||
$ret .= $this->element('th', 'Global attributes');
|
$ret .= $this->element('th', 'Global attributes');
|
||||||
$ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
|
$ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
|
||||||
$ret .= $this->end('tr');
|
$ret .= $this->end('tr');
|
||||||
|
|
||||||
$ret .= $this->renderChildren($def->info_parent_def->child);
|
|
||||||
|
|
||||||
$ret .= $this->start('tr');
|
$ret .= $this->start('tr');
|
||||||
$ret .= $this->element('th', 'Tag transforms');
|
$ret .= $this->element('th', 'Tag transforms');
|
||||||
$list = array();
|
$list = array();
|
||||||
@ -81,8 +80,8 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
$ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
|
$ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
|
||||||
$ret .= $this->end('tr');
|
$ret .= $this->end('tr');
|
||||||
$ret .= $this->start('tr');
|
$ret .= $this->start('tr');
|
||||||
$ret .= $this->element('th', 'Type');
|
$ret .= $this->element('th', 'Inline content');
|
||||||
$ret .= $this->element('td', ucfirst($def->type));
|
$ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No');
|
||||||
$ret .= $this->end('tr');
|
$ret .= $this->end('tr');
|
||||||
if (!empty($def->excludes)) {
|
if (!empty($def->excludes)) {
|
||||||
$ret .= $this->start('tr');
|
$ret .= $this->start('tr');
|
||||||
@ -130,15 +129,17 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
$elements = array();
|
$elements = array();
|
||||||
$attr = array();
|
$attr = array();
|
||||||
if (isset($def->elements)) {
|
if (isset($def->elements)) {
|
||||||
if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context);
|
if ($def->type == 'strictblockquote') {
|
||||||
|
$def->validateChildren(array(), $this->config, $context);
|
||||||
|
}
|
||||||
$elements = $def->elements;
|
$elements = $def->elements;
|
||||||
} elseif ($def->type == 'chameleon') {
|
} elseif ($def->type == 'chameleon') {
|
||||||
$attr['rowspan'] = 2;
|
$attr['rowspan'] = 2;
|
||||||
} elseif ($def->type == 'empty') {
|
} elseif ($def->type == 'empty') {
|
||||||
$elements = array();
|
$elements = array();
|
||||||
} elseif ($def->type == 'table') {
|
} elseif ($def->type == 'table') {
|
||||||
$elements = array('col', 'caption', 'colgroup', 'thead',
|
$elements = array_flip(array('col', 'caption', 'colgroup', 'thead',
|
||||||
'tfoot', 'tbody', 'tr');
|
'tfoot', 'tbody', 'tr'));
|
||||||
}
|
}
|
||||||
$ret .= $this->element('th', 'Allowed children', $attr);
|
$ret .= $this->element('th', 'Allowed children', $attr);
|
||||||
|
|
||||||
@ -167,6 +168,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
* @param $array Tag lookup array in form of array('tagname' => true)
|
* @param $array Tag lookup array in form of array('tagname' => true)
|
||||||
*/
|
*/
|
||||||
function listifyTagLookup($array) {
|
function listifyTagLookup($array) {
|
||||||
|
ksort($array);
|
||||||
$list = array();
|
$list = array();
|
||||||
foreach ($array as $name => $discard) {
|
foreach ($array as $name => $discard) {
|
||||||
if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
|
if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
|
||||||
@ -181,6 +183,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
* @todo Also add information about internal state
|
* @todo Also add information about internal state
|
||||||
*/
|
*/
|
||||||
function listifyObjectList($array) {
|
function listifyObjectList($array) {
|
||||||
|
ksort($array);
|
||||||
$list = array();
|
$list = array();
|
||||||
foreach ($array as $discard => $obj) {
|
foreach ($array as $discard => $obj) {
|
||||||
$list[] = $this->getClass($obj, 'AttrTransform_');
|
$list[] = $this->getClass($obj, 'AttrTransform_');
|
||||||
@ -193,6 +196,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
* @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
|
* @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
|
||||||
*/
|
*/
|
||||||
function listifyAttr($array) {
|
function listifyAttr($array) {
|
||||||
|
ksort($array);
|
||||||
$list = array();
|
$list = array();
|
||||||
foreach ($array as $name => $obj) {
|
foreach ($array as $name => $obj) {
|
||||||
if ($obj === false) continue;
|
if ($obj === false) continue;
|
||||||
|
@ -49,8 +49,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
$tokens[] = new HTMLPurifier_Token_End($parent_name);
|
$tokens[] = new HTMLPurifier_Token_End($parent_name);
|
||||||
|
|
||||||
// setup the context variables
|
// setup the context variables
|
||||||
$parent_type = 'unknown'; // reference var that we alter
|
$is_inline = false; // reference var that we alter
|
||||||
$context->register('ParentType', $parent_type);
|
$context->register('IsInline', $is_inline);
|
||||||
|
|
||||||
//####################################################################//
|
//####################################################################//
|
||||||
// Loop initialization
|
// Loop initialization
|
||||||
@ -115,11 +115,16 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
}
|
}
|
||||||
|
|
||||||
// calculate context
|
// calculate context
|
||||||
if (isset($parent_def)) {
|
if ($is_inline === false) {
|
||||||
$parent_type = $parent_def->type;
|
// check if conditions make it inline
|
||||||
|
if (!empty($parent_def) && $parent_def->descendants_are_inline) {
|
||||||
|
$is_inline = $count - 1;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// generally found in specialized elements like UL
|
// check if we're out of inline
|
||||||
$parent_type = 'unknown';
|
if ($count === $is_inline) {
|
||||||
|
$is_inline = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//################################################################//
|
//################################################################//
|
||||||
@ -273,7 +278,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
array_pop($tokens);
|
array_pop($tokens);
|
||||||
|
|
||||||
// remove context variables
|
// remove context variables
|
||||||
$context->destroy('ParentType');
|
$context->destroy('IsInline');
|
||||||
|
|
||||||
//####################################################################//
|
//####################################################################//
|
||||||
// Return
|
// Return
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once('HTMLPurifier/Token.php');
|
require_once 'HTMLPurifier/Token.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines a mutation of an obsolete tag into a valid tag.
|
* Defines a mutation of an obsolete tag into a valid tag.
|
||||||
@ -26,132 +26,4 @@ class HTMLPurifier_TagTransform
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Simple transformation, just change tag name to something else.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
|
|
||||||
{
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param $transform_to Tag name to transform to.
|
|
||||||
*/
|
|
||||||
function HTMLPurifier_TagTransform_Simple($transform_to) {
|
|
||||||
$this->transform_to = $transform_to;
|
|
||||||
}
|
|
||||||
|
|
||||||
function transform($tag, $config, &$context) {
|
|
||||||
$new_tag = $tag->copy();
|
|
||||||
$new_tag->name = $this->transform_to;
|
|
||||||
return $new_tag;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Transforms CENTER tags into proper version (DIV with text-align CSS)
|
|
||||||
*
|
|
||||||
* Takes a CENTER tag, parses the align attribute, and then if it's valid
|
|
||||||
* assigns it to the CSS property text-align.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
|
|
||||||
{
|
|
||||||
var $transform_to = 'div';
|
|
||||||
|
|
||||||
function transform($tag, $config, &$context) {
|
|
||||||
if ($tag->type == 'end') {
|
|
||||||
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
|
||||||
return $new_tag;
|
|
||||||
}
|
|
||||||
$attr = $tag->attr;
|
|
||||||
$prepend_css = 'text-align:center;';
|
|
||||||
if (isset($attr['style'])) {
|
|
||||||
$attr['style'] = $prepend_css . $attr['style'];
|
|
||||||
} else {
|
|
||||||
$attr['style'] = $prepend_css;
|
|
||||||
}
|
|
||||||
$new_tag = $tag->copy();
|
|
||||||
$new_tag->name = $this->transform_to;
|
|
||||||
$new_tag->attr = $attr;
|
|
||||||
return $new_tag;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Transforms FONT tags to the proper form (SPAN with CSS styling)
|
|
||||||
*
|
|
||||||
* This transformation takes the three proprietary attributes of FONT and
|
|
||||||
* transforms them into their corresponding CSS attributes. These are color,
|
|
||||||
* face, and size.
|
|
||||||
*
|
|
||||||
* @note Size is an interesting case because it doesn't map cleanly to CSS.
|
|
||||||
* Thanks to
|
|
||||||
* http://style.cleverchimp.com/font_size_intervals/altintervals.html
|
|
||||||
* for reasonable mappings.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
|
|
||||||
{
|
|
||||||
|
|
||||||
var $transform_to = 'span';
|
|
||||||
|
|
||||||
var $_size_lookup = array(
|
|
||||||
'1' => 'xx-small',
|
|
||||||
'2' => 'small',
|
|
||||||
'3' => 'medium',
|
|
||||||
'4' => 'large',
|
|
||||||
'5' => 'x-large',
|
|
||||||
'6' => 'xx-large',
|
|
||||||
'7' => '300%',
|
|
||||||
'-1' => 'smaller',
|
|
||||||
'+1' => 'larger',
|
|
||||||
'-2' => '60%',
|
|
||||||
'+2' => '150%',
|
|
||||||
'+4' => '300%'
|
|
||||||
);
|
|
||||||
|
|
||||||
function transform($tag, $config, &$context) {
|
|
||||||
|
|
||||||
if ($tag->type == 'end') {
|
|
||||||
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
|
||||||
return $new_tag;
|
|
||||||
}
|
|
||||||
|
|
||||||
$attr = $tag->attr;
|
|
||||||
$prepend_style = '';
|
|
||||||
|
|
||||||
// handle color transform
|
|
||||||
if (isset($attr['color'])) {
|
|
||||||
$prepend_style .= 'color:' . $attr['color'] . ';';
|
|
||||||
unset($attr['color']);
|
|
||||||
}
|
|
||||||
|
|
||||||
// handle face transform
|
|
||||||
if (isset($attr['face'])) {
|
|
||||||
$prepend_style .= 'font-family:' . $attr['face'] . ';';
|
|
||||||
unset($attr['face']);
|
|
||||||
}
|
|
||||||
|
|
||||||
// handle size transform
|
|
||||||
if (isset($attr['size'])) {
|
|
||||||
if (isset($this->_size_lookup[$attr['size']])) {
|
|
||||||
$prepend_style .= 'font-size:' .
|
|
||||||
$this->_size_lookup[$attr['size']] . ';';
|
|
||||||
}
|
|
||||||
unset($attr['size']);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($prepend_style) {
|
|
||||||
$attr['style'] = isset($attr['style']) ?
|
|
||||||
$prepend_style . $attr['style'] :
|
|
||||||
$prepend_style;
|
|
||||||
}
|
|
||||||
|
|
||||||
$new_tag = $tag->copy();
|
|
||||||
$new_tag->name = $this->transform_to;
|
|
||||||
$new_tag->attr = $attr;
|
|
||||||
|
|
||||||
return $new_tag;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
?>
|
?>
|
34
library/HTMLPurifier/TagTransform/Center.php
Normal file
34
library/HTMLPurifier/TagTransform/Center.php
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/TagTransform.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transforms CENTER tags into proper version (DIV with text-align CSS)
|
||||||
|
*
|
||||||
|
* Takes a CENTER tag, parses the align attribute, and then if it's valid
|
||||||
|
* assigns it to the CSS property text-align.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
|
||||||
|
{
|
||||||
|
var $transform_to = 'div';
|
||||||
|
|
||||||
|
function transform($tag, $config, &$context) {
|
||||||
|
if ($tag->type == 'end') {
|
||||||
|
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
||||||
|
return $new_tag;
|
||||||
|
}
|
||||||
|
$attr = $tag->attr;
|
||||||
|
$prepend_css = 'text-align:center;';
|
||||||
|
if (isset($attr['style'])) {
|
||||||
|
$attr['style'] = $prepend_css . $attr['style'];
|
||||||
|
} else {
|
||||||
|
$attr['style'] = $prepend_css;
|
||||||
|
}
|
||||||
|
$new_tag = $tag->copy();
|
||||||
|
$new_tag->name = $this->transform_to;
|
||||||
|
$new_tag->attr = $attr;
|
||||||
|
return $new_tag;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
83
library/HTMLPurifier/TagTransform/Font.php
Normal file
83
library/HTMLPurifier/TagTransform/Font.php
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/TagTransform.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transforms FONT tags to the proper form (SPAN with CSS styling)
|
||||||
|
*
|
||||||
|
* This transformation takes the three proprietary attributes of FONT and
|
||||||
|
* transforms them into their corresponding CSS attributes. These are color,
|
||||||
|
* face, and size.
|
||||||
|
*
|
||||||
|
* @note Size is an interesting case because it doesn't map cleanly to CSS.
|
||||||
|
* Thanks to
|
||||||
|
* http://style.cleverchimp.com/font_size_intervals/altintervals.html
|
||||||
|
* for reasonable mappings.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
|
||||||
|
{
|
||||||
|
|
||||||
|
var $transform_to = 'span';
|
||||||
|
|
||||||
|
var $_size_lookup = array(
|
||||||
|
'1' => 'xx-small',
|
||||||
|
'2' => 'small',
|
||||||
|
'3' => 'medium',
|
||||||
|
'4' => 'large',
|
||||||
|
'5' => 'x-large',
|
||||||
|
'6' => 'xx-large',
|
||||||
|
'7' => '300%',
|
||||||
|
'-1' => 'smaller',
|
||||||
|
'+1' => 'larger',
|
||||||
|
'-2' => '60%',
|
||||||
|
'+2' => '150%',
|
||||||
|
'+4' => '300%'
|
||||||
|
);
|
||||||
|
|
||||||
|
function transform($tag, $config, &$context) {
|
||||||
|
|
||||||
|
if ($tag->type == 'end') {
|
||||||
|
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
||||||
|
return $new_tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
$attr = $tag->attr;
|
||||||
|
$prepend_style = '';
|
||||||
|
|
||||||
|
// handle color transform
|
||||||
|
if (isset($attr['color'])) {
|
||||||
|
$prepend_style .= 'color:' . $attr['color'] . ';';
|
||||||
|
unset($attr['color']);
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle face transform
|
||||||
|
if (isset($attr['face'])) {
|
||||||
|
$prepend_style .= 'font-family:' . $attr['face'] . ';';
|
||||||
|
unset($attr['face']);
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle size transform
|
||||||
|
if (isset($attr['size'])) {
|
||||||
|
if (isset($this->_size_lookup[$attr['size']])) {
|
||||||
|
$prepend_style .= 'font-size:' .
|
||||||
|
$this->_size_lookup[$attr['size']] . ';';
|
||||||
|
}
|
||||||
|
unset($attr['size']);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($prepend_style) {
|
||||||
|
$attr['style'] = isset($attr['style']) ?
|
||||||
|
$prepend_style . $attr['style'] :
|
||||||
|
$prepend_style;
|
||||||
|
}
|
||||||
|
|
||||||
|
$new_tag = $tag->copy();
|
||||||
|
$new_tag->name = $this->transform_to;
|
||||||
|
$new_tag->attr = $attr;
|
||||||
|
|
||||||
|
return $new_tag;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
26
library/HTMLPurifier/TagTransform/Simple.php
Normal file
26
library/HTMLPurifier/TagTransform/Simple.php
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/TagTransform.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple transformation, just change tag name to something else.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param $transform_to Tag name to transform to.
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_TagTransform_Simple($transform_to) {
|
||||||
|
$this->transform_to = $transform_to;
|
||||||
|
}
|
||||||
|
|
||||||
|
function transform($tag, $config, &$context) {
|
||||||
|
$new_tag = $tag->copy();
|
||||||
|
$new_tag->name = $this->transform_to;
|
||||||
|
return $new_tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
@ -10,7 +10,7 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'irc' => true, // "Internet Relay Chat", usually needs another app
|
'irc' => true, // "Internet Relay Chat", usually needs another app
|
||||||
// for Usenet, these two are similar, but distinct
|
// for Usenet, these two are similar, but distinct
|
||||||
'nntp' => true, // individual Netnews articles
|
'nntp' => true, // individual Netnews articles
|
||||||
'news' => true // newsgroup or individual Netnews articles),
|
'news' => true // newsgroup or individual Netnews articles
|
||||||
), 'lookup',
|
), 'lookup',
|
||||||
'Whitelist that defines the schemes that a URI is allowed to have. This '.
|
'Whitelist that defines the schemes that a URI is allowed to have. This '.
|
||||||
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
|
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
|
||||||
|
@ -22,6 +22,17 @@ foreach ($_GET as $key => $value) {
|
|||||||
|
|
||||||
@$config->loadArray($get);
|
@$config->loadArray($get);
|
||||||
|
|
||||||
|
/* // sample local definition, obviously needs to be less clunky
|
||||||
|
$html_definition =& $config->getHTMLDefinition(true);
|
||||||
|
$module = new HTMLPurifier_HTMLModule();
|
||||||
|
$module->name = 'Marquee';
|
||||||
|
$module->info['marquee'] = new HTMLPurifier_ElementDef();
|
||||||
|
$module->info['marquee']->content_model = '#PCDATA | Inline';
|
||||||
|
$module->info['marquee']->content_model_type = 'optional';
|
||||||
|
$module->content_sets = array('Inline' => 'marquee');
|
||||||
|
$html_definition->manager->addModule($module);
|
||||||
|
*/
|
||||||
|
|
||||||
$printer_html_definition = new HTMLPurifier_Printer_HTMLDefinition();
|
$printer_html_definition = new HTMLPurifier_Printer_HTMLDefinition();
|
||||||
$printer_css_definition = new HTMLPurifier_Printer_CSSDefinition();
|
$printer_css_definition = new HTMLPurifier_Printer_CSSDefinition();
|
||||||
|
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/BackgroundPosition.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_BackgroundPositionTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_BackgroundPositionTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_BackgroundPosition();
|
$this->def = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
|
||||||
|
|
||||||
// explicitly cited in spec
|
// explicitly cited in spec
|
||||||
$this->assertDef('0% 0%');
|
$this->assertDef('0% 0%');
|
@ -1,14 +1,15 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Background.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_BackgroundTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_BackgroundTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_Background(HTMLPurifier_Config::createDefault());
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->def = new HTMLPurifier_AttrDef_CSS_Background($config);
|
||||||
|
|
||||||
$valid = '#333 url(chess.png) repeat fixed 50% top';
|
$valid = '#333 url(chess.png) repeat fixed 50% top';
|
||||||
$this->assertDef($valid);
|
$this->assertDef($valid);
|
@ -1,14 +1,14 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Border.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/PixelsTest.php';
|
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_BorderTest extends HTMLPurifier_AttrDef_PixelsTest
|
class HTMLPurifier_AttrDef_CSS_BorderTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_Border(HTMLPurifier_Config::createDefault());
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->def = new HTMLPurifier_AttrDef_CSS_Border($config);
|
||||||
|
|
||||||
$this->assertDef('thick solid red', 'thick solid #F00');
|
$this->assertDef('thick solid red', 'thick solid #F00');
|
||||||
$this->assertDef('thick solid');
|
$this->assertDef('thick solid');
|
@ -1,14 +1,14 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Color.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_ColorTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_ColorTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_Color();
|
$this->def = new HTMLPurifier_AttrDef_CSS_Color();
|
||||||
|
|
||||||
$this->assertDef('#F00');
|
$this->assertDef('#F00');
|
||||||
$this->assertDef('#808080');
|
$this->assertDef('#808080');
|
@ -1,20 +1,20 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Composite.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_Composite_Testable extends
|
class HTMLPurifier_AttrDef_CSS_Composite_Testable extends
|
||||||
HTMLPurifier_AttrDef_Composite
|
HTMLPurifier_AttrDef_CSS_Composite
|
||||||
{
|
{
|
||||||
|
|
||||||
// we need to pass by ref to get the mocks in
|
// we need to pass by ref to get the mocks in
|
||||||
function HTMLPurifier_AttrDef_Composite_Testable(&$defs) {
|
function HTMLPurifier_AttrDef_CSS_Composite_Testable(&$defs) {
|
||||||
$this->defs =& $defs;
|
$this->defs =& $defs;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_CompositeTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
var $def1, $def2;
|
var $def1, $def2;
|
||||||
@ -32,7 +32,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
|
|||||||
$def1 = new HTMLPurifier_AttrDefMock($this);
|
$def1 = new HTMLPurifier_AttrDefMock($this);
|
||||||
$def2 = new HTMLPurifier_AttrDefMock($this);
|
$def2 = new HTMLPurifier_AttrDefMock($this);
|
||||||
$defs = array(&$def1, &$def2);
|
$defs = array(&$def1, &$def2);
|
||||||
$def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
|
$def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
|
||||||
$input = 'FOOBAR';
|
$input = 'FOOBAR';
|
||||||
$output = 'foobar';
|
$output = 'foobar';
|
||||||
$def1_params = array($input, $config, $context);
|
$def1_params = array($input, $config, $context);
|
||||||
@ -51,7 +51,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
|
|||||||
$def1 = new HTMLPurifier_AttrDefMock($this);
|
$def1 = new HTMLPurifier_AttrDefMock($this);
|
||||||
$def2 = new HTMLPurifier_AttrDefMock($this);
|
$def2 = new HTMLPurifier_AttrDefMock($this);
|
||||||
$defs = array(&$def1, &$def2);
|
$defs = array(&$def1, &$def2);
|
||||||
$def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
|
$def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
|
||||||
$input = 'BOOMA';
|
$input = 'BOOMA';
|
||||||
$output = 'booma';
|
$output = 'booma';
|
||||||
$def_params = array($input, $config, $context);
|
$def_params = array($input, $config, $context);
|
||||||
@ -71,7 +71,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
|
|||||||
$def1 = new HTMLPurifier_AttrDefMock($this);
|
$def1 = new HTMLPurifier_AttrDefMock($this);
|
||||||
$def2 = new HTMLPurifier_AttrDefMock($this);
|
$def2 = new HTMLPurifier_AttrDefMock($this);
|
||||||
$defs = array(&$def1, &$def2);
|
$defs = array(&$def1, &$def2);
|
||||||
$def = new HTMLPurifier_AttrDef_Composite_Testable($defs);
|
$def = new HTMLPurifier_AttrDef_CSS_Composite_Testable($defs);
|
||||||
$input = 'BOOMA';
|
$input = 'BOOMA';
|
||||||
$output = false;
|
$output = false;
|
||||||
$def_params = array($input, $config, $context);
|
$def_params = array($input, $config, $context);
|
@ -1,14 +1,14 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/FontFamily.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_FontFamilyTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_FontFamily();
|
$this->def = new HTMLPurifier_AttrDef_CSS_FontFamily();
|
||||||
|
|
||||||
$this->assertDef('Gill, Helvetica, sans-serif');
|
$this->assertDef('Gill, Helvetica, sans-serif');
|
||||||
$this->assertDef('\'Times New Roman\', serif');
|
$this->assertDef('\'Times New Roman\', serif');
|
@ -1,14 +1,15 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Font.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_FontTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_FontTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_Font(HTMLPurifier_Config::createDefault());
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->def = new HTMLPurifier_AttrDef_CSS_Font($config);
|
||||||
|
|
||||||
// hodgepodge of usage cases from W3C spec, but " -> '
|
// hodgepodge of usage cases from W3C spec, but " -> '
|
||||||
$this->assertDef('12px/14px sans-serif');
|
$this->assertDef('12px/14px sans-serif');
|
@ -1,14 +1,14 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef/CSSLength.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_CSSLengthTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_LengthTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_CSSLength();
|
$this->def = new HTMLPurifier_AttrDef_CSS_Length();
|
||||||
|
|
||||||
$this->assertDef('0');
|
$this->assertDef('0');
|
||||||
$this->assertDef('0px');
|
$this->assertDef('0px');
|
||||||
@ -31,7 +31,7 @@ class HTMLPurifier_AttrDef_CSSLengthTest extends HTMLPurifier_AttrDefHarness
|
|||||||
|
|
||||||
function testNonNegative() {
|
function testNonNegative() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_CSSLength(true);
|
$this->def = new HTMLPurifier_AttrDef_CSS_Length(true);
|
||||||
|
|
||||||
$this->assertDef('3cm');
|
$this->assertDef('3cm');
|
||||||
$this->assertDef('-3mm', false);
|
$this->assertDef('-3mm', false);
|
@ -1,14 +1,15 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/ListStyle.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_ListStyleTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_ListStyleTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
$this->def = new HTMLPurifier_AttrDef_ListStyle(HTMLPurifier_Config::createDefault());
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->def = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
|
||||||
|
|
||||||
$this->assertDef('lower-alpha');
|
$this->assertDef('lower-alpha');
|
||||||
$this->assertDef('upper-roman inside');
|
$this->assertDef('upper-roman inside');
|
@ -1,16 +1,16 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef/Multiple.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
|
||||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||||
|
|
||||||
// borrowed for the sakes of this test
|
// borrowed for the sakes of this test
|
||||||
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_MultipleTest extends HTMLPurifier_AttrDefHarness
|
class HTMLPurifier_AttrDef_CSS_MultipleTest extends HTMLPurifier_AttrDefHarness
|
||||||
{
|
{
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
$this->def = new HTMLPurifier_AttrDef_Multiple(
|
$this->def = new HTMLPurifier_AttrDef_CSS_Multiple(
|
||||||
new HTMLPurifier_AttrDef_Integer()
|
new HTMLPurifier_AttrDef_Integer()
|
||||||
);
|
);
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user