mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-23 13:51:54 +00:00
Release 1.6.0, merged in r875-930.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@931 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
e967680250
commit
b829e76bbf
2
Doxyfile
2
Doxyfile
@ -4,7 +4,7 @@
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 1.5.0
|
||||
PROJECT_NUMBER = 1.6.0
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
|
4
INSTALL
4
INSTALL
@ -47,7 +47,9 @@ HTML Purifier is all about web-standards, so accordingly your webpages should
|
||||
be standards compliant. HTML Purifier can deal with these doctypes:
|
||||
|
||||
* XHTML 1.0 Transitional (default)
|
||||
* XHTML 1.0 Strict
|
||||
* HTML 4.01 Transitional
|
||||
* HTML 4.01 Strict
|
||||
|
||||
...and these character encodings:
|
||||
|
||||
@ -87,7 +89,7 @@ into configuring things just for the heck of it, skip to 4.3).
|
||||
* Am I using UTF-8?
|
||||
* Am I using XHTML 1.0 Transitional?
|
||||
|
||||
If you answered yes to any of these questions, instantiate a configuration
|
||||
If you answered no to any of these questions, instantiate a configuration
|
||||
object and read on:
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
71
INSTALL.fr.utf8
Normal file
71
INSTALL.fr.utf8
Normal file
@ -0,0 +1,71 @@
|
||||
|
||||
Installation
|
||||
Comment installer HTML Purifier
|
||||
|
||||
Attention : ce document est encodé en UTF-8. Si les lettres accentuées
|
||||
sont illisibles, utilisez un meilleur éditeur de texte.
|
||||
|
||||
À l'aide : je ne suis pas un locuteur natif du français. Si vous trouvez une
|
||||
erreur dans ce document, dites-le-moi ! Merci.
|
||||
|
||||
|
||||
L'installation de HTML Purifier est très simple, parce qu'il ne nécessite pas
|
||||
de configuration. À la fin de ce document, les utilisateurs
|
||||
impatients peuvent trouver le code, mais je recommande que vous lisiez
|
||||
|
||||
ce document pour connaître quelques détails.
|
||||
|
||||
|
||||
1. Compatibilité
|
||||
|
||||
HTML Purifier fonctionne avec PHP 4 et PHP 5. PHP 4.3.9 est la dernière
|
||||
version que j'ai testée. Il ne dépend d'aucune autre bibliothèque.
|
||||
|
||||
Les extensions optionnelles sont iconv (en général déjà installée) et
|
||||
tidy (répandu aussi). Si vous utilisez UTF-8 et ne voulez pas
|
||||
l'indentation, vous pouvez utiliser HTML Purifier sans ces extensions.
|
||||
|
||||
|
||||
2. Inclure la bibliothèque
|
||||
|
||||
Utilisez:
|
||||
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
...quand vous devez utiliser HTML Purifier (ne l'incluez pas quand vous
|
||||
n'en avez pas besoin, parce que HTML Purifier est très grand.)
|
||||
|
||||
Si vous n'aimez pas que HTML Purifier change votre include_path, vous pouvez
|
||||
changer votre include_path vous-même, et :
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
Seul le contenu de library/ est essentiel ; vous pouvez enlever
|
||||
les autres fichiers quand vous êtes dans un environnement professionnel.
|
||||
|
||||
|
||||
[En cours de construction]
|
||||
|
||||
|
||||
6. Installation rapide
|
||||
|
||||
Si votre site web est en UTF-8 et XHTML Transitional, utilisez:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purificateur = new HTMLPurifier();
|
||||
$html_propre = $purificateur->purify($html_salle);
|
||||
?>
|
||||
|
||||
Sinon, utilisez:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //remplacez avec votre encoding
|
||||
$config->set('Core', 'XHTML', true); //remplacez avec false si HTML 4.01
|
||||
$purificateur = new HTMLPurifier($config);
|
||||
|
||||
$html_propre = $purificateur->purify($html_salle);
|
||||
?>
|
19
NEWS
19
NEWS
@ -9,6 +9,25 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
1.6.0, released 2007-04-01
|
||||
! Support for most common deprecated attributes via transformations:
|
||||
+ bgcolor in td, th, tr and table
|
||||
+ border in img
|
||||
+ name in a and img
|
||||
+ width in td, th and hr
|
||||
+ height in td, th
|
||||
! Support for CSS attribute 'height' added
|
||||
! Support for rel and rev attributes in a tags added, use %Attr.AllowedRel
|
||||
and %Attr.AllowedRev to activate
|
||||
- You can define ID blacklists using regular expressions via
|
||||
%Attr.IDBlacklistRegexp
|
||||
- Error messages are emitted when you attempt to "allow" elements or
|
||||
attributes that HTML Purifier does not support
|
||||
|
||||
1.5.1, unknown release date
|
||||
- Fix segfault in unit test. The problem is not very reproducible and
|
||||
I don't know what causes it, but a six line patch fixed it.
|
||||
|
||||
1.5.0, released 2007-03-23
|
||||
! Added a rudimentary I18N and L10N system modeled off MediaWiki. It
|
||||
doesn't actually do anything yet, but keep your eyes peeled.
|
||||
|
52
TODO
52
TODO
@ -4,33 +4,35 @@ TODO List
|
||||
= KEY ====================
|
||||
# Flagship
|
||||
- Regular
|
||||
? At-risk
|
||||
? Maybe I'll Do It
|
||||
==========================
|
||||
|
||||
1.6 release
|
||||
# Implement all non-essential attribute transforms, configurable
|
||||
1.7 release [Advanced API]
|
||||
# Complete advanced API, and fully document it
|
||||
# Implement all edge-case attribute transforms
|
||||
# Implement all deprecated tags and attributes
|
||||
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly
|
||||
do this earlier)
|
||||
|
||||
1.8 release [Refactor, refactor!]
|
||||
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
- Configuration profiles: predefined directives set with one func call
|
||||
- Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
- Allow non-ASCII characters in font names
|
||||
|
||||
1.9 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- Requires I18N facilities to be created first (COMPLEX)
|
||||
? Configuration profiles: sets of directives that get set with one func call
|
||||
- XSS-attempt detection
|
||||
- Implement IDREF support
|
||||
|
||||
1.7 release
|
||||
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
||||
- More fine-grained control over escaping behavior
|
||||
- Silently drop content in between SCRIPT tags (can be generalized to allow
|
||||
specification of elements that, when detected as foreign, trigger removal
|
||||
of children, although unbalanced tags could wreak havoc (or at least
|
||||
delete the rest of the document)).
|
||||
- Allow specifying global attributes on a tag-by-tag basis in
|
||||
%HTML.AllowAttributes
|
||||
? More user-friendly warnings when %HTML.Allow* attempts to specify a
|
||||
tag or attribute that is not supported
|
||||
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
||||
|
||||
1.8 release
|
||||
1.10 release [Do What I Mean, Not What I Say]
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
@ -45,7 +47,7 @@ TODO List
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
|
||||
2.0 release
|
||||
2.0 release [Beyond HTML]
|
||||
# Legit token based CSS parsing (will require revamping almost every
|
||||
AttrDef class)
|
||||
# Formatters for plaintext (COMPLEX)
|
||||
@ -54,31 +56,31 @@ TODO List
|
||||
- Linkify URLs
|
||||
- Smileys
|
||||
- Linkification for HTML Purifier docs: notably configuration and classes
|
||||
|
||||
3.0 release
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
- Allow tags to be "armored", an internal flag that protects them
|
||||
from validation and passes them out unharmed
|
||||
- XHTML 1.1 support
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
|
||||
3.0 release [To XML and Beyond]
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
- XHTML 1.1 support
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- WordPress
|
||||
- WordPress (mostly written, needs beta-testing)
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
- Have 'lang' attribute be checked against official lists
|
||||
? Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||
? Have 'lang' attribute be checked against official lists
|
||||
? Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
|
||||
Requested
|
||||
|
@ -16,9 +16,10 @@
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>It makes no sense to adopt a <q>one-size-fits-all</q> approach to
|
||||
filtersets: therefore, users must be able to define their own sets of
|
||||
<q>allowed</q> elements, as well as switch in-between doctypes of HTML.</p>
|
||||
<p>HTML Purifier currently natively supports only a subset of HTML's
|
||||
allowed elements, attributes, and behavior. This is by design,
|
||||
but as the user is always right, they'll need some method to overload
|
||||
these behaviors.</p>
|
||||
|
||||
<p>Our goals are to let the user:</p>
|
||||
|
||||
@ -26,20 +27,18 @@ filtersets: therefore, users must be able to define their own sets of
|
||||
<dt>Select</dt>
|
||||
<dd><ul>
|
||||
<li>Doctype</li>
|
||||
<li>Filtersets: Rich / Plain / Full ...</li>
|
||||
<li>Mode: Lenient / Correctional</li>
|
||||
<li>Collections (?): Safe / Unsafe</li>
|
||||
<li>Modules / Tags / Attributes</li>
|
||||
<li>Elements / Attributes / Modules</li>
|
||||
<li>Filterset</li>
|
||||
</ul></dd>
|
||||
<dt>Customize</dt>
|
||||
<dd><ul>
|
||||
<li>Tags / Attributes / Attribute Types</li>
|
||||
<li>Filtersets</li>
|
||||
<li>Root Node</li>
|
||||
<li>Attributes</li>
|
||||
<li>Elements</li>
|
||||
</ul></dd>
|
||||
<dt>Create</dt>
|
||||
<dt>Internals</dt>
|
||||
<dd><ul>
|
||||
<li>Modules / Tags / Attributes / Attribute Types</li>
|
||||
<li>Modules / Elements / Attributes / Attribute Types</li>
|
||||
<li>Filtersets</li>
|
||||
<li>Doctype</li>
|
||||
</ul></dd>
|
||||
@ -47,11 +46,14 @@ filtersets: therefore, users must be able to define their own sets of
|
||||
|
||||
<h2>Select</h2>
|
||||
|
||||
<p>For basic use, the user will have to specify some basic parameters. This
|
||||
is not strictly necessary, as HTML Purifier's default setting will always
|
||||
output safe code, but is required for standards-compliant output.</p>
|
||||
|
||||
<h3>Selecting a Doctype</h3>
|
||||
|
||||
<p>By default, users will use a doctype-based, permissive but secure
|
||||
whitelist. They must define a <strong>doctype</strong>, and this serves
|
||||
as the first method of determining a filterset.</p>
|
||||
<p>The first thing to select is the <strong>doctype</strong>. This
|
||||
is essential for standards-compliant output.</p>
|
||||
|
||||
<p class="technical">This identifier is based
|
||||
on the name the W3C has given to the document type and <em>not</em>
|
||||
@ -61,117 +63,131 @@ the DTD identifier.</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
|
||||
|
||||
<h3>Selecting a Filterset</h3>
|
||||
|
||||
<p>However, selecting this doctype doesn't mean much, because if we
|
||||
adhered exactly to the definition we would be letting XSS and other
|
||||
nasties through. HTML Purifier must, in its filterset, allow a subset
|
||||
of the doctype, which we shall call a <strong>filterset</strong>.</p>
|
||||
|
||||
<p>By default, HTML Purifier will use the <strong>Rich</strong>
|
||||
filterset, which allows as many elements as possible with untrusted
|
||||
sources. Other possible filtersets could be:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Full</dt>
|
||||
<dd>Allows the full span of elements in the doctype, good if you want
|
||||
HTML Purifier to work as a Tidy substitute but not to strip
|
||||
anything out.</dd>
|
||||
<dt>Plain</dt>
|
||||
<dd>Provides a minimum set of tags for semantic markup of things
|
||||
like blog comments.</dd>
|
||||
</dl>
|
||||
|
||||
<p>Extension-authors would be able to define custom filtersets for
|
||||
other users to use.</p>
|
||||
|
||||
<p>A possible call to select a filterset would be:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Filterset', 'Rich');</pre>
|
||||
<p>Due to historical reasons, the default doctype is XHTML 1.0
|
||||
Transitional, however, we really shouldn't be guessing what the user's
|
||||
doctype is. Fortunately, people who can't be bothered to set this won't
|
||||
be bothered when their pages stop validating.</p>
|
||||
|
||||
<h3>Selecting Mode</h3>
|
||||
|
||||
<p>Within filtersets, there are various <strong>modes</strong> of operation.
|
||||
<p>Within doctypes, there are various <strong>modes</strong> of operation.
|
||||
These indicate variant behaviors that, while not strictly changing the
|
||||
allowed set of elements and attributes, will definitely affect the output.
|
||||
allowed set of elements and attributes, definitely affect the output.
|
||||
Currently, we have two modes, which may be used together:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Lenient</dt>
|
||||
<dd>Deprecated elements and attributes will be transformed into
|
||||
standards-compliant alternatives when explicitly disallowed. For
|
||||
example, in the XHTML 1.0 Strict doctype, a <code>center</code>
|
||||
tag would be turned into a <code>div</code> with the CSS property
|
||||
<dd>
|
||||
<p>Deprecated elements and attributes will be transformed into
|
||||
standards-compliant alternatives when explicitly disallowed.</p>
|
||||
<p>For example, in the XHTML 1.0 Strict doctype, a <code>center</code>
|
||||
element would be turned into a <code>div</code> with the CSS property
|
||||
<code>text-align:center;</code>, but in XHTML 1.0 Transitional
|
||||
the tag would be preserved. This mode is on by default.</dd>
|
||||
<dt>Correctional</dt>
|
||||
<dd>Deprecated elements and attributes will be transformed into
|
||||
standards-compliant alternatives whenever possible. Referring
|
||||
back to the previous example, the <code>center</code> tag would
|
||||
be transformed in both cases. However, tags without a
|
||||
the element would be preserved.</p>
|
||||
<p>This mode is on by default.</p>
|
||||
</dd>
|
||||
<dt>Correctional[items to correct]</dt>
|
||||
<dd>
|
||||
<p>Deprecated elements and attributes will be transformed into
|
||||
standards-compliant alternatives whenever possible.
|
||||
It may have various levels of operation.</p>
|
||||
<p>Referring back to the previous example, the <code>center</code> element would
|
||||
be transformed in both cases. However, elements without a
|
||||
reasonable standards-compliant alternative will be preserved
|
||||
in their form. This mode is on by default. It may have
|
||||
various levels of operation.</dd>
|
||||
in their form.</p>
|
||||
<p>A user may want to correct certain deprecated attributes, but
|
||||
not others. For example, the <code>bgcolor</code> attribute may be
|
||||
acceptable, but the <code>center</code> element not; also, possibly,
|
||||
an HTML Purifier transformation may be buggy, so the user wants
|
||||
to forgo it. Thus, correctional accepts an array defining which
|
||||
elements and attributes to cleanup, or no parameter at all, which
|
||||
means everything gets corrected. This also means that each
|
||||
correction needs to be given a unique ID that can be referenced
|
||||
in this manner. (We may also allow globbing, like *.name or a.*
|
||||
for mass-enabling correction, and subtractive mode, where things
|
||||
specified stop correction.) This array gets passed into the
|
||||
constructor of the mode's module.</p>
|
||||
<p>This mode is on by default.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
|
||||
<p>A possible call to select modes would be:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Mode', array('correctional', 'lenient'));</pre>
|
||||
|
||||
<p>If modes have extra parameters, a hash might work well:</p>
|
||||
<p>If modes have extra parameters, a hash is necessary:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Mode', array(
|
||||
'correctional' => 9, // strongest level
|
||||
'correctional' => 'center,a.name',
|
||||
'lenient' => true // this one's just boolean
|
||||
));</pre>
|
||||
|
||||
<p>Modes may possibly be wrapped up with the filterset declaration:</p>
|
||||
<p>Modes may be specified along with the doctype declaration (we may want
|
||||
to get a better set of separator characters):</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Filterset', 'Rich: correctional, lenient');</pre>
|
||||
<pre>$config->setDoctype('XHTML 1.0 Transitional', '+correctional[center,a.name] -lenient');</pre>
|
||||
|
||||
<p>Further investigation in this field is necessary.</p>
|
||||
<p>
|
||||
With regards to the various levels of operation conjectured in the
|
||||
Correctional mode, this is prompted by the fact that a user may want to
|
||||
correct certain problems but not others, for example, fix the <code>center</code>
|
||||
element but not the <code>u</code> element, both of which are deprecated.
|
||||
Having an integer <q>level</q> will not work very well for such fine
|
||||
grained tweaking, but an array of specific settings might.</p>
|
||||
|
||||
<h3>Selecting Modules / Tags / Attributes</h3>
|
||||
<h3>Selecting Elements / Attributes / Modules</h3>
|
||||
|
||||
<p></p>
|
||||
|
||||
<p>If this cookie cutter approach doesn't appeal to a user, they may
|
||||
decide to roll their own filterset by selecting modules, tags and
|
||||
decide to roll their own filterset by selecting modules, elements and
|
||||
attributes to allow.</p>
|
||||
|
||||
<p class="technical">This would make use of the same facilities
|
||||
as a filterset author would use, except that it would go under an
|
||||
<q>anonymous</q> filterset that would be auto-selected if any of the
|
||||
relevant module/tag/attribute selection configuration directives were
|
||||
relevant module/element/attribute selection configuration directives were
|
||||
non-null.</p>
|
||||
|
||||
<p>On the highest level, a user will usually be most interested in
|
||||
directly specifying which elements and attributes are desired. For
|
||||
example:</p>
|
||||
<p>In practice, this is the most commonly demanded feature. Most users are
|
||||
perfectly happy defining a filterset that looks like:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedElements', 'a,b,em,p,blockquote,code,i');</pre>
|
||||
<pre>$config->setAllowedHTML('a[href,title];em;p;blockquote');</pre>
|
||||
|
||||
<p>Attribute declarations could be merged into this declaration as such:</p>
|
||||
<p class="technical">The directive %HTML.Allowed is a convenience function
|
||||
that may be fully expressed with the legacy interface, and thus is
|
||||
given its own setter.</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Allowed', 'a[href,title],b,em,p[class],blockquote[cite],code,i');</pre>
|
||||
<p>We currently support a separated interface, which also must be preserved:</p>
|
||||
|
||||
<p>...or be kept separate:</p>
|
||||
<pre>$config->set('HTML', 'AllowedElements', 'a,em,p,blockquote');
|
||||
$config->set('HTML', 'AllowedAttributes', 'a.href,a.title');</pre>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedAttributes', 'a.href,a.title,p.class,blockquote.cite');</pre>
|
||||
<p>A user may also choose to allow modules:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedModules', 'Hypertext,Text,Lists'); // or
|
||||
$config->setAllowedHTML('Hypertext,Text,Lists');</pre>
|
||||
|
||||
<p>But it is not expected that this feature will be widely used.</p>
|
||||
|
||||
<p class="fixme">The granularity of these modules is too coarse for
|
||||
the average user (for example, the core module loads everything from
|
||||
the essential <code>p</code> element to the not-so-safe <code>h1</code>
|
||||
element). How do we make this still a viable solution? Possible answers
|
||||
may be sub-modules or module parameters. This may not even be a problem,
|
||||
considering that most people won't be selecting modules.</p>
|
||||
|
||||
<p class="technical">Modules are distinguished from regular elements by the
|
||||
case of their first letter. While XML distinguishes between and allows
|
||||
lower and uppercase letters in element names, most well-known XML
|
||||
languages use only lower-case
|
||||
element names for sake of consistency.</p>
|
||||
|
||||
<p class="technical">Considering that, internally speaking, as mandated by
|
||||
the XHTML 1.1 Modularization specification, we have organized our
|
||||
elements around modules, considerable gymnastics will be needed to
|
||||
get this sort of functionality working.</p>
|
||||
|
||||
<p>A user may also specify a module to load a class of elements and attributes
|
||||
into their filterset:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Allowed', 'Hypertext,Core');</pre>
|
||||
|
||||
<p class="fixme">The granularity of these modules is too coarse for
|
||||
the average user (for example, the core module loads everything from
|
||||
the essential <code>p</code> tag to the not-so-safe <code>h1</code>
|
||||
tag). How do we make this still a viable solution?</p>
|
||||
|
||||
<h3>Unified selector</h3>
|
||||
|
||||
<p>Because selecting each and every one of these configuration options
|
||||
@ -183,6 +199,89 @@ for selecting a filterset. Possibility:</p>
|
||||
<p>...which is simply a light wrapper over the individual configuration
|
||||
calls. A custom config file format or text format could also be adopted.</p>
|
||||
|
||||
<h2>Customize</h2>
|
||||
|
||||
<p>By reviewing topic posts in the support forum, we determined that
|
||||
there were two primarily demanded customization features people wanted:
|
||||
to add an attribute to an existing element, and to add an element.
|
||||
Thus, we'll want to create convenience functions for these common
|
||||
use-cases.</p>
|
||||
|
||||
<p>Note that the functions described here are only available if
|
||||
a raw copy of <code>HTMLPurifier_HTMLDefinition</code> was retrieved.
|
||||
<code>addAttribute</code> may work on a processed copy, but for
|
||||
consistency's sake we will mandate this for everything.</p>
|
||||
|
||||
<h3>Attributes</h3>
|
||||
|
||||
<p>An attribute is bound to an element by a name and has a specific
|
||||
<code>AttrDef</code> that validates it. Thus, the interface should
|
||||
be:</p>
|
||||
|
||||
<pre>function addAttribute($element, $attribute, $attribute_def);</pre>
|
||||
|
||||
<p>With a use-case that looks like:</p>
|
||||
|
||||
<pre>$def->addAttribute('a', 'rel', new HTMLPurifier_AttrDef_Enum(array('nofollow')));</pre>
|
||||
|
||||
<p>The <code>$attribute_def</code> value can be a little flexible,
|
||||
to make things simpler. We'll let it also be:</p>
|
||||
|
||||
<ul>
|
||||
<li>Class name: We'll instantiate it for you</li>
|
||||
<li>Function name: We'll create an <code>HTMLPurifier_AttrDef_Anonymous</code>
|
||||
class with that function registered as a callback.</li>
|
||||
<li>String attribute type: We'll use <code>HTMLPurifier_AttrTypes</code>
|
||||
</li>
|
||||
<li>String starting with <code>enum(</code>: We'll explode it and stuff it in an
|
||||
<code>HTMLPurifier_AttrDef_Enum</code> for you.</li>
|
||||
</ul>
|
||||
|
||||
<p>Making the previous example written as:</p>
|
||||
|
||||
<pre>$def->addAttribute('a', 'rel', 'enum(nofollow)');</pre>
|
||||
|
||||
<h3>Elements</h3>
|
||||
|
||||
<p>An element requires certain information as specified by
|
||||
<code>HTMLPurifier_ElementDef</code>. However, not all of it is necessary;
|
||||
the usual things required are:</p>
|
||||
|
||||
<ul>
|
||||
<li>Attributes</li>
|
||||
<li>Content model/type</li>
|
||||
<li>Registration in a content set</li>
|
||||
</ul>
|
||||
|
||||
<p>This suggests an API like this:</p>
|
||||
|
||||
<pre>function addElement($element, $type, $content_model, $attributes = array());</pre>
|
||||
|
||||
<p>Each parameter explained in depth:</p>
|
||||
|
||||
<dl>
|
||||
<dt><code>$element</code></dt>
|
||||
<dd>Element name, ex. 'label'</dd>
|
||||
<dt><code>$type</code></dt>
|
||||
<dd>Content set to register in, ex. 'Inline' or 'Flow'</dd>
|
||||
<dt><code>$content_model</code></dt>
|
||||
<dd>Description of allowed children. This is a merged form of
|
||||
<code>HTMLPurifier_ElementDef</code>'s member variables
|
||||
<code>$content_model</code> and <code>$content_model_type</code>,
|
||||
where the form is <q>Type: Model</q>, ex. 'Optional: Inline'.</dd>
|
||||
<dt><code>$attributes</code></dt>
|
||||
<dd>Array of attribute names to attribute definitions, much like
|
||||
the above-described attribute customization.</dd>
|
||||
</dl>
|
||||
|
||||
<p>A possible usage:</p>
|
||||
|
||||
<pre>$def->addElement('font', 'Inline', 'Optional: Inline',
|
||||
array(0 => array('Common'), 'color' => 'Color'));</pre>
|
||||
|
||||
<p>We may want Common attribute collection inclusion to be added
|
||||
by default.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@ -151,7 +151,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla. Unknown target milestone.</td></tr>
|
||||
<tr class="css1"><td>height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="css1 impl-yes"><td>height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="danger css1 impl-yes"><td>list-style-image</td><td>Dangerous?</td></tr>
|
||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||
<tr class="impl-no"><td>min-height</td></tr>
|
||||
@ -244,8 +244,8 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tbody>
|
||||
<tr><th colspan="3">Miscellaneous</th></tr>
|
||||
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
||||
<tr><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||
<tr><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||
<tr class="impl-yes"><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||
<tr class="impl-yes"><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
@ -262,28 +262,28 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||
<tr><th colspan="3">Transform, target milestone 1.6</th></tr>
|
||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||
<tr><td>TABLE</td></tr>
|
||||
<tr><td>HR</td><td>Near-equivalent style 'text-align' (Works for IE and Opera, but not Firefox). Also try <code>margin-right:auto; margin-left:0;</code> for left or <code>margin-right:0; margin-left:auto;</code> for right (optionally replacing 0 with the original margin for that side)</td></tr>
|
||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>TR</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>border</td><td>IMG</td><td>Near equivalent style 'border-width', as it only applies when link present</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="3">bgcolor</td><td>TABLE</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>TR</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>TD, TH</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>border</td><td>IMG</td><td>Equivalent style <code>border:[number]px solid</code></td></tr>
|
||||
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||
<tr class="impl-yes"><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix</td></tr>
|
||||
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
||||
<tr><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||
<tr><td>A</td><td>Turn into ID? (not deprecated, though in which specs?)</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||
<tr class="impl-yes"><td>A</td><td>Turn into ID</td></tr>
|
||||
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'height', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
@ -291,8 +291,8 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><td>UL</td></tr>
|
||||
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr>
|
||||
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr><td>TD, TH</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="impl-yes"><td>TD, TH</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
@ -1003,7 +1003,11 @@ when dealing with Unicode text:</p>
|
||||
</ul></li>
|
||||
</ul>
|
||||
|
||||
<p>...and always think in bytes, not characters. If you use strpos()
|
||||
<p>Note: this list applies to UTF-8 encoded text only: if you have
|
||||
a string that you are 100% sure is ASCII, be my guest and use
|
||||
<code>strtolower</code> (HTML Purifier uses this function.)</p>
|
||||
|
||||
<p>Regardless, always think in bytes, not characters. If you use strpos()
|
||||
to find the position of a character, it will be in bytes, but this
|
||||
usually won't matter since substr() also operates with byte indices!</p>
|
||||
|
||||
|
@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 1.5.0 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 1.6.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@ -64,7 +64,7 @@ require_once 'HTMLPurifier/Encoder.php';
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '1.5.0';
|
||||
var $version = '1.6.0';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
|
@ -43,6 +43,14 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'is set to a non-empty value! This directive was available since 1.2.0.'
|
||||
);
|
||||
|
||||
// Attr.IDBlacklistRegexp: optional PCRE pattern; defaults to null (no
// blacklist). The description below is user-facing documentation text.
HTMLPurifier_ConfigSchema::define(
    'Attr', 'IDBlacklistRegexp', null, 'string/null',
    'PCRE regular expression to be matched against all IDs. If the expression '.
    'matches, the ID is rejected. Use this with care: may cause '.
    'significant degradation. ID matching is done after all other '.
    'validation. This directive was available since 1.6.0.'
);
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute ID.
|
||||
* @warning Even though this is the id processor, it
|
||||
@ -94,6 +102,11 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
$result = ($trim === '');
|
||||
}
|
||||
|
||||
$regexp = $config->get('Attr', 'IDBlacklistRegexp');
|
||||
if ($regexp && preg_match($regexp, $id)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
||||
|
||||
// if no change was made to the ID, return the result
|
||||
|
75
library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
Normal file
75
library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
Normal file
@ -0,0 +1,75 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedRel', array(), 'lookup',
|
||||
'List of allowed forward document relationships in the rel attribute. '.
|
||||
'Common values may be nofollow or print. By default, this is empty, '.
|
||||
'meaning that no document relationships are allowed. This directive '.
|
||||
'was available since 1.6.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedRev', array(), 'lookup',
|
||||
'List of allowed reverse document relationships in the rev attribute. '.
|
||||
'This attribute is a bit of an edge-case; if you don\'t know what it '.
|
||||
'is for, stay away. This directive was available since 1.6.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates a rel/rev link attribute against a directive of allowed values
|
||||
* @note We cannot use Enum because link types allow multiple
|
||||
* values.
|
||||
* @note Assumes link types are ASCII text
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{
    
    /** Maps each supported attribute name to the Attr directive holding its whitelist */
    var $configLookup = array(
        'rel' => 'AllowedRel',
        'rev' => 'AllowedRev'
    );
    
    /** Name of the Attr directive this instance reads in validate() */
    var $name;
    
    /**
     * @param $name Attribute to validate; must be a key of $configLookup
     *              ('rel' or 'rev'), otherwise E_USER_ERROR is triggered
     *              and $name is left unset.
     */
    function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
        if (isset($this->configLookup[$name])) {
            $this->name = $this->configLookup[$name];
            return;
        }
        trigger_error('Unrecognized attribute name for link '.
            'relationship.', E_USER_ERROR);
    }
    
    /**
     * Filters a space-separated list of link types down to the allowed ones.
     * @param $string Raw attribute value
     * @param $config Configuration object queried for the whitelist
     * @param $context Context object (not used here)
     * @return Space-joined string of the allowed, lowercased, de-duplicated
     *         link types in order of first appearance, or false when the
     *         whitelist is empty or nothing survives.
     */
    function validate($string, $config, &$context) {
        
        $allowed = $config->get('Attr', $this->name);
        if (empty($allowed)) return false;
        
        $tokens = explode(' ', $this->parseCDATA($string));
        
        // keyed by link type so that repeated values collapse into one
        $accepted = array();
        foreach ($tokens as $token) {
            $token = strtolower(trim($token));
            if (isset($allowed[$token])) {
                $accepted[$token] = true;
            }
        }
        
        if (empty($accepted)) return false;
        
        return implode(' ', array_keys($accepted));
        
    }
    
}
|
||||
|
||||
?>
|
28
library/HTMLPurifier/AttrTransform/BgColor.php
Normal file
28
library/HTMLPurifier/AttrTransform/BgColor.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated bgcolor attribute to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_BgColor
extends HTMLPurifier_AttrTransform {
    
    /**
     * Moves a bgcolor attribute into a background-color declaration that is
     * prepended to the style attribute.
     * @param $attr Associative array of attributes
     * @param $config Configuration object (not used here)
     * @param $context Context object (not used here)
     * @return Attribute array without bgcolor; unchanged if bgcolor was absent
     */
    function transform($attr, $config, &$context) {
        
        if (isset($attr['bgcolor'])) {
            
            $color = $attr['bgcolor'];
            unset($attr['bgcolor']);
            // some validation should happen here
            
            $existing = '';
            if (isset($attr['style'])) $existing = $attr['style'];
            $attr['style'] = 'background-color:' . $color . ';' . $existing;
            
        }
        
        return $attr;
        
    }
    
}
|
||||
|
||||
?>
|
28
library/HTMLPurifier/AttrTransform/Border.php
Normal file
28
library/HTMLPurifier/AttrTransform/Border.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated border attribute to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Border
extends HTMLPurifier_AttrTransform {
    
    /**
     * Moves a border attribute into a "border:<value>px solid;" declaration
     * that is prepended to the style attribute.
     * @param $attr Associative array of attributes
     * @param $config Configuration object (not used here)
     * @param $context Context object (not used here)
     * @return Attribute array without border; unchanged if border was absent
     */
    function transform($attr, $config, &$context) {
        
        if (isset($attr['border'])) {
            
            $width = $attr['border'];
            unset($attr['border']);
            // some validation should happen here
            
            $existing = '';
            if (isset($attr['style'])) $existing = $attr['style'];
            $attr['style'] = 'border:' . $width . 'px solid;' . $existing;
            
        }
        
        return $attr;
        
    }
    
}
|
||||
|
||||
?>
|
33
library/HTMLPurifier/AttrTransform/Length.php
Normal file
33
library/HTMLPurifier/AttrTransform/Length.php
Normal file
@ -0,0 +1,33 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Class for handling width/height length attribute transformations to CSS
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
{
    
    /** Name of the HTML attribute to convert */
    var $name;
    /** Name of the CSS property the value is written to */
    var $cssName;
    
    /**
     * @param $name HTML attribute name
     * @param $css_name CSS property name; falls back to $name when falsy
     */
    function HTMLPurifier_AttrTransform_Length($name, $css_name = null) {
        $this->name = $name;
        if ($css_name) {
            $this->cssName = $css_name;
        } else {
            $this->cssName = $name;
        }
    }
    
    /**
     * Moves the configured length attribute into a CSS declaration that is
     * prepended to the style attribute. All-digit values get a 'px' suffix;
     * anything else is passed through untouched.
     * @param $attr Associative array of attributes
     * @param $config Configuration object (not used here)
     * @param $context Context object (not used here)
     * @return Attribute array without the length attribute; unchanged if it
     *         was absent
     */
    function transform($attr, $config, &$context) {
        $key = $this->name;
        if (isset($attr[$key])) {
            $value = $attr[$key];
            unset($attr[$key]);
            if (ctype_digit($value)) {
                $value = $value . 'px';
            }
            
            $existing = '';
            if (isset($attr['style'])) $existing = $attr['style'];
            $attr['style'] = $this->cssName . ':' . $value . ';' . $existing;
        }
        return $attr;
    }
    
}
|
||||
|
||||
?>
|
31
library/HTMLPurifier/AttrTransform/Name.php
Normal file
31
library/HTMLPurifier/AttrTransform/Name.php
Normal file
@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated name attribute to ID if necessary
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{
    
    /**
     * Removes the name attribute and reuses its value as id, unless an id
     * is already set, in which case the name value is discarded.
     * @param $attr Associative array of attributes
     * @param $config Configuration object (not used here)
     * @param $context Context object (not used here)
     * @return Attribute array without name; unchanged if name was absent
     */
    function transform($attr, $config, &$context) {
        
        if (isset($attr['name'])) {
            
            $candidate = $attr['name'];
            unset($attr['name']);
            
            // an existing ID wins over the name
            if (!isset($attr['id'])) {
                $attr['id'] = $candidate;
            }
            
        }
        
        return $attr;
        
    }
    
}
|
||||
|
||||
?>
|
@ -6,7 +6,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
* Pre-transform that changes deprecated align attribute to text-align.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_TextAlign
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
|
@ -162,7 +162,9 @@ class HTMLPurifier_CSSDefinition
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||
));
|
||||
|
||||
$this->info['width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
$this->info['width'] =
|
||||
$this->info['height'] =
|
||||
new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||
|
@ -218,18 +218,31 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info_parent, $this->config);
|
||||
}
|
||||
|
||||
// support template text
|
||||
$support = "(for information on implementing this, see the ".
|
||||
"support forums) ";
|
||||
|
||||
// setup allowed elements, SubtractiveWhitelist module
|
||||
$allowed_elements = $this->config->get('HTML', 'AllowedElements');
|
||||
if (is_array($allowed_elements)) {
|
||||
foreach ($this->info as $name => $d) {
|
||||
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
||||
unset($allowed_elements[$name]);
|
||||
}
|
||||
// emit errors
|
||||
foreach ($allowed_elements as $element => $d) {
|
||||
trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
|
||||
$allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
|
||||
$allowed_attributes_mutable = $allowed_attributes; // by copy!
|
||||
if (is_array($allowed_attributes)) {
|
||||
foreach ($this->info_global_attr as $attr_key => $info) {
|
||||
if (!isset($allowed_attributes["*.$attr_key"])) {
|
||||
unset($this->info_global_attr[$attr_key]);
|
||||
} elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
|
||||
unset($allowed_attributes_mutable["*.$attr_key"]);
|
||||
}
|
||||
}
|
||||
foreach ($this->info as $tag => $info) {
|
||||
@ -237,9 +250,27 @@ class HTMLPurifier_HTMLDefinition
|
||||
if (!isset($allowed_attributes["$tag.$attr"]) &&
|
||||
!isset($allowed_attributes["*.$attr"])) {
|
||||
unset($this->info[$tag]->attr[$attr]);
|
||||
} else {
|
||||
if (isset($allowed_attributes_mutable["$tag.$attr"])) {
|
||||
unset($allowed_attributes_mutable["$tag.$attr"]);
|
||||
} elseif (isset($allowed_attributes_mutable["*.$attr"])) {
|
||||
unset($allowed_attributes_mutable["*.$attr"]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// emit errors
|
||||
foreach ($allowed_attributes_mutable as $elattr => $d) {
|
||||
list($element, $attribute) = explode('.', $elattr);
|
||||
if ($element == '*') {
|
||||
trigger_error("Global attribute '$attribute' is not ".
|
||||
"supported in any elements $support",
|
||||
E_USER_WARNING);
|
||||
} else {
|
||||
trigger_error("Attribute '$attribute' in element '$element' not supported $support",
|
||||
E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Hypertext Module, defines hypertext links. Core Module.
|
||||
@ -21,8 +22,8 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
|
||||
// 'charset' => 'Charset',
|
||||
'href' => 'URI',
|
||||
//'hreflang' => 'LanguageCode',
|
||||
//'rel' => 'LinkTypes',
|
||||
//'rev' => 'LinkTypes',
|
||||
'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
|
||||
'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
|
||||
//'tabindex' => 'Number',
|
||||
//'type' => 'ContentType',
|
||||
);
|
||||
|
@ -8,6 +8,10 @@ require_once 'HTMLPurifier/TagTransform/Font.php';
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform/Lang.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/BgColor.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/Border.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/Name.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/Length.php';
|
||||
|
||||
/**
|
||||
* Proprietary module that transforms deprecated elements into Strict
|
||||
@ -20,7 +24,8 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
|
||||
var $name = 'TransformToStrict';
|
||||
|
||||
// we're actually modifying these elements, not defining them
|
||||
var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'blockquote');
|
||||
var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p',
|
||||
'blockquote', 'table', 'td', 'th', 'tr', 'img', 'a', 'hr');
|
||||
|
||||
var $info_tag_transform = array(
|
||||
// placeholders, see constructor for definitions
|
||||
@ -73,6 +78,23 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
|
||||
$this->info['blockquote']->content_model_type = 'strictblockquote';
|
||||
$this->info['blockquote']->child = false; // recalculate please!
|
||||
|
||||
$this->info['table']->attr_transform_pre['bgcolor'] =
|
||||
$this->info['tr']->attr_transform_pre['bgcolor'] =
|
||||
$this->info['td']->attr_transform_pre['bgcolor'] =
|
||||
$this->info['th']->attr_transform_pre['bgcolor'] = new HTMLPurifier_AttrTransform_BgColor();
|
||||
|
||||
$this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border();
|
||||
|
||||
$this->info['img']->attr_transform_pre['name'] =
|
||||
$this->info['a']->attr_transform_pre['name'] = new HTMLPurifier_AttrTransform_Name();
|
||||
|
||||
$this->info['td']->attr_transform_pre['width'] =
|
||||
$this->info['th']->attr_transform_pre['width'] =
|
||||
$this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width');
|
||||
|
||||
$this->info['td']->attr_transform_pre['height'] =
|
||||
$this->info['th']->attr_transform_pre['height'] = new HTMLPurifier_AttrTransform_Length('height');
|
||||
|
||||
}
|
||||
|
||||
var $defines_child_def = true;
|
||||
|
52
package.php
Normal file
52
package.php
Normal file
@ -0,0 +1,52 @@
|
||||
<?php
|
||||
|
||||
// Generates the PEAR package files (package2.xml plus the compatible
// package file 1 export) for the contents of the library/ directory.

set_time_limit(0);

require_once 'PEAR/PackageFileManager2.php';
PEAR::setErrorHandling(PEAR_ERROR_PRINT);

$options = array(
    'baseinstalldir' => '/',
    'packagefile' => 'package2.xml',
    'packagedirectory' => dirname(__FILE__) . '/library',
    'filelistgenerator' => 'file',
    'include' => array('*'),
    'ignore' => array('HTMLPurifier.auto.php'),
);

$description =
'HTML Purifier is an HTML filter that will remove all malicious code
(better known as XSS) with a thoroughly audited, secure yet permissive
whitelist and will also make sure your documents are standards
compliant.';

$notes = 'Major bugs were fixed and some major internal refactoring was undertaken. The visible changes include XHTML 1.1-style modularization of HTMLDefinition, rudimentary internationalization, and a fix for a fatal error when the PHP4 DOM XML extension was loaded. The x subtag is now allowed in language codes. Element by element AllowedAttribute declaration is now possible for global attributes. Instead of *.class, you can write span.class. The old syntax still works, and enables the attribute for all elements.';

$pkg = new PEAR_PackageFileManager2();
$pkg->setOptions($options);

// package identity
$pkg->setPackage('HTMLPurifier');
$pkg->setLicense('LGPL', 'http://www.gnu.org/licenses/lgpl.html');
$pkg->setSummary('Standards-compliant HTML filter');
$pkg->setDescription($description);

$pkg->addMaintainer('lead', 'edwardzyang', 'Edward Z. Yang', 'htmlpurifier@jpsband.org', 'yes');

// NOTE(review): the library itself reports version 1.6.0 in this release;
// confirm whether the 1.5 / 1.5.0 values and the notes text are
// intentionally left at the previous release.
$pkg->setChannel('hp.jpsband.org');
$pkg->setAPIVersion('1.5');
$pkg->setAPIStability('stable');
$pkg->setReleaseVersion('1.5.0');
$pkg->setReleaseStability('stable');

$pkg->addRelease();

$pkg->setNotes($notes);
$pkg->setPackageType('php');

// minimum requirements
$pkg->setPhpDep('4.3.9');
$pkg->setPearinstallerDep('1.4.3');

$pkg->generateContents();

// write the compatible package file 1 export first, then package2.xml
$compat =& $pkg->exportCompatiblePackageFile1();
$compat->writePackageFile();
$pkg->writePackageFile();
|
||||
|
||||
?>
|
@ -40,8 +40,8 @@ class HTMLPurifier_AttrDef_CSS_CompositeTest extends HTMLPurifier_AttrDefHarness
|
||||
$def1->setReturnValue('validate', $output, $def1_params);
|
||||
$def2->expectNever('validate');
|
||||
|
||||
$this->assertIdentical($output,
|
||||
$def->validate($input, $config, $context));
|
||||
$result = $def->validate($input, $config, $context);
|
||||
$this->assertIdentical($output, $result);
|
||||
|
||||
$def1->tally();
|
||||
$def2->tally();
|
||||
@ -60,8 +60,8 @@ class HTMLPurifier_AttrDef_CSS_CompositeTest extends HTMLPurifier_AttrDefHarness
|
||||
$def2->expectOnce('validate', $def_params);
|
||||
$def2->setReturnValue('validate', $output, $def_params);
|
||||
|
||||
$this->assertIdentical($output,
|
||||
$def->validate($input, $config, $context));
|
||||
$result = $def->validate($input, $config, $context);
|
||||
$this->assertIdentical($output, $result);
|
||||
|
||||
$def1->tally();
|
||||
$def2->tally();
|
||||
@ -80,8 +80,8 @@ class HTMLPurifier_AttrDef_CSS_CompositeTest extends HTMLPurifier_AttrDefHarness
|
||||
$def2->expectOnce('validate', $def_params);
|
||||
$def2->setReturnValue('validate', false, $def_params);
|
||||
|
||||
$this->assertIdentical($output,
|
||||
$def->validate($input, $config, $context));
|
||||
$result = $def->validate($input, $config, $context);
|
||||
$this->assertIdentical($output, $result);
|
||||
|
||||
$def1->tally();
|
||||
$def2->tally();
|
||||
|
@ -95,6 +95,15 @@ class HTMLPurifier_AttrDef_HTML_IDTest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
}
|
||||
|
||||
function testRegexp() {
|
||||
|
||||
$this->config->set('Attr', 'IDBlacklistRegexp', '/^g_/');
|
||||
|
||||
$this->assertDef('good_id');
|
||||
$this->assertDef('g_bad_id', false);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
24
tests/HTMLPurifier/AttrDef/HTML/LinkTypesTest.php
Normal file
24
tests/HTMLPurifier/AttrDef/HTML/LinkTypesTest.php
Normal file
@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_HTML_LinkTypesTest extends HTMLPurifier_AttrDefHarness
{
    
    /**
     * Runs the rel validator with 'nofollow' and 'foo' whitelisted.
     */
    function testNull() {
        
        $this->def = new HTMLPurifier_AttrDef_HTML_LinkTypes('rel');
        $this->config->set('Attr', 'AllowedRel', array('nofollow', 'foo'));
        
        // each pair is handed to assertDef as (input, expectation)
        $cases = array(
            array('', false),
            array('nofollow', true),
            array('nofollow foo', true),
            array('nofollow bar', 'nofollow'),
            array('bar', false),
        );
        
        foreach ($cases as $case) {
            $this->assertDef($case[0], $case[1]);
        }
        
    }
    
}
|
||||
|
||||
?>
|
43
tests/HTMLPurifier/AttrTransform/BgColorTest.php
Normal file
43
tests/HTMLPurifier/AttrTransform/BgColorTest.php
Normal file
@ -0,0 +1,43 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform/BgColor.php';
|
||||
require_once 'HTMLPurifier/AttrTransformHarness.php';
|
||||
|
||||
class HTMLPurifier_AttrTransform_BgColorTest extends HTMLPurifier_AttrTransformHarness
{
    
    /** Installs a fresh BgColor transform as the object under test. */
    function setUp() {
        parent::setUp();
        $this->obj = new HTMLPurifier_AttrTransform_BgColor();
    }
    
    function test() {
        
        $this->assertResult( array() );
        
        // we currently rely on the CSS validator to fix any problems.
        // This means that this transform, strictly speaking, supports
        // a superset of the functionality.
        
        // each pair is handed to assertResult as (input, expected output)
        $cases = array(
            array(
                array('bgcolor' => '#000000'),
                array('style' => 'background-color:#000000;')
            ),
            array(
                array('bgcolor' => '#000000', 'style' => 'font-weight:bold'),
                array('style' => 'background-color:#000000;font-weight:bold')
            ),
            // this may change when we natively support the datatype and
            // validate its contents before forwarding it on
            array(
                array('bgcolor' => '#F00'),
                array('style' => 'background-color:#F00;')
            ),
        );
        
        foreach ($cases as $case) {
            $this->assertResult($case[0], $case[1]);
        }
        
    }
    
}
|
||||
|
||||
?>
|
40
tests/HTMLPurifier/AttrTransform/BorderTest.php
Normal file
40
tests/HTMLPurifier/AttrTransform/BorderTest.php
Normal file
@ -0,0 +1,40 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform/Border.php';
|
||||
require_once 'HTMLPurifier/AttrTransformHarness.php';
|
||||
|
||||
|
||||
class HTMLPurifier_AttrTransform_BorderTest extends HTMLPurifier_AttrTransformHarness
{
    
    /** Installs a fresh Border transform as the object under test. */
    function setUp() {
        parent::setUp();
        $this->obj = new HTMLPurifier_AttrTransform_Border();
    }
    
    function test() {
        
        $this->assertResult( array() );
        
        // each pair is handed to assertResult as (input, expected output)
        $cases = array(
            array(
                array('border' => '1'),
                array('style' => 'border:1px solid;')
            ),
            // once again, no validation done here, we expect CSS validator
            // to catch it
            array(
                array('border' => '10%'),
                array('style' => 'border:10%px solid;')
            ),
            array(
                array('border' => '23', 'style' => 'font-weight:bold;'),
                array('style' => 'border:23px solid;font-weight:bold;')
            ),
        );
        
        foreach ($cases as $case) {
            $this->assertResult($case[0], $case[1]);
        }
        
    }
    
}
|
||||
|
||||
?>
|
37
tests/HTMLPurifier/AttrTransform/LengthTest.php
Normal file
37
tests/HTMLPurifier/AttrTransform/LengthTest.php
Normal file
@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform/Length.php';
|
||||
require_once 'HTMLPurifier/AttrTransformHarness.php';
|
||||
|
||||
class HTMLPurifier_AttrTransform_LengthTest extends HTMLPurifier_AttrTransformHarness
{
    
    /** Installs a Length transform for the width attribute as the object under test. */
    function setUp() {
        parent::setUp();
        $this->obj = new HTMLPurifier_AttrTransform_Length('width');
    }
    
    function test() {
        $this->assertResult( array() );
        
        // each pair is handed to assertResult as (input, expected output)
        $cases = array(
            array(
                array('width' => '10'),
                array('style' => 'width:10px;')
            ),
            array(
                array('width' => '10%'),
                array('style' => 'width:10%;')
            ),
            array(
                array('width' => '10%', 'style' => 'font-weight:bold'),
                array('style' => 'width:10%;font-weight:bold')
            ),
            // this behavior might change
            array(
                array('width' => 'asdf'),
                array('style' => 'width:asdf;')
            ),
        );
        
        foreach ($cases as $case) {
            $this->assertResult($case[0], $case[1]);
        }
    }
    
}
|
||||
|
||||
?>
|
28
tests/HTMLPurifier/AttrTransform/NameTest.php
Normal file
28
tests/HTMLPurifier/AttrTransform/NameTest.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform/Name.php';
|
||||
require_once 'HTMLPurifier/AttrTransformHarness.php';
|
||||
|
||||
class HTMLPurifier_AttrTransform_NameTest extends HTMLPurifier_AttrTransformHarness
{
    
    /** Installs a fresh Name transform as the object under test. */
    function setUp() {
        parent::setUp();
        $this->obj = new HTMLPurifier_AttrTransform_Name();
    }
    
    function test() {
        $this->assertResult( array() );
        
        // each pair is handed to assertResult as (input, expected output)
        $cases = array(
            // name becomes the id when no id is present
            array(
                array('name' => 'free'),
                array('id' => 'free')
            ),
            // an existing id is kept and the name is dropped
            array(
                array('name' => 'tryit', 'id' => 'tobad'),
                array('id' => 'tobad')
            ),
        );
        
        foreach ($cases as $case) {
            $this->assertResult($case[0], $case[1]);
        }
    }
    
}
|
||||
|
||||
?>
|
@ -172,6 +172,26 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
|
||||
'<img src="" alt="Invalid image" />'
|
||||
);
|
||||
|
||||
// name rewritten as id
|
||||
$this->assertResult(
|
||||
'<a name="foobar" />',
|
||||
'<a id="foobar" />',
|
||||
array('HTML.EnableAttrID' => true)
|
||||
);
|
||||
|
||||
// lengths
|
||||
$this->assertResult(
|
||||
'<td height="10" width="5%" /><th height="5%" width="10" /><hr width="10" height="10" />',
|
||||
'<td style="height:10px;width:5%;" /><th style="height:5%;width:10px;" /><hr style="width:10px;" />'
|
||||
);
|
||||
|
||||
// link types
|
||||
$this->assertResult(
|
||||
'<a href="foo" rel="nofollow" />',
|
||||
true,
|
||||
array('Attr.AllowedRel' => 'nofollow')
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ $test_files[] = 'AttrDef/HTML/LengthTest.php';
|
||||
$test_files[] = 'AttrDef/HTML/MultiLengthTest.php';
|
||||
$test_files[] = 'AttrDef/HTML/NmtokensTest.php';
|
||||
$test_files[] = 'AttrDef/HTML/PixelsTest.php';
|
||||
$test_files[] = 'AttrDef/HTML/LinkTypesTest.php';
|
||||
$test_files[] = 'AttrDef/IntegerTest.php';
|
||||
$test_files[] = 'AttrDef/LangTest.php';
|
||||
$test_files[] = 'AttrDef/TextTest.php';
|
||||
@ -34,8 +35,12 @@ $test_files[] = 'AttrDef/URI/IPv6Test.php';
|
||||
$test_files[] = 'AttrDef/URITest.php';
|
||||
$test_files[] = 'AttrDefTest.php';
|
||||
$test_files[] = 'AttrTransform/BdoDirTest.php';
|
||||
$test_files[] = 'AttrTransform/BgColorTest.php';
|
||||
$test_files[] = 'AttrTransform/BorderTest.php';
|
||||
$test_files[] = 'AttrTransform/ImgRequiredTest.php';
|
||||
$test_files[] = 'AttrTransform/LangTest.php';
|
||||
$test_files[] = 'AttrTransform/LengthTest.php';
|
||||
$test_files[] = 'AttrTransform/NameTest.php';
|
||||
$test_files[] = 'AttrTransform/TextAlignTest.php';
|
||||
$test_files[] = 'ChildDef/ChameleonTest.php';
|
||||
$test_files[] = 'ChildDef/CustomTest.php';
|
||||
|
Loading…
Reference in New Issue
Block a user