0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-20 19:25:19 +00:00

Transition is complete! Cleanup and class rearrangement now necessary.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1539 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2008-02-10 20:34:39 +00:00
parent c7e115c81c
commit 35f8b3c801
41 changed files with 87 additions and 951 deletions

View File

@ -55,7 +55,7 @@ class ConfigSchema_StringHashAdapter
}
if (isset($hash['ALIASES'])) {
$raw_aliases = $hash->offsetGet('ALIASES');
$raw_aliases = trim($hash->offsetGet('ALIASES'));
$aliases = preg_split('/\s*,\s*/', $raw_aliases);
foreach ($aliases as $alias) {
list($alias_ns, $alias_directive) = explode('.', $alias, 2);
@ -63,6 +63,11 @@ class ConfigSchema_StringHashAdapter
}
}
// We don't use these yet, but they're being used
if (isset($hash['VERSION'])) $hash->offsetGet('VERSION');
if (isset($hash['DEPRECATED-USE'])) $hash->offsetGet('DEPRECATED-USE');
if (isset($hash['DEPRECATED-VERSION'])) $hash->offsetGet('DEPRECATED-VERSION');
$this->_findUnused($hash);
}

View File

@ -43,16 +43,6 @@
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
// every class has an undocumented dependency to these, must be included!
HTMLPurifier_ConfigSchema::define(
'Core', 'CollectErrors', false, 'bool', '
Whether or not to collect errors found while filtering the document. This
is a useful way to give feedback to your users. <strong>Warning:</strong>
Currently this feature is very patchy and experimental, with lots of
possible error messages not yet implemented. It will not cause any problems,
but it may not help your users either. This directive has been available
since 2.0.0.
');
/**
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
*

View File

@ -1,30 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Core', 'ColorKeywords', array(
'maroon' => '#800000',
'red' => '#FF0000',
'orange' => '#FFA500',
'yellow' => '#FFFF00',
'olive' => '#808000',
'purple' => '#800080',
'fuchsia' => '#FF00FF',
'white' => '#FFFFFF',
'lime' => '#00FF00',
'green' => '#008000',
'navy' => '#000080',
'blue' => '#0000FF',
'aqua' => '#00FFFF',
'teal' => '#008080',
'black' => '#000000',
'silver' => '#C0C0C0',
'gray' => '#808080'
), 'hash', '
Lookup array of color names to six digit hexadecimal number corresponding
to color, with preceding hash mark. Used when parsing colors.
This directive has been available since 2.0.0.
');
/**
* Validates Color as defined by CSS.
*/

View File

@ -1,16 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Attr', 'AllowedFrameTargets', array(), 'lookup',
'Lookup table of all allowed link frame targets. Some commonly used '.
'link targets include _blank, _self, _parent and _top. Values should '.
'be lowercase, as validation will be done in a case-sensitive manner '.
'despite W3C\'s recommendation. XHTML 1.0 Strict does not permit '.
'the target attribute so this directive will have no effect in that '.
'doctype. XHTML 1.1 does not enable the Target module by default, you '.
'will have to manually enable it (see the module documentation for more details.)'
);
/**
* Special-case enum attribute definition that lazy loads allowed frame targets
*/

View File

@ -1,53 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Attr', 'EnableID', false, 'bool',
'Allows the ID attribute in HTML. This is disabled by default '.
'due to the fact that without proper configuration user input can '.
'easily break the validation of a webpage by specifying an ID that is '.
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
'the wind, enable this directive, but I strongly recommend you also '.
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
'versions.'
);
HTMLPurifier_ConfigSchema::defineAlias(
'HTML', 'EnableAttrID', 'Attr', 'EnableID'
);
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDPrefix', '', 'string',
'String to prefix to IDs. If you have no idea what IDs your pages '.
'may use, you may opt to simply add a prefix to all user-submitted ID '.
'attributes so that they are still usable, but will not conflict with '.
'core page IDs. Example: setting the directive to \'user_\' will result in '.
'a user submitted \'foo\' to become \'user_foo\' Be sure to set '.
'%HTML.EnableAttrID to true before using '.
'this. This directive was available since 1.2.0.'
);
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDPrefixLocal', '', 'string',
'Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If '.
'you need to allow multiple sets of '.
'user content on web page, you may need to have a seperate prefix that '.
'changes with each iteration. This way, seperately submitted user content '.
'displayed on the same page doesn\'t clobber each other. Ideal values '.
'are unique identifiers for the content it represents (i.e. the id of '.
'the row in the database). Be sure to add a seperator (like an underscore) '.
'at the end. Warning: this directive will not work unless %Attr.IDPrefix '.
'is set to a non-empty value! This directive was available since 1.2.0.'
);
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDBlacklistRegexp', null, 'string/null',
'PCRE regular expression to be matched against all IDs. If the expression '.
'is matches, the ID is rejected. Use this with care: may cause '.
'significant degradation. ID matching is done after all other '.
'validation. This directive was available since 1.6.0.'
);
/**
* Validates the HTML attribute ID.
* @warning Even though this is the id processor, it

View File

@ -1,20 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Attr', 'AllowedRel', array(), 'lookup',
'List of allowed forward document relationships in the rel attribute. '.
'Common values may be nofollow or print. By default, this is empty, '.
'meaning that no document relationships are allowed. This directive '.
'was available since 1.6.0.'
);
HTMLPurifier_ConfigSchema::define(
'Attr', 'AllowedRev', array(), 'lookup',
'List of allowed reverse document relationships in the rev attribute. '.
'This attribute is a bit of an edge-case; if you don\'t know what it '.
'is for, stay away. This directive was available since 1.6.0.'
);
/**
* Validates a rel/rev link attribute against a directive of allowed values
* @note We cannot use Enum because link types allow multiple

View File

@ -2,57 +2,8 @@
// special case filtering directives
HTMLPurifier_ConfigSchema::define(
'URI', 'Munge', null, 'string/null', '
<p>
Munges all browsable (usually http, https and ftp)
absolute URI\'s into another URI, usually a URI redirection service.
This directive accepts a URI, formatted with a <code>%s</code> where
the url-encoded original URI should be inserted (sample:
<code>http://www.google.com/url?q=%s</code>).
</p>
<p>
Uses for this directive:
</p>
<ul>
<li>
Prevent PageRank leaks, while being fairly transparent
to users (you may also want to add some client side JavaScript to
override the text in the statusbar). <strong>Notice</strong>:
Many security experts believe that this form of protection does not deter spam-bots.
</li>
<li>
Redirect users to a splash page telling them they are leaving your
website. While this is poor usability practice, it is often mandated
in corporate environments.
</li>
</ul>
<p>
This directive has been available since 1.3.0.
</p>
');
// disabling directives
HTMLPurifier_ConfigSchema::define(
'URI', 'Disable', false, 'bool', '
<p>
Disables all URIs in all forms. Not sure why you\'d want to do that
(after all, the Internet\'s founded on the notion of a hyperlink).
This directive has been available since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableResources', false, 'bool', '
<p>
Disables embedding resources, essentially meaning no pictures. You can
still link to them though. See %URI.DisableExternalResources for why
this might be a good idea. This directive has been available since 1.3.0.
</p>
');
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme

View File

@ -2,16 +2,6 @@
// this MUST be placed in post, as it assumes that any value in dir is valid
HTMLPurifier_ConfigSchema::define(
'Attr', 'DefaultTextDir', 'ltr', 'string',
'Defines the default text direction (ltr or rtl) of the document '.
'being parsed. This generally is the same as the value of the dir '.
'attribute in HTML, or ltr if that is not specified.'
);
HTMLPurifier_ConfigSchema::defineAllowedValues(
'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
);
/**
* Post-transform that ensures that bdo tags have the dir attribute set.
*/

View File

@ -2,21 +2,6 @@
// must be called POST validation
HTMLPurifier_ConfigSchema::define(
'Attr', 'DefaultInvalidImage', '', 'string',
'This is the default image an img tag will be pointed to if it does '.
'not have a valid src attribute. In future versions, we may allow the '.
'image tag to be removed completely, but due to design issues, this is '.
'not possible right now.'
);
HTMLPurifier_ConfigSchema::define(
'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
'This is the content of the alt tag of an invalid image if the user '.
'had not previously specified an alt attribute. It has no effect when the '.
'image is valid but there was no alt attribute present.'
);
/**
* Transform that supplies default values for the src and alt attributes
* in img tags, as well as prevents the img tag from being removed

View File

@ -1,22 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'CSS', 'DefinitionRev', 1, 'int', '
<p>
Revision identifier for your custom definition. See
%HTML.DefinitionRev for details. This directive has been available
since 2.0.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'CSS', 'Proprietary', false, 'bool', '
<p>
Whether or not to allow safe, proprietary CSS values. This directive
has been available since 3.0.0.
</p>
');
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition

View File

@ -5,15 +5,6 @@
// false = delete parent node and all children
// array(...) = replace children nodes with these
HTMLPurifier_ConfigSchema::define(
'Core', 'EscapeInvalidChildren', false, 'bool',
'When true, a child is found that is not allowed in the context of the '.
'parent element will be transformed into text as if it were ASCII. When '.
'false, that element and all internal tags will be dropped, though text '.
'will be preserved. There is no option for dropping the element but '.
'preserving child nodes.'
);
/**
* Defines allowed child nodes and validates tokens against it.
*/

View File

@ -68,20 +68,10 @@ class HTMLPurifier_ConfigSchema {
);
/**
* Initializes the default namespaces.
* Unserializes the default ConfigSchema.
*/
public function initialize() {
$this->defineNamespace('Core', 'Core features that are always available.');
$this->defineNamespace('Attr', 'Features regarding attribute validation.');
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
$this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)');
$this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality');
$this->defineNamespace('Filter', 'Configuration for filters');
$this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
$this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
public static function makeFromSerial() {
return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'));
}
/**
@ -91,8 +81,7 @@ class HTMLPurifier_ConfigSchema {
if ($prototype !== null) {
HTMLPurifier_ConfigSchema::$singleton = $prototype;
} elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) {
HTMLPurifier_ConfigSchema::$singleton = new HTMLPurifier_ConfigSchema();
HTMLPurifier_ConfigSchema::$singleton->initialize();
HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
}
return HTMLPurifier_ConfigSchema::$singleton;
}

File diff suppressed because one or more lines are too long

View File

@ -1,16 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Cache', 'SerializerPath', null, 'string/null', '
<p>
Absolute path with no trailing slash to store serialized definitions in.
Default is within the
HTML Purifier library inside DefinitionCache/Serializer. This
path must be writable by the webserver. This directive has been
available since 2.0.0.
</p>
');
class HTMLPurifier_DefinitionCache_Serializer extends
HTMLPurifier_DefinitionCache
{

View File

@ -1,19 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Cache', 'DefinitionImpl', 'Serializer', 'string/null', '
This directive defines which method to use when caching definitions,
the complex data-type that makes HTML Purifier tick. Set to null
to disable caching (not recommended, as you will see a definite
performance degradation). This directive has been available since 2.0.0.
');
HTMLPurifier_ConfigSchema::defineAlias(
'Core', 'DefinitionCache',
'Cache', 'DefinitionImpl'
);
/**
* Responsible for creating definition caches.
*/

View File

@ -1,21 +1,6 @@
<?php
// Legacy directives for doctype specification
HTMLPurifier_ConfigSchema::define(
'HTML', 'Strict', false, 'bool',
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
'This directive is deprecated in favor of %HTML.Doctype. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'XHTML', true, 'bool',
'Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor. '.
'This directive is deprecated in favor of %HTML.Doctype. '.
'This directive was available since 1.1.'
);
HTMLPurifier_ConfigSchema::defineAlias('Core', 'XHTML', 'HTML', 'XHTML');
class HTMLPurifier_DoctypeRegistry
{

View File

@ -1,53 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Core', 'Encoding', 'utf-8', 'istring',
'If for some reason you are unable to convert all webpages to UTF-8, '.
'you can use this directive as a stop-gap compatibility change to '.
'let HTML Purifier deal with non UTF-8 input. This technique has '.
'notable deficiencies: absolutely no characters outside of the selected '.
'character encoding will be preserved, not even the ones that have '.
'been ampersand escaped (this is due to a UTF-8 specific <em>feature</em> '.
'that automatically resolves all entities), making it pretty useless '.
'for anything except the most I18N-blind applications, although '.
'%Core.EscapeNonASCIICharacters offers fixes this trouble with '.
'another tradeoff. This directive '.
'only accepts ISO-8859-1 if iconv is not enabled.'
);
HTMLPurifier_ConfigSchema::define(
'Core', 'EscapeNonASCIICharacters', false, 'bool',
'This directive overcomes a deficiency in %Core.Encoding by blindly '.
'converting all non-ASCII characters into decimal numeric entities before '.
'converting it to its native encoding. This means that even '.
'characters that can be expressed in the non-UTF-8 encoding will '.
'be entity-ized, which can be a real downer for encodings like Big5. '.
'It also assumes that the ASCII repetoire is available, although '.
'this is the case for almost all encodings. Anyway, use UTF-8! This '.
'directive has been available since 1.4.0.'
);
if ( !function_exists('iconv') ) {
// only encodings with native PHP support
HTMLPurifier_ConfigSchema::defineAllowedValues(
'Core', 'Encoding', array(
'utf-8',
'iso-8859-1'
)
);
HTMLPurifier_ConfigSchema::defineValueAliases(
'Core', 'Encoding', array(
'iso8859-1' => 'iso-8859-1'
)
);
}
HTMLPurifier_ConfigSchema::define(
'Test', 'ForceNoIconv', false, 'bool',
'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
'exist and use only pure PHP implementations.'
);
/**
* A UTF-8 specific character encoder that handles cleaning and transforming.
* @note All functions in this class should be static.

View File

@ -1,46 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Filter', 'ExtractStyleBlocksEscaping', true, 'bool', '
<p>
Whether or not to escape the dangerous characters &lt;, &gt; and &amp;
as \3C, \3E and \26, respectively. This is can be safely set to false
if the contents of StyleBlocks will be placed in an external stylesheet,
where there is no risk of it being interpreted as HTML. This directive
has been available since 3.0.0.
</p>
'
);
HTMLPurifier_ConfigSchema::define(
'Filter', 'ExtractStyleBlocksScope', null, 'string/null', '
<p>
If you would like users to be able to define external stylesheets, but
only allow them to specify CSS declarations for a specific node and
prevent them from fiddling with other elements, use this directive.
It accepts any valid CSS selector, and will prepend this to any
CSS declaration extracted from the document. For example, if this
directive is set to <code>#user-content</code> and a user uses the
selector <code>a:hover</code>, the final selector will be
<code>#user-content a:hover</code>.
</p>
<p>
The comma shorthand may be used; consider the above example, with
<code>#user-content, #user-content2</code>, the final selector will
be <code>#user-content a:hover, #user-content2 a:hover</code>.
</p>
<p>
<strong>Warning:</strong> It is possible for users to bypass this measure
using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML
Purifier, and I am working to get it fixed. Until then, HTML Purifier
performs a basic check to prevent this.
</p>
<p>
This directive has been available since 3.0.0.
</p>
'
);
/**
* This filter extracts <style> blocks from input HTML, cleans them up
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')

View File

@ -1,49 +1,6 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Output', 'CommentScriptContents', true, 'bool',
'Determines whether or not HTML Purifier should attempt to fix up '.
'the contents of script tags for legacy browsers with comments. This '.
'directive was available since 2.0.0.'
);
HTMLPurifier_ConfigSchema::defineAlias('Core', 'CommentScriptContents', 'Output', 'CommentScriptContents');
// extension constraints could be factored into ConfigSchema
HTMLPurifier_ConfigSchema::define(
'Output', 'TidyFormat', false, 'bool', <<<HTML
<p>
Determines whether or not to run Tidy on the final output for pretty
formatting reasons, such as indentation and wrap.
</p>
<p>
This can greatly improve readability for editors who are hand-editing
the HTML, but is by no means necessary as HTML Purifier has already
fixed all major errors the HTML may have had. Tidy is a non-default
extension, and this directive will silently fail if Tidy is not
available.
</p>
<p>
If you are looking to make the overall look of your page's source
better, I recommend running Tidy on the entire page rather than just
user-content (after all, the indentation relative to the containing
blocks will be incorrect).
</p>
<p>
This directive was available since 1.1.1.
</p>
HTML
);
HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat');
HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', '
<p>
Newline string to format final output with. If left null, HTML Purifier
will auto-detect the default newline type of the system and use that;
you can manually override it here. Remember, \r\n is Windows, \r
is Mac, and \n is Unix. This directive was available since 2.0.1.
</p>
');
/**
* Generates HTML from tokens.
* @todo Refactor interface so that configuration/context is determined

View File

@ -3,129 +3,6 @@
// this definition and its modules MUST NOT define configuration directives
// outside of the HTML or Attr namespaces
HTMLPurifier_ConfigSchema::define(
'HTML', 'DefinitionID', null, 'string/null', '
<p>
Unique identifier for a custom-built HTML definition. If you edit
the raw version of the HTMLDefinition, introducing changes that the
configuration object does not reflect, you must specify this variable.
If you change your custom edits, you should change this directive, or
clear your cache. Example:
</p>
<pre>
$config = HTMLPurifier_Config::createDefault();
$config->set(\'HTML\', \'DefinitionID\', \'1\');
$def = $config->getHTMLDefinition();
$def->addAttribute(\'a\', \'tabindex\', \'Number\');
</pre>
<p>
In the above example, the configuration is still at the defaults, but
using the advanced API, an extra attribute has been added. The
configuration object normally has no way of knowing that this change
has taken place, so it needs an extra directive: %HTML.DefinitionID.
If someone else attempts to use the default configuration, these two
pieces of code will not clobber each other in the cache, since one has
an extra directive attached to it.
</p>
<p>
This directive has been available since 2.0.0, and in that version or
later you <em>must</em> specify a value to this directive to use the
advanced API features.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'DefinitionRev', 1, 'int', '
<p>
Revision identifier for your custom definition specified in
%HTML.DefinitionID. This serves the same purpose: uniquely identifying
your custom definition, but this one does so in a chronological
context: revision 3 is more up-to-date then revision 2. Thus, when
this gets incremented, the cache handling is smart enough to clean
up any older revisions of your definition as well as flush the
cache. This directive has been available since 2.0.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'BlockWrapper', 'p', 'string', '
<p>
String name of element to wrap inline elements that are inside a block
context. This only occurs in the children of blockquote in strict mode.
</p>
<p>
Example: by default value,
<code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> would become
<code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>.
The <code>&lt;p&gt;</code> tags can be replaced with whatever you desire,
as long as it is a block level element. This directive has been available
since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'Parent', 'div', 'string', '
<p>
String name of element that HTML fragment passed to library will be
inserted in. An interesting variation would be using span as the
parent element, meaning that only inline tags would be allowed.
This directive has been available since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedElements', null, 'lookup/null', '
<p>
If HTML Purifier\'s tag set is unsatisfactory for your needs, you
can overload it with your own list of tags to allow. Note that this
method is subtractive: it does its job by taking away from HTML Purifier
usual feature set, so you cannot add a tag that HTML Purifier never
supported in the first place (like embed, form or head). If you
change this, you probably also want to change %HTML.AllowedAttributes.
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
elements here, <em>that</em> directive will win and override.
This directive has been available since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedAttributes', null, 'lookup/null', '
<p>
If HTML Purifier\'s attribute set is unsatisfactory, overload it!
The syntax is "tag.attr" or "*.attr" for the global attributes
(style, id, class, dir, lang, xml:lang).
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
elements here, <em>that</em> directive will win and override. For
example, %HTML.EnableAttrID will take precedence over *.id in this
directive. You must set that directive to true before you can use
IDs at all. This directive has been available since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'Allowed', null, 'itext/null', '
<p>
This is a convenience directive that rolls the functionality of
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
Specify elements and attributes that are allowed using:
<code>element1[attr1|attr2],element2...</code>. You can also use
newlines instead of commas to separate elements.
</p>
<p>
<strong>Warning</strong>:
All of the constraints on the component directives are still enforced.
The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
whitelist: directly copy-pasting it here will probably result in
broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
are set, this directive has no effect.
This directive has been available since 2.0.0.
</p>
');
/**
* Definition of the purified HTML that describes allowed children,
* attributes, and many other things.

View File

@ -1,39 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'HTML', 'TidyLevel', 'medium', 'string', '
<p>General level of cleanliness the Tidy module should enforce.
There are four allowed values:</p>
<dl>
<dt>none</dt>
<dd>No extra tidying should be done</dd>
<dt>light</dt>
<dd>Only fix elements that would be discarded otherwise due to
lack of support in doctype</dd>
<dt>medium</dt>
<dd>Enforce best practices</dd>
<dt>heavy</dt>
<dd>Transform all deprecated elements and attributes to standards
compliant equivalents</dd>
</dl>
<p>This directive has been available since 2.0.0</p>
' );
HTMLPurifier_ConfigSchema::defineAllowedValues(
'HTML', 'TidyLevel', array('none', 'light', 'medium', 'heavy')
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'TidyAdd', array(), 'lookup', '
Fixes to add to the default set of Tidy fixes as per your level. This
directive has been available since 2.0.0.
' );
HTMLPurifier_ConfigSchema::define(
'HTML', 'TidyRemove', array(), 'lookup', '
Fixes to remove from the default set of Tidy fixes as per your level. This
directive has been available since 2.0.0.
' );
/**
* Abstract class for a set of proprietary modules that clean up (tidy)
* poorly written HTML.

View File

@ -1,72 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'HTML', 'Doctype', '', 'string',
'Doctype to use during filtering. '.
'Technically speaking this is not actually a doctype (as it does '.
'not identify a corresponding DTD), but we are using this name '.
'for sake of simplicity. When non-blank, this will override any older directives '.
'like %HTML.XHTML or %HTML.Strict.'
);
HTMLPurifier_ConfigSchema::defineAllowedValues('HTML', 'Doctype', array(
'', 'HTML 4.01 Transitional', 'HTML 4.01 Strict',
'XHTML 1.0 Transitional', 'XHTML 1.0 Strict',
'XHTML 1.1'
));
HTMLPurifier_ConfigSchema::define(
'HTML', 'CustomDoctype', null, 'string/null',
'
A custom doctype for power-users who defined there own document
type. This directive only applies when %HTML.Doctype is blank.
This directive has been available since 2.0.1.
'
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'Trusted', false, 'bool',
'Indicates whether or not the user input is trusted or not. If the '.
'input is trusted, a more expansive set of allowed tags and attributes '.
'will be used. This directive has been available since 2.0.0.'
);
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedModules', null, 'lookup/null', '
<p>
A doctype comes with a set of usual modules to use. Without having
to mucking about with the doctypes, you can quickly activate or
disable these modules by specifying which modules you wish to allow
with this directive. This is most useful for unit testing specific
modules, although end users may find it useful for their own ends.
</p>
<p>
If you specify a module that does not exist, the manager will silently
fail to use it, so be careful! User-defined modules are not affected
by this directive. Modules defined in %HTML.CoreModules are not
affected by this directive. This directive has been available since 2.0.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'CoreModules', array(
'Structure' => true,
'Text' => true,
'Hypertext' => true,
'List' => true,
'NonXMLCommonAttributes' => true,
'XMLCommonAttributes' => true,
'CommonAttributes' => true
), 'lookup', '
<p>
Certain modularized doctypes (XHTML, namely), have certain modules
that must be included for the doctype to be an conforming document
type: put those modules here. By default, XHTML\'s core modules
are used. You can set this to a blank array to disable core module
protection, but this is not recommended. This directive has been
available since 2.0.0.
</p>
');
class HTMLPurifier_HTMLModuleManager
{

View File

@ -1,10 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDBlacklist', array(), 'list',
'Array of IDs not allowed in the document.'
);
/**
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
* @note In Slashdot-speak, dupe means duplicate.

View File

@ -1,36 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'AutoParagraph', false, 'bool', '
<p>
This directive turns on auto-paragraphing, where double newlines are
converted in to paragraphs whenever possible. Auto-paragraphing:
</p>
<ul>
<li>Always applies to inline elements or text in the root node,</li>
<li>Applies to inline elements or text with double newlines in nodes
that allow paragraph tags,</li>
<li>Applies to double newlines in paragraph tags</li>
</ul>
<p>
<code>p</code> tags must be allowed for this directive to take effect.
We do not use <code>br</code> tags for paragraphing, as that is
semantically incorrect.
</p>
<p>
To prevent auto-paragraphing as a content-producer, refrain from using
double-newlines except to specify a new paragraph or in contexts where
it has special meaning (whitespace usually has no meaning except in
tags like <code>pre</code>, so this should not be difficult.) To prevent
the paragraphing of inline text adjacent to block elements, wrap them
in <code>div</code> tags (the behavior is slightly different outside of
the root node.)
</p>
<p>
This directive has been available since 2.0.1.
</p>
');
/**
* Injector that auto paragraphs text in the root node based on
* double-spacing.

View File

@ -1,14 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'Linkify', false, 'bool', '
<p>
This directive turns on linkification, auto-linking http, ftp and
https URLs. <code>a</code> tags with the <code>href</code> attribute
must be allowed. This directive has been available since 2.0.1.
</p>
');
/**
* Injector that converts http, https and ftp text URLs to actual links.
*/

View File

@ -1,24 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'PurifierLinkify', false, 'bool', '
<p>
Internal auto-formatter that converts configuration directives in
syntax <a>%Namespace.Directive</a> to links. <code>a</code> tags
with the <code>href</code> attribute must be allowed.
This directive has been available since 2.0.1.
</p>
');
HTMLPurifier_ConfigSchema::define(
'AutoFormatParam', 'PurifierLinkifyDocURL', '#%s', 'string', '
<p>
Location of configuration documentation to link to, let %s substitute
into the configuration\'s namespace and directive names sans the percent
sign. This directive has been available since 2.0.1.
</p>
');
/**
* Injector that converts configuration directive syntax %Namespace.Directive
* to links

View File

@ -1,13 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Core', 'Language', 'en', 'string', '
ISO 639 language code for localizable things in HTML Purifier to use,
which is mainly error reporting. There is currently only an English (en)
translation, so this directive is currently useless.
This directive has been available since 2.0.0.
');
/**
* Class responsible for generating HTMLPurifier_Language objects, managing
* caching and fallbacks.

View File

@ -1,72 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Core', 'ConvertDocumentToFragment', true, 'bool', '
This parameter determines whether or not the filter should convert
input that is a full document with html and body tags to a fragment
of just the contents of a body tag. This parameter is simply something
HTML Purifier can do during an edge-case: for most inputs, this
processing is not necessary.
');
HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
HTMLPurifier_ConfigSchema::define(
'Core', 'LexerImpl', null, 'mixed/null', '
<p>
This parameter determines what lexer implementation can be used. The
valid values are:
</p>
<dl>
<dt><em>null</em></dt>
<dd>
Recommended, the lexer implementation will be auto-detected based on
your PHP-version and configuration.
</dd>
<dt><em>string</em> lexer identifier</dt>
<dd>
This is a slim way of manually overridding the implementation.
Currently recognized values are: DOMLex (the default PHP5 implementation)
and DirectLex (the default PHP4 implementation). Only use this if
you know what you are doing: usually, the auto-detection will
manage things for cases you aren\'t even aware of.
</dd>
<dt><em>object</em> lexer instance</dt>
<dd>
Super-advanced: you can specify your own, custom, implementation that
implements the interface defined by <code>HTMLPurifier_Lexer</code>.
I may remove this option simply because I don\'t expect anyone
to use it.
</dd>
</dl>
<p>
This directive has been available since 2.0.0.
</p>
'
);
HTMLPurifier_ConfigSchema::define(
'Core', 'MaintainLineNumbers', null, 'bool/null', '
<p>
If true, HTML Purifier will add line number information to all tokens.
This is useful when error reporting is turned on, but can result in
significant performance degradation and should not be used when
unnecessary. This directive must be used with the DirectLex lexer,
as the DOMLex lexer does not (yet) support this functionality.
If the value is null, an appropriate value will be selected based
on other configuration. This directive has been available since 2.0.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'Core', 'AggressivelyFixLt', false, 'bool', '
This directive enables aggressive pre-filter fixes HTML Purifier can
perform in order to ensure that open angled-brackets do not get killed
during parsing stage. Enabling this will result in two preg_replace_callback
calls and one preg_replace call for every bit of HTML passed through here.
It is not necessary and will have no effect for PHP 4.
This directive has been available since 2.1.0.
');
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*

View File

@ -1,19 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Core', 'DirectLexLineNumberSyncInterval', 0, 'int', '
<p>
Specifies the number of tokens the DirectLex line number tracking
implementations should process before attempting to resyncronize the
current line count by manually counting all previous new-lines. When
at 0, this functionality is disabled. Lower values will decrease
performance, and this is only strictly necessary if the counting
algorithm is buggy (in which case you should report it as a bug).
This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is
not being used. This directive has been available since 2.0.0.
</p>
');
/**
* Our in-house implementation of a parser.
*

View File

@ -8,11 +8,6 @@
* features, such as custom tags, custom parsing of text, etc.
*/
HTMLPurifier_ConfigSchema::define(
'Core', 'EscapeInvalidTags', false, 'bool',
'When true, invalid tags will be written back to the document as plain '.
'text. Otherwise, they are silently dropped.'
);
abstract class HTMLPurifier_Strategy
{

View File

@ -1,16 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'Custom', array(), 'list', '
<p>
This directive can be used to add custom auto-format injectors.
Specify an array of injector names (class name minus the prefix)
or concrete implementations. Injector class must exist. This directive
has been available since 2.0.1.
</p>
'
);
/**
* Takes tokens makes them well-formed (balance end tags, etc.)
*/

View File

@ -1,42 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveInvalidImg', true, 'bool', '
<p>
This directive enables pre-emptive URI checking in <code>img</code>
tags, as the attribute validation strategy is not authorized to
remove elements from the document. This directive has been available
since 1.3.0, revert to pre-1.3.0 behavior by setting to false.
</p>
'
);
HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveScriptContents', null, 'bool/null', '
<p>
This directive enables HTML Purifier to remove not only script tags
but all of their contents. This directive has been deprecated since 2.1.0,
and when not set the value of %Core.HiddenElements will take
precedence. This directive has been available since 2.0.0, and can be used to
revert to pre-2.0.0 behavior by setting it to false.
</p>
'
);
HTMLPurifier_ConfigSchema::define(
'Core', 'HiddenElements', array('script' => true, 'style' => true), 'lookup', '
<p>
This directive is a lookup array of elements which should have their
contents removed when they are not allowed by the HTML definition.
For example, the contents of a <code>script</code> tag are not
normally shown in a document, so if script tags are to be removed,
their contents should be removed to. This is opposed to a <code>b</code>
tag, which defines some presentational changes but does not hide its
contents.
</p>
'
);
/**
* Removes all unrecognized tags from the list of tokens.
*

View File

@ -1,65 +1,7 @@
<?php
HTMLPurifier_ConfigSchema::define(
'URI', 'DefinitionID', null, 'string/null', '
<p>
Unique identifier for a custom-built URI definition. If you want
to add custom URIFilters, you must specify this value.
This directive has been available since 2.1.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'URI', 'DefinitionRev', 1, 'int', '
<p>
Revision identifier for your custom definition. See
%HTML.DefinitionRev for details. This directive has been available
since 2.1.0.
</p>
');
// informative URI directives
HTMLPurifier_ConfigSchema::define(
'URI', 'DefaultScheme', 'http', 'string', '
<p>
Defines through what scheme the output will be served, in order to
select the proper object validator when no scheme information is present.
</p>
');
HTMLPurifier_ConfigSchema::define(
'URI', 'Host', null, 'string/null', '
<p>
Defines the domain name of the server, so we can determine whether or
an absolute URI is from your website or not. Not strictly necessary,
as users should be using relative URIs to reference resources on your
website. It will, however, let you use absolute URIs to link to
subdomains of the domain you post here: i.e. example.com will allow
sub.example.com. However, higher up domains will still be excluded:
if you set %URI.Host to sub.example.com, example.com will be blocked.
<strong>Note:</strong> This directive overrides %URI.Base because
a given page may be on a sub-domain, but you wish HTML Purifier to be
more relaxed and allow some of the parent domains too.
This directive has been available since 1.2.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'URI', 'Base', null, 'string/null', '
<p>
The base URI is the URI of the document this purified HTML will be
inserted into. This information is important if HTML Purifier needs
to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute
is on. You may use a non-absolute URI for this value, but behavior
may vary (%URI.MakeAbsolute deals nicely with both absolute and
relative paths, but forwards-compatibility is not guaranteed).
<strong>Warning:</strong> If set, the scheme on this URI
overrides the one specified by %URI.DefaultScheme. This directive has
been available since 2.1.0.
</p>
');
class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
{

View File

@ -1,15 +1,5 @@
<?php
// Registers the boolean URI.DisableExternal directive (default: false).
// The description is assembled by concatenation, so every fragment that is
// followed by another word must end with a space.
HTMLPurifier_ConfigSchema::define(
    'URI', 'DisableExternal', false, 'bool',
    'Disables links to external websites. This is a highly effective '.
    'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no '.
    'links or images outside of your domain will be allowed. Non-linkified '.
    'URIs will still be preserved. If you want to be able to link to '.
    'subdomains or use absolute URIs, specify %URI.Host for your website. '.
    'This directive has been available since 1.2.0.'
);
class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
{
public $name = 'DisableExternal';

View File

@ -1,18 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableExternalResources', false, 'bool',
'Disables the embedding of external resources, preventing users from '.
'embedding things like images from other hosts. This prevents '.
'access tracking (good for email viewers), bandwidth leeching, '.
'cross-site request forging, goatse.cx posting, and '.
'other nasties, but also results in '.
'a loss of end-user functionality (they can\'t directly post a pic '.
'they posted from Flickr anymore). Use it if you don\'t have a '.
'robust user-content moderation team. This directive has been '.
'available since 1.3.0.'
);
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{
public $name = 'DisableExternalResources';

View File

@ -1,13 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'URI', 'HostBlacklist', array(), 'list',
'List of strings that are forbidden in the host of any URI. Use it to '.
'kill domain names of spam, etc. Note that it will catch anything in '.
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
'This directive has been available since 1.3.0.'
);
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
public $name = 'HostBlacklist';

View File

@ -2,17 +2,6 @@
// does not support network paths
HTMLPurifier_ConfigSchema::define(
'URI', 'MakeAbsolute', false, 'bool', '
<p>
Converts all URIs into absolute forms. This is useful when the HTML
being filtered assumes a specific base path, but will actually be
viewed in a different context (and setting an alternate base URI is
not possible). %URI.Base must be set for this directive to work.
This directive has been available since 2.1.0.
</p>
');
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
public $name = 'MakeAbsolute';

View File

@ -1,28 +1,5 @@
<?php
HTMLPurifier_ConfigSchema::define(
'URI', 'AllowedSchemes', array(
'http' => true, // "Hypertext Transfer Protocol", nuf' said
'https' => true, // HTTP over SSL (Secure Socket Layer)
// quite useful, but not necessary
'mailto' => true,// Email
'ftp' => true, // "File Transfer Protocol"
// for Usenet, these two are similar, but distinct
'nntp' => true, // individual Netnews articles
'news' => true // newsgroup or individual Netnews articles
), 'lookup',
'Whitelist that defines the schemes that a URI is allowed to have. This '.
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'OverrideAllowedSchemes', true, 'bool',
'If this is set to true (which it is by default), you can override '.
'%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '.
'to the registry. If false, you will also have to update that directive '.
'in order to add more schemes.'
);
/**
* Registry for retrieving specific URI scheme validator objects.
*/

View File

@ -0,0 +1,45 @@
#!/usr/bin/php
<?php

chdir(dirname(__FILE__));
require_once 'common.php';
require_once '../library/HTMLPurifier.auto.php';
assertCli();

/**
 * @file
 * Generates the schema cache file
 * library/HTMLPurifier/ConfigSchema/schema.ser from the *.txt files
 * found under library/HTMLPurifier/ConfigSchema.
 *
 * The script refuses to overwrite an existing cache file and exits with a
 * non-zero status when it cannot complete.
 */

$target = '../library/HTMLPurifier/ConfigSchema/schema.ser';
$FS = new FSTools();

// Never clobber an existing cache; it has to be removed explicitly first.
if (file_exists($target)) {
    echo "Delete HTMLPurifier/ConfigSchema/schema.ser before running this script.\n";
    exit(1);
}

$files = $FS->globr('../library/HTMLPurifier/ConfigSchema', '*.txt');

$namespaces = array();
$directives = array();

// Generate string hashes
$parser = new ConfigSchema_StringHashParser();
foreach ($files as $file) {
    $hash = $parser->parseFile($file);
    // An ID without a dot is collected as a namespace, anything else as a
    // directive.
    if (strpos($hash['ID'], '.') === false) {
        $namespaces[] = $hash;
    } else {
        $directives[] = $hash;
    }
}

$adapter = new ConfigSchema_StringHashAdapter();
$schema  = new HTMLPurifier_ConfigSchema();

// All namespaces are adapted before any directive (presumably because a
// directive needs its namespace to be registered already -- confirm against
// the adapter).
foreach ($namespaces as $hash) $adapter->adapt($hash, $schema);
foreach ($directives as $hash) $adapter->adapt($hash, $schema);

// Report a failed write instead of finishing silently without a cache.
if (file_put_contents($target, serialize($schema)) === false) {
    echo "Could not write HTMLPurifier/ConfigSchema/schema.ser.\n";
    exit(1);
}

View File

@ -25,6 +25,6 @@ foreach ($files as $file) {
$old_code = file_get_contents($file);
$new_code = preg_replace("#^require_once .+[\n\r]*#m", '', $old_code);
if ($old_code !== $new_code) {
file_put_contents($file . '.new', $new_code);
file_put_contents($file, $new_code);
}
}

View File

@ -0,0 +1,30 @@
#!/usr/bin/php
<?php

/**
 * @file
 * Removes ConfigSchema function calls from source files.
 */

chdir(dirname(__FILE__));
require_once 'common.php';
assertCli();

// Kept for reference only: the script announces that and stops right here,
// so nothing below this point is ever executed.
echo "Please do not run this script. It is here for historical purposes only.";
exit;

chdir(dirname(__FILE__) . '/../library/');
$FS = new FSTools();

foreach ($FS->globr('.', '*.php') as $file) {
    // Leave alone any file whose base name contains more than one dot.
    if (substr_count(basename($file), '.') > 1) {
        continue;
    }

    $before = file_get_contents($file);
    // Strip every call that starts at the beginning of a line, together with
    // the line breaks that follow it.
    $after = preg_replace("#^HTMLPurifier_ConfigSchema::.+?\);[\n\r]*#ms", '', $before);

    // Only rewrite files that actually changed.
    if ($before !== $after) {
        file_put_contents($file, $after);
    }

    // Indented calls are not matched by the pattern above; report them.
    if (preg_match('#^\s+HTMLPurifier_ConfigSchema::#m', $after)) {
        echo "Indented ConfigSchema call in $file\n";
    }
}