mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 08:21:52 +00:00
[1.7.0] Prototype-declarations for Lexer removed in favor of configuration determination of Lexer implementations.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1153 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
bd44105ca9
commit
bf6ce67fc1
2
NEWS
2
NEWS
@ -26,6 +26,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
DefinitionID directive (%HTML.DefinitionID for HTMLDefinition).
|
||||
# Contents between <script> tags are now completely removed if <script>
|
||||
is not allowed
|
||||
# Prototype-declarations for Lexer removed in favor of configuration
|
||||
determination of Lexer implementations.
|
||||
! HTML Purifier now works in PHP 4.3.2.
|
||||
! Configuration form-editing API makes tweaking HTMLPurifier_Config a
|
||||
breeze!
|
||||
|
@ -69,7 +69,7 @@ class HTMLPurifier
|
||||
var $config;
|
||||
var $filters;
|
||||
|
||||
var $lexer, $strategy, $generator;
|
||||
var $strategy, $generator;
|
||||
|
||||
/**
|
||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||
@ -89,7 +89,6 @@ class HTMLPurifier
|
||||
|
||||
$this->config = HTMLPurifier_Config::create($config);
|
||||
|
||||
$this->lexer = HTMLPurifier_Lexer::create();
|
||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||
$this->generator = new HTMLPurifier_Generator();
|
||||
|
||||
@ -117,6 +116,10 @@ class HTMLPurifier
|
||||
|
||||
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||
|
||||
// implementation is partially environment dependant, partially
|
||||
// configuration dependant
|
||||
$lexer = HTMLPurifier_Lexer::create($config);
|
||||
|
||||
$context = new HTMLPurifier_Context();
|
||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||
|
||||
@ -130,7 +133,7 @@ class HTMLPurifier
|
||||
// list of tokens
|
||||
$this->strategy->execute(
|
||||
// list of un-purified tokens
|
||||
$this->lexer->tokenizeHTML(
|
||||
$lexer->tokenizeHTML(
|
||||
// un-purified HTML
|
||||
$html, $config, $context
|
||||
),
|
||||
|
@ -19,6 +19,37 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'drop all sections except the content between body.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'LexerImpl', null, 'mixed/null', '
|
||||
<p>
|
||||
This parameter determines what lexer implementation can be used. The
|
||||
valid values are:
|
||||
</p>
|
||||
<dl>
|
||||
<dt><em>null</em></dt>
|
||||
<dd>
|
||||
Recommended, the lexer implementation will be auto-detected based on
|
||||
your PHP-version and configuration.
|
||||
</dd>
|
||||
<dt><em>string</em> lexer identifier</dt>
|
||||
<dd>
|
||||
This is a slim way of manually overridding the implementation.
|
||||
Currently recognized values are: DOMLex (the default PHP5 implementation)
|
||||
and DirectLex (the default PHP4 implementation). Only use this if
|
||||
you know what you are doing: usually, the auto-detection will
|
||||
manage things for cases you aren\'t even aware of.
|
||||
</dd>
|
||||
<dt><em>object</em> lexer instance</dt>
|
||||
<dd>
|
||||
Super-advanced: you can specify your own, custom, implementation that
|
||||
implements the interface defined by <code>HTMLPurifier_Lexer</code>.
|
||||
I may remove this option simply because I don\'t expect anyone
|
||||
to use it.
|
||||
</dd>
|
||||
</dl>
|
||||
'
|
||||
);
|
||||
|
||||
/**
|
||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||
*
|
||||
@ -63,11 +94,76 @@ HTMLPurifier_ConfigSchema::define(
|
||||
class HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
// -- STATIC ----------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Retrieves or sets the default Lexer as a Prototype Factory.
|
||||
*
|
||||
* Depending on what PHP version you are running, the abstract base
|
||||
* Lexer class will determine which concrete Lexer is best for you:
|
||||
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
|
||||
* for PHP 5 and beyond. This general rule has a few exceptions to it
|
||||
* involving special features that only DirectLex implements.
|
||||
*
|
||||
* @static
|
||||
*
|
||||
* @note The behavior of this class has changed, rather than accepting
|
||||
* a prototype object, it now accepts a configuration object.
|
||||
* To specify your own prototype, set %Core.LexerImpl to it.
|
||||
* This change in behavior de-singletonizes the lexer object.
|
||||
*
|
||||
* @note In PHP4, it is possible to call this factory method from
|
||||
* subclasses, such usage is not recommended and not
|
||||
* forwards-compatible.
|
||||
*
|
||||
* @param $prototype Optional prototype lexer or configuration object
|
||||
* @return Concrete lexer.
|
||||
*/
|
||||
function create($config) {
|
||||
|
||||
if (!is_a($config, 'HTMLPurifier_Config')) {
|
||||
$lexer = $config;
|
||||
trigger_error("Passing a prototype to
|
||||
HTMLPurifier_Lexer::create() is deprecated, please instead
|
||||
use %Core.LexerImpl", E_USER_WARNING);
|
||||
} else {
|
||||
$lexer = $config->get('Core', 'LexerImpl');
|
||||
}
|
||||
|
||||
if (is_object($lexer)) {
|
||||
return $lexer;
|
||||
}
|
||||
|
||||
if (is_null($lexer)) { do {
|
||||
// auto-detectection algorithm
|
||||
|
||||
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
|
||||
class_exists('DOMDocument')) { // check for DOM support
|
||||
$lexer = 'DOMLex';
|
||||
} else {
|
||||
$lexer = 'DirectLex';
|
||||
}
|
||||
|
||||
} while(0); } // do..while so we can break
|
||||
|
||||
// instantiate recognized string names
|
||||
switch ($lexer) {
|
||||
case 'DOMLex':
|
||||
return new HTMLPurifier_Lexer_DOMLex();
|
||||
case 'DirectLex':
|
||||
return new HTMLPurifier_Lexer_DirectLex();
|
||||
default:
|
||||
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// -- CONVENIENCE MEMBERS ---------------------------------------------
|
||||
|
||||
function HTMLPurifier_Lexer() {
|
||||
$this->_entity_parser = new HTMLPurifier_EntityParser();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Most common entity to raw value conversion table for special entities.
|
||||
* @protected
|
||||
@ -131,44 +227,6 @@ class HTMLPurifier_Lexer
|
||||
trigger_error('Call to abstract class', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves or sets the default Lexer as a Prototype Factory.
|
||||
*
|
||||
* Depending on what PHP version you are running, the abstract base
|
||||
* Lexer class will determine which concrete Lexer is best for you:
|
||||
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
|
||||
* for PHP 5 and beyond.
|
||||
*
|
||||
* Passing the optional prototype lexer parameter will override the
|
||||
* default with your own implementation. A copy/reference of the prototype
|
||||
* lexer will now be returned when you request a new lexer.
|
||||
*
|
||||
* @static
|
||||
*
|
||||
* @note
|
||||
* Though it is possible to call this factory method from subclasses,
|
||||
* such usage is not recommended.
|
||||
*
|
||||
* @param $prototype Optional prototype lexer.
|
||||
* @return Concrete lexer.
|
||||
*/
|
||||
function create($prototype = null) {
|
||||
// we don't really care if it's a reference or a copy
|
||||
static $lexer = null;
|
||||
if ($prototype) {
|
||||
$lexer = $prototype;
|
||||
}
|
||||
if (empty($lexer)) {
|
||||
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
|
||||
class_exists('DOMDocument')) { // check for DOM support
|
||||
$lexer = new HTMLPurifier_Lexer_DOMLex();
|
||||
} else {
|
||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
}
|
||||
}
|
||||
return $lexer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Translates CDATA sections into regular sections (through escaping).
|
||||
*
|
||||
|
Loading…
Reference in New Issue
Block a user