mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-18 11:41:52 +00:00
[1.7.0] Prototype-declarations for Lexer removed in favor of configuration determination of Lexer implementations.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1153 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
bd44105ca9
commit
bf6ce67fc1
2
NEWS
2
NEWS
@ -26,6 +26,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
DefinitionID directive (%HTML.DefinitionID for HTMLDefinition).
|
DefinitionID directive (%HTML.DefinitionID for HTMLDefinition).
|
||||||
# Contents between <script> tags are now completely removed if <script>
|
# Contents between <script> tags are now completely removed if <script>
|
||||||
is not allowed
|
is not allowed
|
||||||
|
# Prototype-declarations for Lexer removed in favor of configuration
|
||||||
|
determination of Lexer implementations.
|
||||||
! HTML Purifier now works in PHP 4.3.2.
|
! HTML Purifier now works in PHP 4.3.2.
|
||||||
! Configuration form-editing API makes tweaking HTMLPurifier_Config a
|
! Configuration form-editing API makes tweaking HTMLPurifier_Config a
|
||||||
breeze!
|
breeze!
|
||||||
|
@ -69,7 +69,7 @@ class HTMLPurifier
|
|||||||
var $config;
|
var $config;
|
||||||
var $filters;
|
var $filters;
|
||||||
|
|
||||||
var $lexer, $strategy, $generator;
|
var $strategy, $generator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||||
@ -89,7 +89,6 @@ class HTMLPurifier
|
|||||||
|
|
||||||
$this->config = HTMLPurifier_Config::create($config);
|
$this->config = HTMLPurifier_Config::create($config);
|
||||||
|
|
||||||
$this->lexer = HTMLPurifier_Lexer::create();
|
|
||||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||||
$this->generator = new HTMLPurifier_Generator();
|
$this->generator = new HTMLPurifier_Generator();
|
||||||
|
|
||||||
@ -117,6 +116,10 @@ class HTMLPurifier
|
|||||||
|
|
||||||
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||||
|
|
||||||
|
// implementation is partially environment dependent, partially
|
||||||
|
// configuration dependent
|
||||||
|
$lexer = HTMLPurifier_Lexer::create($config);
|
||||||
|
|
||||||
$context = new HTMLPurifier_Context();
|
$context = new HTMLPurifier_Context();
|
||||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||||
|
|
||||||
@ -130,7 +133,7 @@ class HTMLPurifier
|
|||||||
// list of tokens
|
// list of tokens
|
||||||
$this->strategy->execute(
|
$this->strategy->execute(
|
||||||
// list of un-purified tokens
|
// list of un-purified tokens
|
||||||
$this->lexer->tokenizeHTML(
|
$lexer->tokenizeHTML(
|
||||||
// un-purified HTML
|
// un-purified HTML
|
||||||
$html, $config, $context
|
$html, $config, $context
|
||||||
),
|
),
|
||||||
|
@ -19,6 +19,37 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'drop all sections except the content between body.'
|
'drop all sections except the content between body.'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'LexerImpl', null, 'mixed/null', '
|
||||||
|
<p>
|
||||||
|
This parameter determines what lexer implementation can be used. The
|
||||||
|
valid values are:
|
||||||
|
</p>
|
||||||
|
<dl>
|
||||||
|
<dt><em>null</em></dt>
|
||||||
|
<dd>
|
||||||
|
Recommended, the lexer implementation will be auto-detected based on
|
||||||
|
your PHP-version and configuration.
|
||||||
|
</dd>
|
||||||
|
<dt><em>string</em> lexer identifier</dt>
|
||||||
|
<dd>
|
||||||
|
This is a slim way of manually overriding the implementation.
|
||||||
|
Currently recognized values are: DOMLex (the default PHP5 implementation)
|
||||||
|
and DirectLex (the default PHP4 implementation). Only use this if
|
||||||
|
you know what you are doing: usually, the auto-detection will
|
||||||
|
manage things for cases you aren\'t even aware of.
|
||||||
|
</dd>
|
||||||
|
<dt><em>object</em> lexer instance</dt>
|
||||||
|
<dd>
|
||||||
|
Super-advanced: you can specify your own, custom, implementation that
|
||||||
|
implements the interface defined by <code>HTMLPurifier_Lexer</code>.
|
||||||
|
I may remove this option simply because I don\'t expect anyone
|
||||||
|
to use it.
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
|
'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||||
*
|
*
|
||||||
@ -63,11 +94,76 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
class HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// -- STATIC ----------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves or sets the default Lexer as a Prototype Factory.
|
||||||
|
*
|
||||||
|
* Depending on what PHP version you are running, the abstract base
|
||||||
|
* Lexer class will determine which concrete Lexer is best for you:
|
||||||
|
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
|
||||||
|
* for PHP 5 and beyond. This general rule has a few exceptions to it
|
||||||
|
* involving special features that only DirectLex implements.
|
||||||
|
*
|
||||||
|
* @static
|
||||||
|
*
|
||||||
|
* @note The behavior of this class has changed, rather than accepting
|
||||||
|
* a prototype object, it now accepts a configuration object.
|
||||||
|
* To specify your own prototype, set %Core.LexerImpl to it.
|
||||||
|
* This change in behavior de-singletonizes the lexer object.
|
||||||
|
*
|
||||||
|
* @note In PHP4, it is possible to call this factory method from
|
||||||
|
* subclasses, such usage is not recommended and not
|
||||||
|
* forwards-compatible.
|
||||||
|
*
|
||||||
|
* @param $config Configuration object; passing a prototype lexer instead is deprecated
|
||||||
|
* @return Concrete lexer.
|
||||||
|
*/
|
||||||
|
function create($config) {
|
||||||
|
|
||||||
|
if (!is_a($config, 'HTMLPurifier_Config')) {
|
||||||
|
$lexer = $config;
|
||||||
|
trigger_error("Passing a prototype to
|
||||||
|
HTMLPurifier_Lexer::create() is deprecated, please instead
|
||||||
|
use %Core.LexerImpl", E_USER_WARNING);
|
||||||
|
} else {
|
||||||
|
$lexer = $config->get('Core', 'LexerImpl');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_object($lexer)) {
|
||||||
|
return $lexer;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_null($lexer)) { do {
|
||||||
|
// auto-detection algorithm
|
||||||
|
|
||||||
|
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
|
||||||
|
class_exists('DOMDocument')) { // check for DOM support
|
||||||
|
$lexer = 'DOMLex';
|
||||||
|
} else {
|
||||||
|
$lexer = 'DirectLex';
|
||||||
|
}
|
||||||
|
|
||||||
|
} while(0); } // do..while so we can break
|
||||||
|
|
||||||
|
// instantiate recognized string names
|
||||||
|
switch ($lexer) {
|
||||||
|
case 'DOMLex':
|
||||||
|
return new HTMLPurifier_Lexer_DOMLex();
|
||||||
|
case 'DirectLex':
|
||||||
|
return new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
default:
|
||||||
|
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- CONVENIENCE MEMBERS ---------------------------------------------
|
||||||
|
|
||||||
function HTMLPurifier_Lexer() {
|
function HTMLPurifier_Lexer() {
|
||||||
$this->_entity_parser = new HTMLPurifier_EntityParser();
|
$this->_entity_parser = new HTMLPurifier_EntityParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Most common entity to raw value conversion table for special entities.
|
* Most common entity to raw value conversion table for special entities.
|
||||||
* @protected
|
* @protected
|
||||||
@ -131,44 +227,6 @@ class HTMLPurifier_Lexer
|
|||||||
trigger_error('Call to abstract class', E_USER_ERROR);
|
trigger_error('Call to abstract class', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves or sets the default Lexer as a Prototype Factory.
|
|
||||||
*
|
|
||||||
* Depending on what PHP version you are running, the abstract base
|
|
||||||
* Lexer class will determine which concrete Lexer is best for you:
|
|
||||||
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
|
|
||||||
* for PHP 5 and beyond.
|
|
||||||
*
|
|
||||||
* Passing the optional prototype lexer parameter will override the
|
|
||||||
* default with your own implementation. A copy/reference of the prototype
|
|
||||||
* lexer will now be returned when you request a new lexer.
|
|
||||||
*
|
|
||||||
* @static
|
|
||||||
*
|
|
||||||
* @note
|
|
||||||
* Though it is possible to call this factory method from subclasses,
|
|
||||||
* such usage is not recommended.
|
|
||||||
*
|
|
||||||
* @param $prototype Optional prototype lexer.
|
|
||||||
* @return Concrete lexer.
|
|
||||||
*/
|
|
||||||
function create($prototype = null) {
|
|
||||||
// we don't really care if it's a reference or a copy
|
|
||||||
static $lexer = null;
|
|
||||||
if ($prototype) {
|
|
||||||
$lexer = $prototype;
|
|
||||||
}
|
|
||||||
if (empty($lexer)) {
|
|
||||||
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
|
|
||||||
class_exists('DOMDocument')) { // check for DOM support
|
|
||||||
$lexer = new HTMLPurifier_Lexer_DOMLex();
|
|
||||||
} else {
|
|
||||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return $lexer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Translates CDATA sections into regular sections (through escaping).
|
* Translates CDATA sections into regular sections (through escaping).
|
||||||
*
|
*
|
||||||
|
Loading…
Reference in New Issue
Block a user