0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-11-09 15:28:40 +00:00

[1.7.0] Prototype-declarations for Lexer removed in favor of configuration determination of Lexer implementations.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1153 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-17 21:27:39 +00:00
parent bd44105ca9
commit bf6ce67fc1
3 changed files with 105 additions and 42 deletions

2
NEWS
View File

@ -26,6 +26,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
DefinitionID directive (%HTML.DefinitionID for HTMLDefinition).
# Contents between <script> tags are now completely removed if <script>
is not allowed
# Prototype-declarations for Lexer removed in favor of configuration
determination of Lexer implementations.
! HTML Purifier now works in PHP 4.3.2.
! Configuration form-editing API makes tweaking HTMLPurifier_Config a
breeze!

View File

@ -69,7 +69,7 @@ class HTMLPurifier
var $config;
var $filters;
var $lexer, $strategy, $generator;
var $strategy, $generator;
/**
* Final HTMLPurifier_Context of last run purification. Might be an array.
@ -89,7 +89,6 @@ class HTMLPurifier
$this->config = HTMLPurifier_Config::create($config);
$this->lexer = HTMLPurifier_Lexer::create();
$this->strategy = new HTMLPurifier_Strategy_Core();
$this->generator = new HTMLPurifier_Generator();
@ -117,6 +116,10 @@ class HTMLPurifier
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
// implementation is partially environment dependant, partially
// configuration dependant
$lexer = HTMLPurifier_Lexer::create($config);
$context = new HTMLPurifier_Context();
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
@ -130,7 +133,7 @@ class HTMLPurifier
// list of tokens
$this->strategy->execute(
// list of un-purified tokens
$this->lexer->tokenizeHTML(
$lexer->tokenizeHTML(
// un-purified HTML
$html, $config, $context
),

View File

@ -19,6 +19,37 @@ HTMLPurifier_ConfigSchema::define(
'drop all sections except the content between body.'
);
HTMLPurifier_ConfigSchema::define(
'Core', 'LexerImpl', null, 'mixed/null', '
<p>
This parameter determines what lexer implementation can be used. The
valid values are:
</p>
<dl>
<dt><em>null</em></dt>
<dd>
Recommended, the lexer implementation will be auto-detected based on
your PHP-version and configuration.
</dd>
<dt><em>string</em> lexer identifier</dt>
<dd>
This is a slim way of manually overridding the implementation.
Currently recognized values are: DOMLex (the default PHP5 implementation)
and DirectLex (the default PHP4 implementation). Only use this if
you know what you are doing: usually, the auto-detection will
manage things for cases you aren\'t even aware of.
</dd>
<dt><em>object</em> lexer instance</dt>
<dd>
Super-advanced: you can specify your own, custom, implementation that
implements the interface defined by <code>HTMLPurifier_Lexer</code>.
I may remove this option simply because I don\'t expect anyone
to use it.
</dd>
</dl>
'
);
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*
@ -63,11 +94,76 @@ HTMLPurifier_ConfigSchema::define(
class HTMLPurifier_Lexer
{
// -- STATIC ----------------------------------------------------------
/**
* Retrieves or sets the default Lexer as a Prototype Factory.
*
* Depending on what PHP version you are running, the abstract base
* Lexer class will determine which concrete Lexer is best for you:
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
* for PHP 5 and beyond. This general rule has a few exceptions to it
* involving special features that only DirectLex implements.
*
* @static
*
* @note The behavior of this class has changed, rather than accepting
* a prototype object, it now accepts a configuration object.
* To specify your own prototype, set %Core.LexerImpl to it.
* This change in behavior de-singletonizes the lexer object.
*
* @note In PHP4, it is possible to call this factory method from
* subclasses, such usage is not recommended and not
* forwards-compatible.
*
* @param $prototype Optional prototype lexer or configuration object
* @return Concrete lexer.
*/
function create($config) {
if (!is_a($config, 'HTMLPurifier_Config')) {
$lexer = $config;
trigger_error("Passing a prototype to
HTMLPurifier_Lexer::create() is deprecated, please instead
use %Core.LexerImpl", E_USER_WARNING);
} else {
$lexer = $config->get('Core', 'LexerImpl');
}
if (is_object($lexer)) {
return $lexer;
}
if (is_null($lexer)) { do {
// auto-detectection algorithm
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
class_exists('DOMDocument')) { // check for DOM support
$lexer = 'DOMLex';
} else {
$lexer = 'DirectLex';
}
} while(0); } // do..while so we can break
// instantiate recognized string names
switch ($lexer) {
case 'DOMLex':
return new HTMLPurifier_Lexer_DOMLex();
case 'DirectLex':
return new HTMLPurifier_Lexer_DirectLex();
default:
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
}
}
// -- CONVENIENCE MEMBERS ---------------------------------------------
function HTMLPurifier_Lexer() {
$this->_entity_parser = new HTMLPurifier_EntityParser();
}
/**
* Most common entity to raw value conversion table for special entities.
* @protected
@ -131,44 +227,6 @@ class HTMLPurifier_Lexer
trigger_error('Call to abstract class', E_USER_ERROR);
}
/**
* Retrieves or sets the default Lexer as a Prototype Factory.
*
* Depending on what PHP version you are running, the abstract base
* Lexer class will determine which concrete Lexer is best for you:
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
* for PHP 5 and beyond.
*
* Passing the optional prototype lexer parameter will override the
* default with your own implementation. A copy/reference of the prototype
* lexer will now be returned when you request a new lexer.
*
* @static
*
* @note
* Though it is possible to call this factory method from subclasses,
* such usage is not recommended.
*
* @param $prototype Optional prototype lexer.
* @return Concrete lexer.
*/
function create($prototype = null) {
// we don't really care if it's a reference or a copy
static $lexer = null;
if ($prototype) {
$lexer = $prototype;
}
if (empty($lexer)) {
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
class_exists('DOMDocument')) { // check for DOM support
$lexer = new HTMLPurifier_Lexer_DOMLex();
} else {
$lexer = new HTMLPurifier_Lexer_DirectLex();
}
}
return $lexer;
}
/**
* Translates CDATA sections into regular sections (through escaping).
*