0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-03 13:21:51 +00:00

Refactor lexer instantiation logic with exceptions and forced line tracking.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2008-09-05 14:04:23 -04:00
parent 92df9e5b28
commit ed7983b559
4 changed files with 79 additions and 34 deletions

5
NEWS
View File

@ -10,6 +10,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
========================== ==========================
3.1.2, unknown release date 3.1.2, unknown release date
# Using %Core.CollectErrors forces line number/column tracking on, whereas
previously you could theoretically turn it off.
! %Output.AttrSort for when you need your attributes in alphabetical order to ! %Output.AttrSort for when you need your attributes in alphabetical order to
deal with a bug in FCKEditor. Requested by frank farmer. deal with a bug in FCKEditor. Requested by frank farmer.
! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith. ! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith.
@ -49,6 +51,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
not collapsed by URIFilter_MakeAbsolute. not collapsed by URIFilter_MakeAbsolute.
- Fix bug with anonymous modules operating on SafeEmbed or SafeObject elements - Fix bug with anonymous modules operating on SafeEmbed or SafeObject elements
by reordering their addition. by reordering their addition.
- Will now throw an exception on many error conditions during lexer creation;
also throws an exception when MaintainLineNumbers is true but a lexer that
does not track line numbers (tracksLineNumbers is false) is being used.
. Strategy_MakeWellFormed now operates in-place, saving memory and allowing . Strategy_MakeWellFormed now operates in-place, saving memory and allowing
for more interesting filter-backtracking for more interesting filter-backtracking
. New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind . New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind

View File

@ -42,6 +42,12 @@
class HTMLPurifier_Lexer class HTMLPurifier_Lexer
{ {
/**
* Whether or not this lexer implements line-number/column-number tracking.
* If it does, set to true.
*/
public $tracksLineNumbers = false;
// -- STATIC ---------------------------------------------------------- // -- STATIC ----------------------------------------------------------
/** /**
@ -70,20 +76,19 @@ class HTMLPurifier_Lexer
$lexer = $config->get('Core', 'LexerImpl'); $lexer = $config->get('Core', 'LexerImpl');
} }
$needs_tracking =
$config->get('Core', 'MaintainLineNumbers') ||
$config->get('Core', 'CollectErrors');
$inst = null;
if (is_object($lexer)) { if (is_object($lexer)) {
return $lexer; $inst = $lexer;
} } else {
if (is_null($lexer)) { do { if (is_null($lexer)) { do {
// auto-detection algorithm // auto-detection algorithm
// once PHP DOM implements native line numbers, or we if ($needs_tracking) {
// hack out something using XSLT, remove this stipulation
$line_numbers = $config->get('Core', 'MaintainLineNumbers');
if (
$line_numbers === true ||
($line_numbers === null && $config->get('Core', 'CollectErrors'))
) {
$lexer = 'DirectLex'; $lexer = 'DirectLex';
break; break;
} }
@ -101,14 +106,28 @@ class HTMLPurifier_Lexer
// instantiate recognized string names // instantiate recognized string names
switch ($lexer) { switch ($lexer) {
case 'DOMLex': case 'DOMLex':
return new HTMLPurifier_Lexer_DOMLex(); $inst = new HTMLPurifier_Lexer_DOMLex();
break;
case 'DirectLex': case 'DirectLex':
return new HTMLPurifier_Lexer_DirectLex(); $inst = new HTMLPurifier_Lexer_DirectLex();
break;
case 'PH5P': case 'PH5P':
return new HTMLPurifier_Lexer_PH5P(); $inst = new HTMLPurifier_Lexer_PH5P();
break;
default: default:
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR); throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer));
} }
}
if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated');
// once PHP DOM implements native line numbers, or we
// hack out something using XSLT, remove this stipulation
if ($needs_tracking && !$inst->tracksLineNumbers) {
throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)');
}
return $inst;
} }

View File

@ -13,6 +13,8 @@
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
{ {
public $tracksLineNumbers = true;
/** /**
* Whitespace characters for str(c)spn. * Whitespace characters for str(c)spn.
*/ */

View File

@ -29,6 +29,25 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex'); $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
} }
/**
 * Sets %Core.LexerImpl to an already-constructed DirectLex object, calls
 * HTMLPurifier_Lexer::create() with that config, and asserts that the
 * result is an HTMLPurifier_Lexer_DirectLex.
 */
function test_create_objectLexerImpl() {
$this->config->set('Core', 'LexerImpl', new HTMLPurifier_Lexer_DirectLex());
$lexer = HTMLPurifier_Lexer::create($this->config);
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
}
/**
 * Sets %Core.LexerImpl to the unrecognized string name 'AsdfAsdf' and
 * expects HTMLPurifier_Lexer::create() to throw an HTMLPurifier_Exception
 * whose message names that lexer type.
 */
function test_create_unknownLexer() {
$this->config->set('Core', 'LexerImpl', 'AsdfAsdf');
// The expectation must be registered before the call that throws.
$this->expectException(new HTMLPurifier_Exception('Cannot instantiate unrecognized Lexer type AsdfAsdf'));
HTMLPurifier_Lexer::create($this->config);
}
/**
 * Requests the 'DOMLex' lexer while %Core.MaintainLineNumbers is true and
 * expects HTMLPurifier_Lexer::create() to throw an HTMLPurifier_Exception
 * saying the lexer does not support line numbers.
 */
function test_create_incompatibleLexer() {
$this->config->set('Core', 'LexerImpl', 'DOMLex');
$this->config->set('Core', 'MaintainLineNumbers', true);
// The expectation must be registered before the call that throws.
$this->expectException(new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'));
HTMLPurifier_Lexer::create($this->config);
}
// HTMLPurifier_Lexer->parseData() ----------------------------------------- // HTMLPurifier_Lexer->parseData() -----------------------------------------
function assertParseData($input, $expect = true) { function assertParseData($input, $expect = true) {