0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-03-11 17:18:44 +00:00

Rename HTML_Lexer to HTMLPurifier_Lexer. Further refactoring is still needed, namely turning the lexer into an interface.

Also fix the broken benchmarks.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@65 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-07-21 23:07:47 +00:00
parent cf4776cfbd
commit 6a6afaccc5
9 changed files with 28 additions and 28 deletions

View File

@ -8,7 +8,7 @@ class HTML_Purifier
var $generator;
function HTML_Purifier() {
$this->lexer = new HTML_Lexer();
$this->lexer = new HTMLPurifier_Lexer();
$this->definition = new PureHTMLDefinition();
$this->generator = new HTML_Generator();
}

View File

@ -12,13 +12,13 @@ TODO:
*/
class HTML_Lexer
class HTMLPurifier_Lexer
{
// does this version of PHP support utf8 as entity function charset?
var $_entity_utf8;
function HTML_Lexer() {
function HTMLPurifier_Lexer() {
$this->_entity_utf8 = version_compare(PHP_VERSION, '5', '>=');
}
@ -343,7 +343,7 @@ class HTML_Lexer
// uses the PEAR class XML_HTMLSax3 to parse XML
// only shares the tokenizeHTML() function
class HTML_Lexer_Sax extends HTML_Lexer
class HTMLPurifier_Lexer_Sax extends HTMLPurifier_Lexer
{
var $tokens = array();

View File

@ -9,8 +9,8 @@ require_once 'Benchmark/Timer.php';
require_once 'XML/HTMLSax3.php';
require_once 'Text/Password.php';
require_once '../MarkupFragment.php';
require_once '../HTML_Lexer.php';
require_once '../Token.php';
require_once '../Lexer.php';
class TinyTimer extends Benchmark_Timer
{
@ -52,12 +52,12 @@ class TinyTimer extends Benchmark_Timer
?>
<html>
<head>
<title>Benchmark: HTML_Lexer versus HTMLSax</title>
<title>Benchmark: HTMLPurifier_Lexer versus HTMLSax</title>
</head>
<body>
<h1>Benchmark: HTML_Lexer versus HTMLSax</h1>
<h1>Benchmark: HTMLPurifier_Lexer versus HTMLSax</h1>
<table border="1">
<tr><th>Case</th><th>HTML_Lexer</th><th>HTML_Lexer_Sax</th></tr>
<tr><th>Case</th><th>HTMLPurifier_Lexer</th><th>HTMLPurifier_Lexer_Sax</th></tr>
<?php
@ -65,13 +65,13 @@ function do_benchmark($name, $document) {
$timer = new TinyTimer($name);
$timer->start();
$lexer = new HTML_Lexer();
$lexer = new HTMLPurifier_Lexer();
$tokens = $lexer->tokenizeHTML($document);
$timer->setMarker('HTML_Lexer');
$timer->setMarker('HTMLPurifier_Lexer');
$lexer = new HTML_Lexer_Sax();
$lexer = new HTMLPurifier_Lexer_Sax();
$sax_tokens = $lexer->tokenizeHTML($document);
$timer->setMarker('HTML_Lexer_Sax');
$timer->setMarker('HTMLPurifier_Lexer_Sax');
$timer->stop();
$timer->display();
@ -79,7 +79,7 @@ function do_benchmark($name, $document) {
// sample of html pages
$dir = 'samples/HTML_Lexer';
$dir = 'samples/Lexer';
$dh = opendir($dir);
while (false !== ($filename = readdir($dh))) {

View File

@ -5,7 +5,7 @@ load_simpletest(); // includes all relevant simpletest files
require_once 'XML/HTMLSax3.php'; // optional PEAR class
require_once 'HTML_Purifier.php';
require_once 'HTML_Lexer.php';
require_once 'Lexer.php';
require_once 'Token.php';
require_once 'PureHTMLDefinition.php';
require_once 'HTML_Generator.php';
@ -14,7 +14,7 @@ $test = new GroupTest('HTML_Purifier');
chdir('tests/');
$test->addTestFile('HTML_Purifier.php');
$test->addTestFile('HTML_Lexer.php');
$test->addTestFile('Lexer.php');
//$test->addTestFile('Token.php');
$test->addTestFile('PureHTMLDefinition.php');
$test->addTestFile('HTML_Generator.php');

View File

@ -4,19 +4,19 @@
* Benchmark the SAX parser with my homemade one
*/
class Test_HTML_Lexer extends UnitTestCase
class Test_HTMLPurifier_Lexer extends UnitTestCase
{
var $HTML_Lexer;
var $HTML_Lexer_Sax;
var $HTMLPurifier_Lexer;
var $HTMLPurifier_Lexer_Sax;
function setUp() {
$this->HTML_Lexer =& new HTML_Lexer();
$this->HTML_Lexer_Sax =& new HTML_Lexer_Sax();
$this->HTMLPurifier_Lexer =& new HTMLPurifier_Lexer();
$this->HTMLPurifier_Lexer_Sax =& new HTMLPurifier_Lexer_Sax();
}
function test_nextWhiteSpace() {
$HP =& $this->HTML_Lexer;
$HP =& $this->HTMLPurifier_Lexer;
$this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
$this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
$this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
@ -26,7 +26,7 @@ class Test_HTML_Lexer extends UnitTestCase
}
function test_parseData() {
$HP =& $this->HTML_Lexer;
$HP =& $this->HTMLPurifier_Lexer;
$this->assertIdentical('asdf', $HP->parseData('asdf'));
$this->assertIdentical('&', $HP->parseData('&amp;'));
$this->assertIdentical('"', $HP->parseData('&quot;'));
@ -144,12 +144,12 @@ class Test_HTML_Lexer extends UnitTestCase
// SAX chokes on this? We do have entity parsing on, so it should work!
foreach($input as $i => $discard) {
$result = $this->HTML_Lexer->tokenizeHTML($input[$i]);
$result = $this->HTMLPurifier_Lexer->tokenizeHTML($input[$i]);
$this->assertEqual($expect[$i], $result);
paintIf($result, $expect[$i] != $result);
// assert unless I say otherwise
$sax_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]);
$sax_result = $this->HTMLPurifier_Lexer_Sax->tokenizeHTML($input[$i]);
if (!isset($sax_expect[$i])) {
// by default, assert with normal result
$this->assertEqual($expect[$i], $sax_result);
@ -191,7 +191,7 @@ class Test_HTML_Lexer extends UnitTestCase
$size = count($input);
for($i = 0; $i < $size; $i++) {
$result = $this->HTML_Lexer->tokenizeAttributeString($input[$i]);
$result = $this->HTMLPurifier_Lexer->tokenizeAttributeString($input[$i]);
$this->assertEqual($expect[$i], $result);
paintIf($result, $expect[$i] != $result);
}

View File

@ -7,7 +7,7 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
var $gen;
function Test_HTMLDTD_ChildDef() {
$this->lex = new HTML_Lexer();
$this->lex = new HTMLPurifier_Lexer();
$this->gen = new HTML_Generator();
parent::UnitTestCase();
}
@ -134,7 +134,7 @@ class Test_PureHTMLDefinition extends UnitTestCase
$this->UnitTestCase();
$this->def = new PureHTMLDefinition();
$this->def->loadData();
$this->lex = new HTML_Lexer();
$this->lex = new HTMLPurifier_Lexer();
}
function test_removeForeignElements() {