mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-03-11 17:18:44 +00:00
Rename HTML_Lexer to HTMLPurifier_Lexer. However, some more refactoring still needs to be done (namely making the lexer an interface).
Also fixed broken benchmarks. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@65 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
cf4776cfbd
commit
6a6afaccc5
@ -8,7 +8,7 @@ class HTML_Purifier
|
||||
var $generator;
|
||||
|
||||
function HTML_Purifier() {
|
||||
$this->lexer = new HTML_Lexer();
|
||||
$this->lexer = new HTMLPurifier_Lexer();
|
||||
$this->definition = new PureHTMLDefinition();
|
||||
$this->generator = new HTML_Generator();
|
||||
}
|
||||
|
@ -12,13 +12,13 @@ TODO:
|
||||
|
||||
*/
|
||||
|
||||
class HTML_Lexer
|
||||
class HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
// does this version of PHP support utf8 as entity function charset?
|
||||
var $_entity_utf8;
|
||||
|
||||
function HTML_Lexer() {
|
||||
function HTMLPurifier_Lexer() {
|
||||
$this->_entity_utf8 = version_compare(PHP_VERSION, '5', '>=');
|
||||
}
|
||||
|
||||
@ -343,7 +343,7 @@ class HTML_Lexer
|
||||
|
||||
// uses the PEAR class XML_HTMLSax3 to parse XML
|
||||
// only shares the tokenizeHTML() function
|
||||
class HTML_Lexer_Sax extends HTML_Lexer
|
||||
class HTMLPurifier_Lexer_Sax extends HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
var $tokens = array();
|
@ -9,8 +9,8 @@ require_once 'Benchmark/Timer.php';
|
||||
require_once 'XML/HTMLSax3.php';
|
||||
require_once 'Text/Password.php';
|
||||
|
||||
require_once '../MarkupFragment.php';
|
||||
require_once '../HTML_Lexer.php';
|
||||
require_once '../Token.php';
|
||||
require_once '../Lexer.php';
|
||||
|
||||
class TinyTimer extends Benchmark_Timer
|
||||
{
|
||||
@ -52,12 +52,12 @@ class TinyTimer extends Benchmark_Timer
|
||||
?>
|
||||
<html>
|
||||
<head>
|
||||
<title>Benchmark: HTML_Lexer versus HTMLSax</title>
|
||||
<title>Benchmark: HTMLPurifier_Lexer versus HTMLSax</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Benchmark: HTML_Lexer versus HTMLSax</h1>
|
||||
<h1>Benchmark: HTMLPurifier_Lexer versus HTMLSax</h1>
|
||||
<table border="1">
|
||||
<tr><th>Case</th><th>HTML_Lexer</th><th>HTML_Lexer_Sax</th></tr>
|
||||
<tr><th>Case</th><th>HTMLPurifier_Lexer</th><th>HTMLPurifier_Lexer_Sax</th></tr>
|
||||
<?php
|
||||
|
||||
|
||||
@ -65,13 +65,13 @@ function do_benchmark($name, $document) {
|
||||
$timer = new TinyTimer($name);
|
||||
$timer->start();
|
||||
|
||||
$lexer = new HTML_Lexer();
|
||||
$lexer = new HTMLPurifier_Lexer();
|
||||
$tokens = $lexer->tokenizeHTML($document);
|
||||
$timer->setMarker('HTML_Lexer');
|
||||
$timer->setMarker('HTMLPurifier_Lexer');
|
||||
|
||||
$lexer = new HTML_Lexer_Sax();
|
||||
$lexer = new HTMLPurifier_Lexer_Sax();
|
||||
$sax_tokens = $lexer->tokenizeHTML($document);
|
||||
$timer->setMarker('HTML_Lexer_Sax');
|
||||
$timer->setMarker('HTMLPurifier_Lexer_Sax');
|
||||
|
||||
$timer->stop();
|
||||
$timer->display();
|
||||
@ -79,7 +79,7 @@ function do_benchmark($name, $document) {
|
||||
|
||||
// sample of html pages
|
||||
|
||||
$dir = 'samples/HTML_Lexer';
|
||||
$dir = 'samples/Lexer';
|
||||
$dh = opendir($dir);
|
||||
while (false !== ($filename = readdir($dh))) {
|
||||
|
||||
|
@ -5,7 +5,7 @@ load_simpletest(); // includes all relevant simpletest files
|
||||
require_once 'XML/HTMLSax3.php'; // optional PEAR class
|
||||
|
||||
require_once 'HTML_Purifier.php';
|
||||
require_once 'HTML_Lexer.php';
|
||||
require_once 'Lexer.php';
|
||||
require_once 'Token.php';
|
||||
require_once 'PureHTMLDefinition.php';
|
||||
require_once 'HTML_Generator.php';
|
||||
@ -14,7 +14,7 @@ $test = new GroupTest('HTML_Purifier');
|
||||
|
||||
chdir('tests/');
|
||||
$test->addTestFile('HTML_Purifier.php');
|
||||
$test->addTestFile('HTML_Lexer.php');
|
||||
$test->addTestFile('Lexer.php');
|
||||
//$test->addTestFile('Token.php');
|
||||
$test->addTestFile('PureHTMLDefinition.php');
|
||||
$test->addTestFile('HTML_Generator.php');
|
||||
|
@ -4,19 +4,19 @@
|
||||
* Benchmark the SAX parser with my homemade one
|
||||
*/
|
||||
|
||||
class Test_HTML_Lexer extends UnitTestCase
|
||||
class Test_HTMLPurifier_Lexer extends UnitTestCase
|
||||
{
|
||||
|
||||
var $HTML_Lexer;
|
||||
var $HTML_Lexer_Sax;
|
||||
var $HTMLPurifier_Lexer;
|
||||
var $HTMLPurifier_Lexer_Sax;
|
||||
|
||||
function setUp() {
|
||||
$this->HTML_Lexer =& new HTML_Lexer();
|
||||
$this->HTML_Lexer_Sax =& new HTML_Lexer_Sax();
|
||||
$this->HTMLPurifier_Lexer =& new HTMLPurifier_Lexer();
|
||||
$this->HTMLPurifier_Lexer_Sax =& new HTMLPurifier_Lexer_Sax();
|
||||
}
|
||||
|
||||
function test_nextWhiteSpace() {
|
||||
$HP =& $this->HTML_Lexer;
|
||||
$HP =& $this->HTMLPurifier_Lexer;
|
||||
$this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
|
||||
$this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
|
||||
$this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
|
||||
@ -26,7 +26,7 @@ class Test_HTML_Lexer extends UnitTestCase
|
||||
}
|
||||
|
||||
function test_parseData() {
|
||||
$HP =& $this->HTML_Lexer;
|
||||
$HP =& $this->HTMLPurifier_Lexer;
|
||||
$this->assertIdentical('asdf', $HP->parseData('asdf'));
|
||||
$this->assertIdentical('&', $HP->parseData('&'));
|
||||
$this->assertIdentical('"', $HP->parseData('"'));
|
||||
@ -144,12 +144,12 @@ class Test_HTML_Lexer extends UnitTestCase
|
||||
// SAX chokes on this? We do have entity parsing on, so it should work!
|
||||
|
||||
foreach($input as $i => $discard) {
|
||||
$result = $this->HTML_Lexer->tokenizeHTML($input[$i]);
|
||||
$result = $this->HTMLPurifier_Lexer->tokenizeHTML($input[$i]);
|
||||
$this->assertEqual($expect[$i], $result);
|
||||
paintIf($result, $expect[$i] != $result);
|
||||
|
||||
// assert unless I say otherwise
|
||||
$sax_result = $this->HTML_Lexer_Sax->tokenizeHTML($input[$i]);
|
||||
$sax_result = $this->HTMLPurifier_Lexer_Sax->tokenizeHTML($input[$i]);
|
||||
if (!isset($sax_expect[$i])) {
|
||||
// by default, assert with normal result
|
||||
$this->assertEqual($expect[$i], $sax_result);
|
||||
@ -191,7 +191,7 @@ class Test_HTML_Lexer extends UnitTestCase
|
||||
|
||||
$size = count($input);
|
||||
for($i = 0; $i < $size; $i++) {
|
||||
$result = $this->HTML_Lexer->tokenizeAttributeString($input[$i]);
|
||||
$result = $this->HTMLPurifier_Lexer->tokenizeAttributeString($input[$i]);
|
||||
$this->assertEqual($expect[$i], $result);
|
||||
paintIf($result, $expect[$i] != $result);
|
||||
}
|
@ -7,7 +7,7 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
|
||||
var $gen;
|
||||
|
||||
function Test_HTMLDTD_ChildDef() {
|
||||
$this->lex = new HTML_Lexer();
|
||||
$this->lex = new HTMLPurifier_Lexer();
|
||||
$this->gen = new HTML_Generator();
|
||||
parent::UnitTestCase();
|
||||
}
|
||||
@ -134,7 +134,7 @@ class Test_PureHTMLDefinition extends UnitTestCase
|
||||
$this->UnitTestCase();
|
||||
$this->def = new PureHTMLDefinition();
|
||||
$this->def->loadData();
|
||||
$this->lex = new HTML_Lexer();
|
||||
$this->lex = new HTMLPurifier_Lexer();
|
||||
}
|
||||
|
||||
function test_removeForeignElements() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user