2006-04-16 00:35:34 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
// Benchmark: test our parser versus the HTMLSax parser.

// Cap the script's execution time at five seconds.
set_time_limit(5);

// Emulates inserting a dir called HTMLPurifier into your class dir: the
// directory two levels up is appended to the existing include path.
set_include_path(implode(PATH_SEPARATOR, array(get_include_path(), '../../')));
|
|
|
|
|
2006-04-16 00:35:34 +00:00
|
|
|
// PEAR
|
|
|
|
require_once 'Benchmark/Timer.php';
|
|
|
|
require_once 'XML/HTMLSax3.php';
|
|
|
|
require_once 'Text/Password.php';
|
|
|
|
|
2006-07-22 00:13:08 +00:00
|
|
|
require_once 'HTMLPurifier/Lexer.php';
|
2006-04-16 00:35:34 +00:00
|
|
|
|
2006-04-16 01:09:32 +00:00
|
|
|
/**
 * Benchmark_Timer subclass whose getOutput() renders the profiling data as
 * a single HTML table row (<tr>...</tr>) labelled with the timer's name.
 */
class TinyTimer extends Benchmark_Timer
{

    /** Row label; stored already escaped with htmlentities(). */
    var $name;

    /**
     * PHP 4-style constructor.
     *
     * @param string $name Plain-text label for this timer; HTML-escaped here.
     * @param bool   $auto Passed through unchanged to the Benchmark_Timer
     *                     constructor.
     */
    function TinyTimer($name, $auto = false) {
        $this->name = htmlentities($name);
        $this->Benchmark_Timer($auto);
    }

    /**
     * Builds one table row: a cell with the timer name, followed by one
     * right-aligned cell per marker (the 'Start' and 'Stop' markers are
     * skipped) holding that marker's 'diff' as a percentage of the total
     * elapsed time, formatted with two decimals.
     *
     * @return string HTML for a single <tr> element.
     */
    function getOutput() {

        $total  = $this->TimeElapsed();
        $result = $this->getProfiling();

        $out  = '<tr>';
        $out .= "<td>{$this->name}</td>";

        foreach ($result as $v) {
            if ($v['name'] === 'Start' || $v['name'] === 'Stop') continue;

            // A zero total (timer resolution too coarse for a very fast run)
            // would be a division by zero; report 0.00% instead.
            $perc = ($total > 0) ? (($v['diff'] * 100) / $total) : 0;

            $out .= '<td align="right">' . number_format($perc, 2, '.', '') .
                "%</td>";
        }

        $out .= '</tr>';

        return $out;
    }
}
|
|
|
|
|
2006-04-16 00:35:34 +00:00
|
|
|
?>
|
|
|
|
<html>
|
|
|
|
<head>
|
2006-07-21 23:07:47 +00:00
|
|
|
<title>Benchmark: HTMLPurifier_Lexer versus HTMLSax</title>
|
2006-04-16 00:35:34 +00:00
|
|
|
</head>
|
|
|
|
<body>
|
2006-07-21 23:07:47 +00:00
|
|
|
<h1>Benchmark: HTMLPurifier_Lexer versus HTMLSax</h1>
|
2006-04-16 01:09:32 +00:00
|
|
|
<table border="1">
|
2006-07-21 23:07:47 +00:00
|
|
|
<tr><th>Case</th><th>HTMLPurifier_Lexer</th><th>HTMLPurifier_Lexer_Sax</th></tr>
|
2006-04-16 00:35:34 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
2006-04-16 01:09:32 +00:00
|
|
|
/**
 * Runs both lexers over one document, timing each with a TinyTimer, and
 * then calls the timer's display().
 *
 * NOTE(review): display() is inherited from Benchmark_Timer; presumably it
 * prints TinyTimer::getOutput(), i.e. one table row -- confirm.
 *
 * @param string $name     Label given to the TinyTimer for this run.
 * @param string $document Text passed to each lexer's tokenizeHTML().
 */
function do_benchmark($name, $document)
{
    $stopwatch = new TinyTimer($name);
    $stopwatch->start();

    // First contender. The token lists are only assigned, never inspected:
    // the point of the call is the time it takes.
    $tokenizer = new HTMLPurifier_Lexer();
    $direct_tokens = $tokenizer->tokenizeHTML($document);
    $stopwatch->setMarker('HTMLPurifier_Lexer');

    // Second contender, measured from the previous marker.
    $tokenizer = new HTMLPurifier_Lexer_Sax();
    $sax_based_tokens = $tokenizer->tokenizeHTML($document);
    $stopwatch->setMarker('HTMLPurifier_Lexer_Sax');

    $stopwatch->stop();
    $stopwatch->display();
}
|
|
|
|
|
|
|
|
// sample of html pages: benchmark every *.html file in the samples directory

$dir = 'samples/Lexer';
$dh = opendir($dir);

// opendir() yields false on failure (and raises its own warning). readdir()
// on an invalid handle is not guaranteed to return the false sentinel the
// loop waits for, so the scan is skipped entirely in that case.
if ($dh !== false) {
    while (false !== ($filename = readdir($dh))) {

        // Only names that END in ".html"; comparing the last five characters
        // also accepts names with an earlier ".html" in them (a.html.html).
        if (substr($filename, -5) !== '.html') continue;

        $document = file_get_contents($dir . '/' . $filename);
        // Unreadable entry (e.g. a directory named *.html): nothing to lex.
        if ($document === false) continue;

        do_benchmark("File: $filename", $document);

    }
    closedir($dh);
}
|
|
|
|
|
|
|
|
// crashers: short malformed fragments, each used both as the row label and
// as the document to tokenize
// NOTE(review): presumably inputs that once broke one of the parsers -- confirm

$snippets = array(
    '<a href="foo>',
    '<a "=>'
);

foreach ($snippets as $snippet) {
    do_benchmark($snippet, $snippet);
}

// random input
// NOTE(review): arguments read as (length 80, 'unpronounceable' type,
// allowed characters) -- confirm against the Text_Password documentation

$random = Text_Password::create(80, 'unpronounceable', 'qwerty <>="\'');

do_benchmark('Random input', $random);
|
|
|
|
|
|
|
|
?></table>
|
|
|
|
|
|
|
|
<?php
|
|
|
|
|
|
|
|
// Print the random string that was benchmarked, escaped with htmlentities()
// so its markup characters are shown literally. The <span> carries no
// colspan attribute: colspan is only valid on table cells.
echo '<div>Random input was: ',
    '<span style="font-family:monospace;">', htmlentities($random),
    '</span></div>';
|
|
|
|
|
|
|
|
?>
|
|
|
|
|
2006-04-16 00:35:34 +00:00
|
|
|
|
2006-04-16 01:09:32 +00:00
|
|
|
</body></html>
|