From 20e027646d1f8f9658d43eb3131c5b1b9ad96349 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 16 Apr 2006 00:35:34 +0000 Subject: [PATCH] Commit benchmarking code. git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@28 48356398-32a2-884e-a903-53898d9a118a --- benchmarks/HTML_Lexer.php | 72 +++++++++++++++ benchmarks/samples/HTML_Lexer/1.html | 53 +++++++++++ benchmarks/samples/HTML_Lexer/2.html | 17 ++++ benchmarks/samples/HTML_Lexer/3.html | 128 +++++++++++++++++++++++++++ 4 files changed, 270 insertions(+) create mode 100644 benchmarks/HTML_Lexer.php create mode 100644 benchmarks/samples/HTML_Lexer/1.html create mode 100644 benchmarks/samples/HTML_Lexer/2.html create mode 100644 benchmarks/samples/HTML_Lexer/3.html diff --git a/benchmarks/HTML_Lexer.php b/benchmarks/HTML_Lexer.php new file mode 100644 index 00000000..19a8bac3 --- /dev/null +++ b/benchmarks/HTML_Lexer.php @@ -0,0 +1,72 @@ + + + +Benchmark: HTML_Lexer versus HTMLSax + + +

Benchmark: HTML_Lexer versus HTMLSax

+start(); + + $lexer = new HTML_Lexer(); + $tokens = $lexer->tokenizeHTML($document); + $timer->setMarker('HTML_Lexer'); + + $lexer = new HTML_Lexer_Sax(); + $sax_tokens = $lexer->tokenizeHTML($document); + $timer->setMarker('HTML_Lexer_Sax'); + + $timer->stop(); + $timer->display(); +} + +// sample of html pages + +$dir = 'samples/HTML_Lexer'; +$dh = opendir($dir); +while (false !== ($filename = readdir($dh))) { + + if (strpos($filename, '.html') !== strlen($filename) - 5) continue; + $document = file_get_contents($dir . '/' . $filename); + echo "

File: $filename

\n"; + do_benchmark($document); + +} + +// crashers + +$snippets = array(); +$snippets[] = ''; + +foreach ($snippets as $snippet) { + echo '

' . htmlentities($snippet) . '

'; + do_benchmark($snippet); +} + +// random input + +$document = Text_Password::create(80, 'unpronounceable', 'qwerty <>="\''); +echo "

Random input

\n"; +echo '

' . htmlentities($document) . '

'; +do_benchmark($document); + +?> \ No newline at end of file diff --git a/benchmarks/samples/HTML_Lexer/1.html b/benchmarks/samples/HTML_Lexer/1.html new file mode 100644 index 00000000..60a61450 --- /dev/null +++ b/benchmarks/samples/HTML_Lexer/1.html @@ -0,0 +1,53 @@ + + + + Main Page - Huaxia Taiji Club + + + + + + +
中文
+ +
Huaxia Taiji Club + 华夏太极俱乐部
+ +
+

Main Page

Taiji (Tai Chi)

+ + + + + +

Taiji is an ancient Chinese tradition of movement systems that is associated with philosophy, physiology, psychology, geometry and dynamics. It is the slowest form of martial arts and is meant to improve the internal spirit. It is soothing to the soul and extremely invigorating.

+ +

The founder of Taiji was Zhang Sanfeng (Chang San-feng), who was a monk of the Wu Dang (Wu Tang) Monastery and lived in the period from 1391 to 1459. His exercises stressed suppleness and elasticity as opposed to the hardness and force of other martial art styles. Several centuries old, Taiji was originally developed as a form of self-defense, emphasizing strength, balance, flexibility and speed. Tai Chi also differs from other martial arts in that it is based on the Taoist religion and aims to avoid aggressive forces.

+ +

Modern Taiji includes many forms — Quan, Sword and Fan. Impacting the mind and body of the practitioners, Taiji is practiced as a meditative exercise made up of a series of forms, or choreographed motions, requiring slow, gentle movement of the arms, legs and torso. Taiji practitioners learn to center their attention on their breathing and body movements so that the exercise strengthens their overall mental and physical awareness. In a sense, Taiji is similar to yoga in that it is also a form of moving meditation, with the goal of achieving stillness through the motion and awareness of breath. To perform Taiji, practitioners have to empty their mind of thoughts and worries in order to achieve harmony. It is a great aid for reducing stress and improving the quality of life.

+ +

In China and in communities all over the world, Taiji is practiced by young and old in the early morning hours. It's a great way to bring a new and fresh day!

+ +

Check out our gallery.

+ +
+ +
Click on photo to see HR version
+ + \ No newline at end of file diff --git a/benchmarks/samples/HTML_Lexer/2.html b/benchmarks/samples/HTML_Lexer/2.html new file mode 100644 index 00000000..a39352cf --- /dev/null +++ b/benchmarks/samples/HTML_Lexer/2.html @@ -0,0 +1,17 @@ +Google + +
edwardzyang@gmail.com | Personalized Home | Search History | My Account | Sign out
Google

+
Web    Images    Groups    News    Froogle    Local    more »
 
  Advanced Search
  Preferences
  Language Tools


Advertising Programs - Business Solutions - About Google

©2006 Google

\ No newline at end of file diff --git a/benchmarks/samples/HTML_Lexer/3.html b/benchmarks/samples/HTML_Lexer/3.html new file mode 100644 index 00000000..776708d4 --- /dev/null +++ b/benchmarks/samples/HTML_Lexer/3.html @@ -0,0 +1,128 @@ + + +Anime Digi-Lib Index + + + +
+ +
+ + + + + + + + + + + + + + + + « + + Previous | + Top 100 | + Next + + + » + + + + +
+ + + + + + + + + + +
 Search:The WebAngelfire    Planet
+
+ Edit your Site show site directoryBrowse Sites hosted by angelfire
  + Vonagehosted by angelfire
+
+
+ + +
+ + + +
+ + + + + +
+

May 1, 2000

+

Pop Culture

+

by. H. Finkelstein

+ +
+

Welcome to the Anime Digi-Lib, a virtual index to anime on the + internet. This site strives to house a comprehensive index to both personal + and commercial websites and provides reviews to these sites. We hope to + be a gateway for people who've never imagined they'd ever be interested + in Japanese Animation.

+ + + + + + +
+

 

+

 

+ +
+ + + + + + + + + + + + + + + + +
Search term:
Case-sensitive - +yes
exactfuzzy
+
+ + +
+ + + + + +
What is better, subtitled or dubbed anime?
Subtitled
Current results
Free + Web Polls
+ +
+ + +