mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 16:31:53 +00:00
svn:eol-style = native
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
39c16f5cfd
commit
14f481bcf6
@ -1,144 +1,144 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||||
|
|
||||||
// Registry of the lexers to benchmark, keyed by the name shown in the
// table header. Entries are timed in the order they are added here.
$LEXERS = array();
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
$LEXERS['PEARSax3']  = new HTMLPurifier_Lexer_PEARSax3();

// DOMLex is only loaded and registered when running on PHP 5 or later.
if (version_compare(PHP_VERSION, '5', '>=')) {
    require_once 'HTMLPurifier/Lexer/DOMLex.php';
    $LEXERS['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
}
|
||||||
|
|
||||||
// PEAR
|
// PEAR
|
||||||
require_once 'Benchmark/Timer.php'; // to do the timing
|
require_once 'Benchmark/Timer.php'; // to do the timing
|
||||||
require_once 'Text/Password.php'; // for generating random input
|
require_once 'Text/Password.php'; // for generating random input
|
||||||
|
|
||||||
// custom class to aid unit testing
|
// custom class to aid unit testing
|
||||||
/**
 * Benchmark timer that renders its profiling data as one HTML table row.
 *
 * The first cell holds the HTML-escaped case name. Every further cell holds
 * the 'diff' value of one profiling entry; entries named 'Start' and 'Stop'
 * are left out.
 */
class RowTimer extends Benchmark_Timer
{

    /** Row label, already passed through htmlentities(). */
    var $name;

    /**
     * PHP 4 style constructor.
     *
     * @param $name Label for the row; escaped once, here.
     * @param $auto Forwarded unchanged to the Benchmark_Timer constructor.
     */
    function RowTimer($name, $auto = false) {
        $this->name = htmlentities($name);
        $this->Benchmark_Timer($auto);
    }

    /**
     * Builds the table row for this timer.
     *
     * @return string A complete <tr>...</tr> fragment.
     */
    function getOutput() {

        // The return value is not used. The call itself is kept because
        // this file does not show whether TimeElapsed() has side effects;
        // TODO confirm against Benchmark_Timer before removing it.
        $this->TimeElapsed();

        $cells = "<td>{$this->name}</td>";

        foreach ($this->getProfiling() as $entry) {
            $isBoundary = ($entry['name'] == 'Start' || $entry['name'] == 'Stop');
            if (!$isBoundary) {
                $cells .= '<td align="right">' . $entry['diff'] . '</td>';
            }
        }

        return '<tr>' . $cells . '</tr>';
    }

}
|
||||||
|
|
||||||
/**
 * Echoes the names of all registered lexers, HTML-escaped and joined
 * with ' / '. Only the keys of the global $LEXERS array are used.
 */
function print_lexers() {
    global $LEXERS;

    // Empty before the first name, ' / ' before every later one.
    $separator = '';
    foreach (array_keys($LEXERS) as $lexerName) {
        echo $separator, htmlspecialchars($lexerName);
        $separator = ' / ';
    }
}
|
||||||
|
|
||||||
/**
 * Runs every registered lexer over one document and displays the timings.
 *
 * @param $name     Label for the result row; handed to RowTimer.
 * @param $document Text passed to each lexer's tokenizeHTML().
 */
function do_benchmark($name, $document) {
    global $LEXERS;

    $row = new RowTimer($name);
    $row->start();

    foreach ($LEXERS as $lexerName => $lexer) {
        // The tokens are never inspected. The assignment is kept so the
        // previous result lives exactly as long as it did before.
        $tokens = $lexer->tokenizeHTML($document);
        // Set after the call, so each marker is named for the lexer that
        // has just finished.
        $row->setMarker($lexerName);
    }

    $row->stop();
    // display() is inherited from Benchmark_Timer; presumably it prints
    // the row built by RowTimer::getOutput() -- verify against PEAR.
    $row->display();
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>Benchmark: <?php print_lexers(); ?></title>
|
<title>Benchmark: <?php print_lexers(); ?></title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<h1>Benchmark: <?php print_lexers(); ?></h1>
|
<h1>Benchmark: <?php print_lexers(); ?></h1>
|
||||||
<table border="1">
|
<table border="1">
|
||||||
<tr><th>Case</th><?php
|
<tr><th>Case</th><?php
|
||||||
foreach ($LEXERS as $key => $value) {
|
foreach ($LEXERS as $key => $value) {
|
||||||
echo '<th>' . htmlspecialchars($key) . '</th>';
|
echo '<th>' . htmlspecialchars($key) . '</th>';
|
||||||
}
|
}
|
||||||
?></tr>
|
?></tr>
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
// ************************************************************************** //
|
// ************************************************************************** //
|
||||||
|
|
||||||
// sample of html pages
|
// sample of html pages
|
||||||
|
|
||||||
// Benchmark every file in the sample directory whose name ends in '.html'.
$dir = 'samples/Lexer';
$dh = opendir($dir);

// opendir() returns false on failure; readdir() must not be called on that.
if ($dh !== false) {
    while (false !== ($filename = readdir($dh))) {

        // Compare the tail of the name. The previous strpos() test found
        // the FIRST '.html', so a name like 'a.html.html' was skipped.
        if (substr($filename, -5) !== '.html') continue;

        $document = file_get_contents($dir . '/' . $filename);
        // Unreadable entry: there is nothing to tokenize.
        if ($document === false) continue;

        do_benchmark("File: $filename", $document);

    }
    closedir($dh);
}
|
||||||
|
|
||||||
// crashers, caused infinite loops before
|
// crashers, caused infinite loops before
|
||||||
|
|
||||||
// Each snippet serves as both the row label and the document.
$snippets = array(
    '<a href="foo>',
    '<a "=>',
);

foreach ($snippets as $crasher) {
    do_benchmark($crasher, $crasher);
}

// random input

$random = Text_Password::create(80, 'unpronounceable', 'qwerty <>="\'');

do_benchmark('Random input', $random);
|
||||||
|
|
||||||
?></table>
|
?></table>
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
// Print the random input that was benchmarked, HTML-escaped.
// A <span> takes no colspan attribute, so that attribute was removed.
echo '<div>Random input was: ' .
    '<span style="font-family:monospace;">' .
    htmlspecialchars($random) . '</span></div>';
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
|
||||||
|
|
||||||
</body></html>
|
</body></html>
|
@ -1,53 +1,53 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>Main Page - Huaxia Taiji Club</title>
|
<title>Main Page - Huaxia Taiji Club</title>
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
<link rel="stylesheet" type="text/css" media="screen, projection" href="/screen.css" />
|
<link rel="stylesheet" type="text/css" media="screen, projection" href="/screen.css" />
|
||||||
<link rel="stylesheet" type="text/css" media="print" href="/print.css" />
|
<link rel="stylesheet" type="text/css" media="print" href="/print.css" />
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
|
||||||
<div id="translation"><a href="/ch/Main_Page">中文</a></div>
|
<div id="translation"><a href="/ch/Main_Page">中文</a></div>
|
||||||
|
|
||||||
<div id="heading"><a href="/en/Main_Page" title="English Main Page">Huaxia Taiji Club</a>
|
<div id="heading"><a href="/en/Main_Page" title="English Main Page">Huaxia Taiji Club</a>
|
||||||
<a class="heading_ch" href="/ch/Main_Page" title="中文主页">华夏太极俱乐部</a></div>
|
<a class="heading_ch" href="/ch/Main_Page" title="中文主页">华夏太极俱乐部</a></div>
|
||||||
<ul id="menu">
|
<ul id="menu">
|
||||||
<li><a href="/en/Main_Page" class="active">Main Page</a></li><li><a href="/en/About">About</a></li><li><a href="/en/News">News</a></li><li><a href="/en/Events">Events</a></li><li><a href="/en/Digest">Digest</a></li><li><a href="/en/Taiji_and_I">Taiji and I</a></li><li><a href="/en/Downloads">Downloads</a></li><li><a href="/en/Registration">Registration</a></li><li><a href="/en/Contact">Contact</a></li> <li><a href="http://www.taijiclub.org/gallery2/main.php">Gallery</a></li>
|
<li><a href="/en/Main_Page" class="active">Main Page</a></li><li><a href="/en/About">About</a></li><li><a href="/en/News">News</a></li><li><a href="/en/Events">Events</a></li><li><a href="/en/Digest">Digest</a></li><li><a href="/en/Taiji_and_I">Taiji and I</a></li><li><a href="/en/Downloads">Downloads</a></li><li><a href="/en/Registration">Registration</a></li><li><a href="/en/Contact">Contact</a></li> <li><a href="http://www.taijiclub.org/gallery2/main.php">Gallery</a></li>
|
||||||
|
|
||||||
<li><a href="http://www.taijiclub.org/forums/index.php">Forums</a></li>
|
<li><a href="http://www.taijiclub.org/forums/index.php">Forums</a></li>
|
||||||
|
|
||||||
</ul>
|
</ul>
|
||||||
<div id="content">
|
<div id="content">
|
||||||
<h1 id="title">Main Page</h1><h2>Taiji (Tai Chi) </h2>
|
<h1 id="title">Main Page</h1><h2>Taiji (Tai Chi) </h2>
|
||||||
|
|
||||||
<div id="sidebar">
|
<div id="sidebar">
|
||||||
<h3>Recent News</h3>
|
<h3>Recent News</h3>
|
||||||
<ul>
|
<ul>
|
||||||
<li>Zou Xiaojun was elected as the new club vice president </li>
|
<li>Zou Xiaojun was elected as the new club vice president </li>
|
||||||
|
|
||||||
<li>HX Edison Taiji Club <a href="http://www.taijiclub.org/downloads/Taiji_club_regulation_.pdf">by-law</a> effective 3/28/2006</li>
|
<li>HX Edison Taiji Club <a href="http://www.taijiclub.org/downloads/Taiji_club_regulation_.pdf">by-law</a> effective 3/28/2006</li>
|
||||||
<li>A new email account for our club: HXEdisontaijiclub@yahoo.com</li>
|
<li>A new email account for our club: HXEdisontaijiclub@yahoo.com</li>
|
||||||
|
|
||||||
<li>Workshop conducted by <a href="http://www.taijiclub.org/ch/Digest/LiDeyin">?????</a> Li Deyin is set on June 4, 2006 at Clarion Hotel in Edison from 9:30am-12pm; <a href="http://www.taijiclub.org/en/Registration">Registration</a></li>
|
<li>Workshop conducted by <a href="http://www.taijiclub.org/ch/Digest/LiDeyin">?????</a> Li Deyin is set on June 4, 2006 at Clarion Hotel in Edison from 9:30am-12pm; <a href="http://www.taijiclub.org/en/Registration">Registration</a></li>
|
||||||
|
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<p><i>Taiji</i> is an ancient Chinese tradition of movement systems that is associated with philosophy, physiology, psychology, geometry and dynamics. It is the slowest form of martial arts and is meant to improve the internal spirit. It is soothing to the soul and extremely invigorating. </p>
|
<p><i>Taiji</i> is an ancient Chinese tradition of movement systems that is associated with philosophy, physiology, psychology, geometry and dynamics. It is the slowest form of martial arts and is meant to improve the internal spirit. It is soothing to the soul and extremely invigorating. </p>
|
||||||
|
|
||||||
<p>The founder of Taiji was Zhang Sanfeng (Chang San-feng), who was a monk of the Wu Dang (Wu Tang) Monastery and lived in the period from 1391 to 1459. His exercises stressed suppleness and elasticity as opposed to the hardness and force of other martial art styles. Several centuries old, Taiji was originally developed as a form of self-defense, emphasizing strength, balance, flexibility and speed. Tai Chi also differs from other martial arts in that it is based on the Taoist religion and aims to avoid aggressive forces. </p>
|
<p>The founder of Taiji was Zhang Sanfeng (Chang San-feng), who was a monk of the Wu Dang (Wu Tang) Monastery and lived in the period from 1391 to 1459. His exercises stressed suppleness and elasticity as opposed to the hardness and force of other martial art styles. Several centuries old, Taiji was originally developed as a form of self-defense, emphasizing strength, balance, flexibility and speed. Tai Chi also differs from other martial arts in that it is based on the Taoist religion and aims to avoid aggressive forces. </p>
|
||||||
|
|
||||||
<p>Modern Taiji includes many forms — Quan, Sword and Fan. Impacting the mind and body of the practitioners, Taiji is practiced as a meditative exercise made up of a series of forms, or choreographed motions, requiring slow, gentle movement of the arms, legs and torso. Taiji practitioners learn to center their attention on their breathing and body movements so that the exercise strengthens their overall mental and physical awareness. In a sense, Taiji is similar to yoga in that it is also a form of moving meditation, with the goal of achieving stillness through the motion and awareness of breath. To perform Taiji, practitioners have to empty their mind of thoughts and worries in order to achieve harmony. It is a great aid for reducing stress and improving the quality of life. </p>
|
<p>Modern Taiji includes many forms — Quan, Sword and Fan. Impacting the mind and body of the practitioners, Taiji is practiced as a meditative exercise made up of a series of forms, or choreographed motions, requiring slow, gentle movement of the arms, legs and torso. Taiji practitioners learn to center their attention on their breathing and body movements so that the exercise strengthens their overall mental and physical awareness. In a sense, Taiji is similar to yoga in that it is also a form of moving meditation, with the goal of achieving stillness through the motion and awareness of breath. To perform Taiji, practitioners have to empty their mind of thoughts and worries in order to achieve harmony. It is a great aid for reducing stress and improving the quality of life. </p>
|
||||||
|
|
||||||
<p>In China and in communities all over the world, Taiji is practiced by young and old in the early morning hours. It's a great way to bring a new and fresh day!</p>
|
<p>In China and in communities all over the world, Taiji is practiced by young and old in the early morning hours. It's a great way to bring a new and fresh day!</p>
|
||||||
|
|
||||||
<p>Check out our <a href="/gallery2/main.php">gallery</a>.</p>
|
<p>Check out our <a href="/gallery2/main.php">gallery</a>.</p>
|
||||||
|
|
||||||
<div style="text-align:center;"><a href="http://www.taijiclub.org/gallery2/v/2006/group1b.jpg.html?g2_imageViewsIndex=1"><img src="/gallery2/d/1836-2/group1b.jpg" /></a></div>
|
<div style="text-align:center;"><a href="http://www.taijiclub.org/gallery2/v/2006/group1b.jpg.html?g2_imageViewsIndex=1"><img src="/gallery2/d/1836-2/group1b.jpg" /></a></div>
|
||||||
|
|
||||||
<div style="text-align:center;">Click on photo to see HR version</div></div>
|
<div style="text-align:center;">Click on photo to see HR version</div></div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
@ -1,17 +1,17 @@
|
|||||||
<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"><title>Google</title><style><!--
|
<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"><title>Google</title><style><!--
|
||||||
body,td,a,p,.h{font-family:arial,sans-serif;}
|
body,td,a,p,.h{font-family:arial,sans-serif;}
|
||||||
.h{font-size: 20px;}
|
.h{font-size: 20px;}
|
||||||
.q{color:#0000cc;}
|
.q{color:#0000cc;}
|
||||||
//-->
|
//-->
|
||||||
</style>
|
</style>
|
||||||
<script>
|
<script>
|
||||||
<!--
|
<!--
|
||||||
function sf(){document.f.q.focus();}
|
function sf(){document.f.q.focus();}
|
||||||
function rwt(el,ct,cd,sg){var e = window.encodeURIComponent ? encodeURIComponent : escape;el.href="/url?sa=t&ct="+e(ct)+"&cd="+e(cd)+"&url="+e(el.href).replace(/\+/g,"%2B")+"&ei=fHNBRJDEG4HSaLONmIoP"+sg;el.onmousedown="";return true;}
|
function rwt(el,ct,cd,sg){var e = window.encodeURIComponent ? encodeURIComponent : escape;el.href="/url?sa=t&ct="+e(ct)+"&cd="+e(cd)+"&url="+e(el.href).replace(/\+/g,"%2B")+"&ei=fHNBRJDEG4HSaLONmIoP"+sg;el.onmousedown="";return true;}
|
||||||
// -->
|
// -->
|
||||||
</script>
|
</script>
|
||||||
</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf() topmargin=3 marginheight=3><center><table border=0 cellspacing=0 cellpadding=0 width=100%><tr><td align=right nowrap><font size=-1><b>edwardzyang@gmail.com</b> | <a href="/url?sa=p&pref=ig&pval=2&q=http://www.google.com/ig%3Fhl%3Den" onmousedown="return rwt(this,'pro','hppphou:def','&sig2=hDbTpsWIp9YG37a23n6krQ')">Personalized Home</a> | <a href="/searchhistory/?hl=en">Search History</a> | <a href="https://www.google.com/accounts/ManageAccount">My Account</a> | <a href="http://www.google.com/accounts/Logout?continue=http://www.google.com/">Sign out</a></font></td></tr><tr height=4><td><img alt="" width=1 height=1></td></tr></table><img src="/intl/en/images/logo.gif" width=276 height=110 alt="Google"><br><br>
|
</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf() topmargin=3 marginheight=3><center><table border=0 cellspacing=0 cellpadding=0 width=100%><tr><td align=right nowrap><font size=-1><b>edwardzyang@gmail.com</b> | <a href="/url?sa=p&pref=ig&pval=2&q=http://www.google.com/ig%3Fhl%3Den" onmousedown="return rwt(this,'pro','hppphou:def','&sig2=hDbTpsWIp9YG37a23n6krQ')">Personalized Home</a> | <a href="/searchhistory/?hl=en">Search History</a> | <a href="https://www.google.com/accounts/ManageAccount">My Account</a> | <a href="http://www.google.com/accounts/Logout?continue=http://www.google.com/">Sign out</a></font></td></tr><tr height=4><td><img alt="" width=1 height=1></td></tr></table><img src="/intl/en/images/logo.gif" width=276 height=110 alt="Google"><br><br>
|
||||||
<form action=/search name=f><script><!--
|
<form action=/search name=f><script><!--
|
||||||
function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
|
function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
|
||||||
// -->
|
// -->
|
||||||
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
@ -1,128 +1,128 @@
|
|||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>Anime Digi-Lib Index</title>
|
<title>Anime Digi-Lib Index</title>
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<div id="tb">
|
<div id="tb">
|
||||||
|
|
||||||
<form name="lycos_search" method="get" target="_new" style="margin: 0px"
|
<form name="lycos_search" method="get" target="_new" style="margin: 0px"
|
||||||
action="http://r.hotbot.com/r/memberpgs_lycos_searchbox_af/http://www.angelfire.lycos.com/cgi-bin/search/pursuit">
|
action="http://r.hotbot.com/r/memberpgs_lycos_searchbox_af/http://www.angelfire.lycos.com/cgi-bin/search/pursuit">
|
||||||
|
|
||||||
<table id="tbtable" cellpadding="0" cellspacing="0" border="0" width="100%" style="border: 1px solid black;">
|
<table id="tbtable" cellpadding="0" cellspacing="0" border="0" width="100%" style="border: 1px solid black;">
|
||||||
<tr style="background-color: #dcf7ff">
|
<tr style="background-color: #dcf7ff">
|
||||||
<td colspan="3">
|
<td colspan="3">
|
||||||
<table cellpadding="0" cellspacing="0" border="0">
|
<table cellpadding="0" cellspacing="0" border="0">
|
||||||
<tr>
|
<tr>
|
||||||
<td> Search:</td>
|
<td> Search:</td>
|
||||||
|
|
||||||
<td><input type="radio" name="cat" value="lycos" checked></td>
|
<td><input type="radio" name="cat" value="lycos" checked></td>
|
||||||
<td nowrap="nowrap">The Web</td>
|
<td nowrap="nowrap">The Web</td>
|
||||||
<td><input type="radio" name="cat" value="angelfire"></td>
|
<td><input type="radio" name="cat" value="angelfire"></td>
|
||||||
<td nowrap="nowrap">Angelfire</td>
|
<td nowrap="nowrap">Angelfire</td>
|
||||||
<td nowrap="nowrap"> <img src="http://af.lygo.com/d/toolbar/planeticon.gif"></td><td nowrap="nowrap"> <a href="http://r.lycos.com/r/tlbr_planet/http://planet.lycos.com" target="_new">Planet</a></td>
|
<td nowrap="nowrap"> <img src="http://af.lygo.com/d/toolbar/planeticon.gif"></td><td nowrap="nowrap"> <a href="http://r.lycos.com/r/tlbr_planet/http://planet.lycos.com" target="_new">Planet</a></td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
<td nowrap="nowrap"><a href="http://lt.angelfire.com/af_toolbar/edit/_h_/www.angelfire.lycos.com/build/index.tmpl" target="_top">
|
<td nowrap="nowrap"><a href="http://lt.angelfire.com/af_toolbar/edit/_h_/www.angelfire.lycos.com/build/index.tmpl" target="_top">
|
||||||
<span id="build">Edit your Site</span></a> </td>
|
<span id="build">Edit your Site</span></a> </td>
|
||||||
|
|
||||||
<td><img src="http://af.lygo.com/d/toolbar/dir.gif" alt="show site directory" border="0" height="10" hspace="3" width="8"></td>
|
<td><img src="http://af.lygo.com/d/toolbar/dir.gif" alt="show site directory" border="0" height="10" hspace="3" width="8"></td>
|
||||||
<td nowrap="nowrap"><a href="http://lt.angelfire.com/af_toolbar/browse/_h_/www.angelfire.lycos.com/directory/index.tmpl" target="_top">Browse Sites</a> </td>
|
<td nowrap="nowrap"><a href="http://lt.angelfire.com/af_toolbar/browse/_h_/www.angelfire.lycos.com/directory/index.tmpl" target="_top">Browse Sites</a> </td>
|
||||||
<td><a href="http://lt.angelfire.com/af_toolbar/angelfire/_h_/www.angelfire.lycos.com" target="_top"><img src="http://af.lygo.com/d/toolbar/aflogo_top.gif" alt="hosted by angelfire" border="0" height="26" width="143"></a></td>
|
<td><a href="http://lt.angelfire.com/af_toolbar/angelfire/_h_/www.angelfire.lycos.com" target="_top"><img src="http://af.lygo.com/d/toolbar/aflogo_top.gif" alt="hosted by angelfire" border="0" height="26" width="143"></a></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr style="background-color: #dcf7ff">
|
<tr style="background-color: #dcf7ff">
|
||||||
<td nowrap="nowrap" valign="middle"> <input size="30" style="font-size: 10px; background-color: #fff;" type="text" name="query" id="searchbox"></td>
|
<td nowrap="nowrap" valign="middle"> <input size="30" style="font-size: 10px; background-color: #fff;" type="text" name="query" id="searchbox"></td>
|
||||||
|
|
||||||
<td style="background: #fff url(http://af.lygo.com/d/toolbar/bg.gif) repeat-x; text-align: center;" colspan="3" align="center">
|
<td style="background: #fff url(http://af.lygo.com/d/toolbar/bg.gif) repeat-x; text-align: center;" colspan="3" align="center">
|
||||||
<a href="http://clk.atdmt.com/VON/go/lycsnvon0710000019von/direct/01/"><img src="/sys/free_logo_xxxx_157x20.gif" height="20" width="157" border="0" alt="Vonage"></a><img src="http://view.atdmt.com/VON/view/lycsnvon0710000019von/direct/01/"></td>
|
<a href="http://clk.atdmt.com/VON/go/lycsnvon0710000019von/direct/01/"><img src="/sys/free_logo_xxxx_157x20.gif" height="20" width="157" border="0" alt="Vonage"></a><img src="http://view.atdmt.com/VON/view/lycsnvon0710000019von/direct/01/"></td>
|
||||||
|
|
||||||
<span style="font-size: 11px;">
|
<span style="font-size: 11px;">
|
||||||
<span style="color:#00f; font-weight:bold;">«</span>
|
<span style="color:#00f; font-weight:bold;">«</span>
|
||||||
<span id="top100">
|
<span id="top100">
|
||||||
<a href="javascript:void top100('prev')" target="_top">Previous</a> |
|
<a href="javascript:void top100('prev')" target="_top">Previous</a> |
|
||||||
<a href="http://lt.angelfire.com/af_toolbar/top100/_h_/www.angelfire.lycos.com/cgi-bin/top100/pagelist?start=1" target="_top">Top 100</a> |
|
<a href="http://lt.angelfire.com/af_toolbar/top100/_h_/www.angelfire.lycos.com/cgi-bin/top100/pagelist?start=1" target="_top">Top 100</a> |
|
||||||
<a href="javascript:void top100('next')" target="_top">Next</a>
|
<a href="javascript:void top100('next')" target="_top">Next</a>
|
||||||
|
|
||||||
</span>
|
</span>
|
||||||
<span style="color: #00f; font-weight: bold;">»</span>
|
<span style="color: #00f; font-weight: bold;">»</span>
|
||||||
</span>
|
</span>
|
||||||
</td>
|
</td>
|
||||||
<td valign="top" style="background: #fff url(http://af.lygo.com/d/toolbar/bg.gif) repeat-x;"><a href="http://lt.angelfire.com/af_toolbar/angelfire/_h_/www.angelfire.lycos.com" target="_top"><img src="http://af.lygo.com/d/toolbar/aflogo_bot.gif" alt="hosted by angelfire" border="0" height="22" width="143"></a></td>
|
<td valign="top" style="background: #fff url(http://af.lygo.com/d/toolbar/bg.gif) repeat-x;"><a href="http://lt.angelfire.com/af_toolbar/angelfire/_h_/www.angelfire.lycos.com" target="_top"><img src="http://af.lygo.com/d/toolbar/aflogo_bot.gif" alt="hosted by angelfire" border="0" height="22" width="143"></a></td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<table border="0" cellpadding="0" cellspacing="0" width="728"><tr><td>
|
<table border="0" cellpadding="0" cellspacing="0" width="728"><tr><td>
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
if (objAdMgr.isSlotAvailable("leaderboard")) {
|
if (objAdMgr.isSlotAvailable("leaderboard")) {
|
||||||
objAdMgr.renderSlot("leaderboard")
|
objAdMgr.renderSlot("leaderboard")
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
<noscript>
|
<noscript>
|
||||||
<a href="http://network.realmedia.com/RealMedia/ads/click_nx.ads/lycosangelfire/ros/728x90/wp/ss/a/491169@Top1?x"><img border="0" src="http://network.realmedia.com/RealMedia/ads/adstream_nx.ads/lycosangelfire/ros/728x90/wp/ss/a/491169@Top1" alt="leaderboard ad" /></a>
|
<a href="http://network.realmedia.com/RealMedia/ads/click_nx.ads/lycosangelfire/ros/728x90/wp/ss/a/491169@Top1?x"><img border="0" src="http://network.realmedia.com/RealMedia/ads/adstream_nx.ads/lycosangelfire/ros/728x90/wp/ss/a/491169@Top1" alt="leaderboard ad" /></a>
|
||||||
</noscript>
|
</noscript>
|
||||||
|
|
||||||
</td></tr>
|
</td></tr>
|
||||||
</table>
|
</table>
|
||||||
<table width="86%" border="0" cellspacing="0" cellpadding="2">
|
<table width="86%" border="0" cellspacing="0" cellpadding="2">
|
||||||
<tr>
|
<tr>
|
||||||
<td height="388" width="19%" bgcolor="#FFCCFF" valign="top">
|
<td height="388" width="19%" bgcolor="#FFCCFF" valign="top">
|
||||||
<p>May 1, 2000</p>
|
<p>May 1, 2000</p>
|
||||||
<p><b>Pop Culture</b> </p>
|
<p><b>Pop Culture</b> </p>
|
||||||
<p>by. H. Finkelstein</p>
|
<p>by. H. Finkelstein</p>
|
||||||
|
|
||||||
</td>
|
</td>
|
||||||
<td height="388" width="52%" valign="top">
|
<td height="388" width="52%" valign="top">
|
||||||
<p>Welcome to the <b>Anime Digi-Lib</b>, a virtual index to anime on the
|
<p>Welcome to the <b>Anime Digi-Lib</b>, a virtual index to anime on the
|
||||||
internet. This site strives to house a comprehensive index to both personal
|
internet. This site strives to house a comprehensive index to both personal
|
||||||
and commercial websites and provides reviews to these sites. We hope to
|
and commercial websites and provides reviews to these sites. We hope to
|
||||||
be a gateway for people who've never imagined they'd ever be interested
|
be a gateway for people who've never imagined they'd ever be interested
|
||||||
in Japanese Animation. </p>
|
in Japanese Animation. </p>
|
||||||
<table width="99%" border="1" cellspacing="0" cellpadding="2" height="320" name="Searchnservices">
|
<table width="99%" border="1" cellspacing="0" cellpadding="2" height="320" name="Searchnservices">
|
||||||
<tr>
|
<tr>
|
||||||
<td height="263" valign="top" width="58%">
|
<td height="263" valign="top" width="58%">
|
||||||
<p> </p>
|
<p> </p>
|
||||||
<p> </p>
|
<p> </p>
|
||||||
|
|
||||||
<FORM ACTION="/cgi-bin/script_library/site_search/search" METHOD="GET">
|
<FORM ACTION="/cgi-bin/script_library/site_search/search" METHOD="GET">
|
||||||
|
|
||||||
<table border="0" cellpadding="2" cellspacing="0">
|
<table border="0" cellpadding="2" cellspacing="0">
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="2">Search term: <INPUT NAME="search_term"><br></td>
|
<td colspan="2">Search term: <INPUT NAME="search_term"><br></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="2" align="center">Case-sensitive -
|
<td colspan="2" align="center">Case-sensitive -
|
||||||
<INPUT TYPE="checkbox" NAME="case_sensitive">yes<br></td>
|
<INPUT TYPE="checkbox" NAME="case_sensitive">yes<br></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td align="right"><INPUT TYPE="radio" NAME="search_type" VALUE="exact" CHECKED>exact</td>
|
<td align="right"><INPUT TYPE="radio" NAME="search_type" VALUE="exact" CHECKED>exact</td>
|
||||||
<td><INPUT TYPE="radio" NAME="search_type" VALUE="fuzzy">fuzzy<br></td>
|
<td><INPUT TYPE="radio" NAME="search_type" VALUE="fuzzy">fuzzy<br></td>
|
||||||
|
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="2" align="center"><INPUT TYPE="hidden" NAME="display" VALUE="#FF0000"><INPUT TYPE="submit"></td>
|
<td colspan="2" align="center"><INPUT TYPE="hidden" NAME="display" VALUE="#FF0000"><INPUT TYPE="submit"></td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
|
||||||
<td>
|
<td>
|
||||||
<table border="0" cellpadding="0" cellspacing="0" width="100%">
|
<table border="0" cellpadding="0" cellspacing="0" width="100%">
|
||||||
<tr><td><font face="verdana,geneva" color="#000011" size="1">What is better, subtitled or dubbed anime?</font></td></tr>
|
<tr><td><font face="verdana,geneva" color="#000011" size="1">What is better, subtitled or dubbed anime?</font></td></tr>
|
||||||
<tr><td><input type="radio" name="rd" value="1"><font face="verdana" size="2" color="#000011">Subtitled</font></td></tr>
|
<tr><td><input type="radio" name="rd" value="1"><font face="verdana" size="2" color="#000011">Subtitled</font></td></tr>
|
||||||
|
|
||||||
<tr><td align="middle"><font face="verdana" size="1"><a href="http://pub.alxnet.com/poll?id=2079873&q=view">Current results</a></font></td></tr>
|
<tr><td align="middle"><font face="verdana" size="1"><a href="http://pub.alxnet.com/poll?id=2079873&q=view">Current results</a></font></td></tr>
|
||||||
</table></td></tr>
|
</table></td></tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><font face="verdana" size="1"><a href="http://www.alxnet.com/services/poll/">Free
|
<td><font face="verdana" size="1"><a href="http://www.alxnet.com/services/poll/">Free
|
||||||
Web Polls</a></font></td>
|
Web Polls</a></font></td>
|
||||||
</tr>
|
</tr>
|
||||||
</table></form>
|
</table></form>
|
||||||
<!-- Alxnet.com -- web poll code ends -->
|
<!-- Alxnet.com -- web poll code ends -->
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,36 +1,36 @@
|
|||||||
|
|
||||||
Security
|
Security
|
||||||
|
|
||||||
Like anything that claims to afford security, HTML_Purifier can be circumvented
|
Like anything that claims to afford security, HTML_Purifier can be circumvented
|
||||||
through negligence of people. This class will do its job: no more, no less,
|
through negligence of people. This class will do its job: no more, no less,
|
||||||
and it's up to you to provide it the proper information and proper context
|
and it's up to you to provide it the proper information and proper context
|
||||||
to be effective. Things to remember:
|
to be effective. Things to remember:
|
||||||
|
|
||||||
1. UTF-8. Currently, the parser runs under the assumption that it is dealing
|
1. UTF-8. Currently, the parser runs under the assumption that it is dealing
|
||||||
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
||||||
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
||||||
your character encoding, you should switch. Now. (in future versions, however,
|
your character encoding, you should switch. Now. (in future versions, however,
|
||||||
I may make the character encoding configurable, but there's only so much I
|
I may make the character encoding configurable, but there's only so much I
|
||||||
can do). Make sure any input is properly converted to UTF-8, or the parser
|
can do). Make sure any input is properly converted to UTF-8, or the parser
|
||||||
will mangle it badly (though it won't be a security risk if you're outputting
|
will mangle it badly (though it won't be a security risk if you're outputting
|
||||||
it as UTF-8).
|
it as UTF-8).
|
||||||
|
|
||||||
2. XHTML 1.0 Transitional. This is what the parser is outputting. For the most
|
2. XHTML 1.0 Transitional. This is what the parser is outputting. For the most
|
||||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
||||||
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
||||||
has waaaay too many quirks for a little parser to handle. We did not select
|
has waaaay too many quirks for a little parser to handle. We did not select
|
||||||
strict in order to prevent ourselves from being too draconic on users.
|
strict in order to prevent ourselves from being too draconic on users.
|
||||||
|
|
||||||
3. [PROJECTED] IDs. They need to be unique, but without some knowledge of the
|
3. [PROJECTED] IDs. They need to be unique, but without some knowledge of the
|
||||||
rest of the document, it's difficult to know what's unique. I project default
|
rest of the document, it's difficult to know what's unique. I project default
|
||||||
behavior being a customizable prefix to all ID declarations in the document,
|
behavior being a customizable prefix to all ID declarations in the document,
|
||||||
so make sure you don't use that prefix. Might cause problems for multiple
|
so make sure you don't use that prefix. Might cause problems for multiple
|
||||||
instances of HTML escaped output too (especially when it comes to caching).
|
instances of HTML escaped output too (especially when it comes to caching).
|
||||||
Best to just zap them completely, perhaps. This will be configurable, and you'll
|
Best to just zap them completely, perhaps. This will be configurable, and you'll
|
||||||
have to pick the correct one.
|
have to pick the correct one.
|
||||||
|
|
||||||
4. [PROJECTED] Links. We're not going to try for spam protection (although
|
4. [PROJECTED] Links. We're not going to try for spam protection (although
|
||||||
some hooks for such a module might be nice) but we may offer the ability to
|
some hooks for such a module might be nice) but we may offer the ability to
|
||||||
only accept relative URLs. Pick the one that's right for you.
|
only accept relative URLs. Pick the one that's right for you.
|
||||||
|
|
||||||
5. [PROJECTED] CSS. What a knotty issue. Probably will have to be configurable.
|
5. [PROJECTED] CSS. What a knotty issue. Probably will have to be configurable.
|
712
docs/spec.txt
712
docs/spec.txt
@ -1,356 +1,356 @@
|
|||||||
|
|
||||||
HTML Purifier Specification
|
HTML Purifier Specification
|
||||||
by Edward Z. Yang
|
by Edward Z. Yang
|
||||||
|
|
||||||
== Introduction ==
|
== Introduction ==
|
||||||
|
|
||||||
There are a number of ad hoc HTML filtering solutions out there on the web
|
There are a number of ad hoc HTML filtering solutions out there on the web
|
||||||
(some examples including HTML_Safe, kses and SafeHtmlChecker.class.php) that
|
(some examples including HTML_Safe, kses and SafeHtmlChecker.class.php) that
|
||||||
claim to filter HTML properly, preventing malicious JavaScript and layout
|
claim to filter HTML properly, preventing malicious JavaScript and layout
|
||||||
breaking HTML from getting through the parser. None of them, however,
|
breaking HTML from getting through the parser. None of them, however,
|
||||||
demonstrates a thorough knowledge of either the DTD that defines the HTML
|
demonstrates a thorough knowledge of either the DTD that defines the HTML
|
||||||
or the caveats of HTML that cannot be expressed by a DTD. Configurable
|
or the caveats of HTML that cannot be expressed by a DTD. Configurable
|
||||||
filters (such as kses or PHP's built-in strip_tags() function) have trouble
|
filters (such as kses or PHP's built-in strip_tags() function) have trouble
|
||||||
validating the contents of attributes and can be subject to security attacks
|
validating the contents of attributes and can be subject to security attacks
|
||||||
due to poor configuration. Other filters take the naive approach of
|
due to poor configuration. Other filters take the naive approach of
|
||||||
blacklisting known threats and tags, failing to account for the introduction
|
blacklisting known threats and tags, failing to account for the introduction
|
||||||
of new technologies, new tags, new attributes or quirky browser behavior.
|
of new technologies, new tags, new attributes or quirky browser behavior.
|
||||||
|
|
||||||
However, HTML Purifier takes a different approach, one that doesn't use
|
However, HTML Purifier takes a different approach, one that doesn't use
|
||||||
specification-ignorant regexes or narrow blacklists. HTML Purifier will
|
specification-ignorant regexes or narrow blacklists. HTML Purifier will
|
||||||
decompose the whole document into tokens, and rigorously process the tokens by:
|
decompose the whole document into tokens, and rigorously process the tokens by:
|
||||||
removing non-whitelisted elements, transforming bad practice tags like <font>
|
removing non-whitelisted elements, transforming bad practice tags like <font>
|
||||||
into <span>, properly checking the nesting of tags and their children and
|
into <span>, properly checking the nesting of tags and their children and
|
||||||
validating all attributes according to their RFCs.
|
validating all attributes according to their RFCs.
|
||||||
|
|
||||||
To my knowledge, there is nothing like this on the web yet. Not even MediaWiki,
|
To my knowledge, there is nothing like this on the web yet. Not even MediaWiki,
|
||||||
which allows an amazingly diverse mix of HTML and wikitext in its documents,
|
which allows an amazingly diverse mix of HTML and wikitext in its documents,
|
||||||
gets all the nesting quirks right. Existing solutions hope that no JavaScript
|
gets all the nesting quirks right. Existing solutions hope that no JavaScript
|
||||||
will slip through, but either do not attempt to ensure that the resulting
|
will slip through, but either do not attempt to ensure that the resulting
|
||||||
output is valid XHTML or send the HTML through a draconic XML parser (and yet
|
output is valid XHTML or send the HTML through a draconic XML parser (and yet
|
||||||
still get the nesting wrong: SafeHtmlChecker.class.php does not prevent <a>
|
still get the nesting wrong: SafeHtmlChecker.class.php does not prevent <a>
|
||||||
tags from being nested within each other).
|
tags from being nested within each other).
|
||||||
|
|
||||||
This document seeks to detail the inner workings of HTML Purifier. The first
|
This document seeks to detail the inner workings of HTML Purifier. The first
|
||||||
draft was drawn up after two rough code sketches and the implementation of a
|
draft was drawn up after two rough code sketches and the implementation of a
|
||||||
forgiving lexer. You may also be interested in the unit tests located in the
|
forgiving lexer. You may also be interested in the unit tests located in the
|
||||||
tests/ folder, which provide a living document on how exactly the filter deals
|
tests/ folder, which provide a living document on how exactly the filter deals
|
||||||
with malformed input.
|
with malformed input.
|
||||||
|
|
||||||
In summary:
|
In summary:
|
||||||
|
|
||||||
1. Parse document into an array of tag and text tokens (Lexer)
|
1. Parse document into an array of tag and text tokens (Lexer)
|
||||||
2. Remove all elements not on whitelist and transform certain other elements
|
2. Remove all elements not on whitelist and transform certain other elements
|
||||||
into acceptable forms (e.g. <font>)
|
into acceptable forms (e.g. <font>)
|
||||||
3. Make document well formed while helpfully taking into account certain quirks,
|
3. Make document well formed while helpfully taking into account certain quirks,
|
||||||
such as the fact that <p> tags traditionally are closed by other block-level
|
such as the fact that <p> tags traditionally are closed by other block-level
|
||||||
elements.
|
elements.
|
||||||
4. Run through all nodes and check children for proper order (especially
|
4. Run through all nodes and check children for proper order (especially
|
||||||
important for tables).
|
important for tables).
|
||||||
5. Validate attributes according to more restrictive definitions based on the
|
5. Validate attributes according to more restrictive definitions based on the
|
||||||
RFCs.
|
RFCs.
|
||||||
6. Translate back into a string. (Generator)
|
6. Translate back into a string. (Generator)
|
||||||
|
|
||||||
HTML Purifier is best suited for documents that require a rich array of
|
HTML Purifier is best suited for documents that require a rich array of
|
||||||
HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
||||||
written in an extremely restrictive set of markup that doesn't require
|
written in an extremely restrictive set of markup that doesn't require
|
||||||
all this functionality (or not written in HTML at all).
|
all this functionality (or not written in HTML at all).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
== STAGE 1 - parsing ==
|
== STAGE 1 - parsing ==
|
||||||
|
|
||||||
Status: A (see source, mainly internals and UTF-8)
|
Status: A (see source, mainly internals and UTF-8)
|
||||||
|
|
||||||
The Lexer (currently we have three choices) handles parsing into Tokens.
|
The Lexer (currently we have three choices) handles parsing into Tokens.
|
||||||
|
|
||||||
Here are the mappings for Lexer_PEARSax3
|
Here are the mappings for Lexer_PEARSax3
|
||||||
|
|
||||||
* Start(name, attributes) is openHandler
|
* Start(name, attributes) is openHandler
|
||||||
* End(name) is closeHandler
|
* End(name) is closeHandler
|
||||||
* Empty(name, attributes) is openHandler (is in array of empties)
|
* Empty(name, attributes) is openHandler (is in array of empties)
|
||||||
* Data(parse(text)) is dataHandler
|
* Data(parse(text)) is dataHandler
|
||||||
* Comment(text) is escapeHandler (has leading -)
|
* Comment(text) is escapeHandler (has leading -)
|
||||||
* Data(text) is escapeHandler (has leading [, CDATA)
|
* Data(text) is escapeHandler (has leading [, CDATA)
|
||||||
|
|
||||||
Ignorable/not being implemented (although we probably want to output them raw):
|
Ignorable/not being implemented (although we probably want to output them raw):
|
||||||
* ProcessingInstructions(text) is piHandler
|
* ProcessingInstructions(text) is piHandler
|
||||||
* JavaOrASPInstructions(text) is jaspHandler
|
* JavaOrASPInstructions(text) is jaspHandler
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
== STAGE 2 - remove foreign elements ==
|
== STAGE 2 - remove foreign elements ==
|
||||||
|
|
||||||
Status: A- (transformations need to be implemented)
|
Status: A- (transformations need to be implemented)
|
||||||
|
|
||||||
At this point, the parser needs to start knowing about the DTD. Since we
|
At this point, the parser needs to start knowing about the DTD. Since we
|
||||||
hold everything in an associative $info array, if it's set, it's valid, and
|
hold everything in an associative $info array, if it's set, it's valid, and
|
||||||
we can include. Otherwise zap it, or attempt to figure out what they meant.
|
we can include. Otherwise zap it, or attempt to figure out what they meant.
|
||||||
<stronf>? A misspelling of <strong>! This feature may be too sugary though.
|
<stronf>? A misspelling of <strong>! This feature may be too sugary though.
|
||||||
|
|
||||||
While we're at it, we can change the Processing Instructions and Java/ASP
|
While we're at it, we can change the Processing Instructions and Java/ASP
|
||||||
Instructions into data blocks, scratch comment blocks, change CharacterData
|
Instructions into data blocks, scratch comment blocks, change CharacterData
|
||||||
into Data (although I don't see why we can't do that at the start).
|
into Data (although I don't see why we can't do that at the start).
|
||||||
|
|
||||||
One last thing: the remove foreign elements has to do the element
|
One last thing: the remove foreign elements has to do the element
|
||||||
transformations, from FONT to SPAN, etc.
|
transformations, from FONT to SPAN, etc.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
== STAGE 3 - make well formed ==
|
== STAGE 3 - make well formed ==
|
||||||
|
|
||||||
Status: A- (not as good as possible)
|
Status: A- (not as good as possible)
|
||||||
|
|
||||||
Now we step through the whole thing and correct nesting issues. Most of the
|
Now we step through the whole thing and correct nesting issues. Most of the
|
||||||
time, it's making sure the tags match up, but there's some trickery going on
|
time, it's making sure the tags match up, but there's some trickery going on
|
||||||
for HTML's quirks. They are:
|
for HTML's quirks. They are:
|
||||||
|
|
||||||
* Set of tags that close P
|
* Set of tags that close P
|
||||||
'address', 'blockquote', 'dd', 'dir', 'div',
|
'address', 'blockquote', 'dd', 'dir', 'div',
|
||||||
'dl', 'dt', 'h1', 'h2', 'h3', 'h4',
|
'dl', 'dt', 'h1', 'h2', 'h3', 'h4',
|
||||||
'h5', 'h6', 'hr',
|
'h5', 'h6', 'hr',
|
||||||
'ol', 'p', 'pre',
|
'ol', 'p', 'pre',
|
||||||
'table', 'ul'
|
'table', 'ul'
|
||||||
* Li closes li
|
* Li closes li
|
||||||
* more?
|
* more?
|
||||||
|
|
||||||
We also want to do translations, like from FONT to SPAN with STYLE.
|
We also want to do translations, like from FONT to SPAN with STYLE.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
== STAGE 4 - check nesting ==
|
== STAGE 4 - check nesting ==
|
||||||
|
|
||||||
Status: B (table custom definition needs to be implemented)
|
Status: B (table custom definition needs to be implemented)
|
||||||
|
|
||||||
We know that the document is now well formed. The tokenizer should now take
|
We know that the document is now well formed. The tokenizer should now take
|
||||||
things in nodes: when you hit a start tag, keep on going until you get its
|
things in nodes: when you hit a start tag, keep on going until you get its
|
||||||
ending tag, and then handle everything inside there. Fortunately, no
|
ending tag, and then handle everything inside there. Fortunately, no
|
||||||
fancy recursion is necessary as going to the next node is as simple as
|
fancy recursion is necessary as going to the next node is as simple as
|
||||||
scrolling to the next start tag.
|
scrolling to the next start tag.
|
||||||
|
|
||||||
Suppose we have a node and encounter a problem with one of its children.
|
Suppose we have a node and encounter a problem with one of its children.
|
||||||
Depending on the complexity of the rule, we will either delete the children,
|
Depending on the complexity of the rule, we will either delete the children,
|
||||||
or delete the entire node itself.
|
or delete the entire node itself.
|
||||||
|
|
||||||
The simplest type of rule is zero or more valid elements, denoted like:
|
The simplest type of rule is zero or more valid elements, denoted like:
|
||||||
|
|
||||||
( el1 | el2 | el3 )*
|
( el1 | el2 | el3 )*
|
||||||
|
|
||||||
The next simplest is with one or more valid elements:
|
The next simplest is with one or more valid elements:
|
||||||
|
|
||||||
( li )+
|
( li )+
|
||||||
|
|
||||||
And then you have complex cases:
|
And then you have complex cases:
|
||||||
|
|
||||||
table (caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))
|
table (caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))
|
||||||
map ((%block; | form | %misc;)+ | area+)
|
map ((%block; | form | %misc;)+ | area+)
|
||||||
html (head, body)
|
html (head, body)
|
||||||
head (%head.misc;,
|
head (%head.misc;,
|
||||||
((title, %head.misc;, (base, %head.misc;)?) |
|
((title, %head.misc;, (base, %head.misc;)?) |
|
||||||
(base, %head.misc;, (title, %head.misc;))))
|
(base, %head.misc;, (title, %head.misc;))))
|
||||||
|
|
||||||
Each of these has to be dealt with. Case 1 is a joy, because you can zap
|
Each of these has to be dealt with. Case 1 is a joy, because you can zap
|
||||||
as many as you want, but you'll never actually have to kill the node. Two
|
as many as you want, but you'll never actually have to kill the node. Two
|
||||||
and three need the entire node to be killed if you have a problem. This
|
and three need the entire node to be killed if you have a problem. This
|
||||||
can be problematic, as the missing node might cause its parent node to now
|
can be problematic, as the missing node might cause its parent node to now
|
||||||
be incorrect. Granted, it's unlikely, and I'm fairly certain that HTML, let
|
be incorrect. Granted, it's unlikely, and I'm fairly certain that HTML, let
|
||||||
alone the simplified set I'm allowing, won't have this problem, but it's worth
|
alone the simplified set I'm allowing, won't have this problem, but it's worth
|
||||||
checking for.
|
checking for.
|
||||||
|
|
||||||
The way, I suppose, one would check for it, is whenever a node is removed,
|
The way, I suppose, one would check for it, is whenever a node is removed,
|
||||||
scroll to its parent start, and re-evaluate it. Make sure you're able to do
|
scroll to its parent start, and re-evaluate it. Make sure you're able to do
|
||||||
that with minimal code repetition.
|
that with minimal code repetition.
|
||||||
|
|
||||||
EDITOR'S NOTE: this behavior is not implemented by default, because the
|
EDITOR'S NOTE: this behavior is not implemented by default, because the
|
||||||
default configuration has a setup that ensures that cascading node removals
|
default configuration has a setup that ensures that cascading node removals
|
||||||
will never happen. However, there will be warning signs in case someone tries
|
will never happen. However, there will be warning signs in case someone tries
|
||||||
to hack it further.
|
to hack it further.
|
||||||
|
|
||||||
The most complex case can probably be done by using some fancy regexp
|
The most complex case can probably be done by using some fancy regexp
|
||||||
expressions and transformations. However, it doesn't seem right that, say,
|
expressions and transformations. However, it doesn't seem right that, say,
|
||||||
a stray <b> in a <table> can cause the entire table to be removed. Fixing it,
|
a stray <b> in a <table> can cause the entire table to be removed. Fixing it,
|
||||||
however, may be too difficult (or not, see below).
|
however, may be too difficult (or not, see below).
|
||||||
|
|
||||||
This code was excerpted from the PEAR class XML_DTD. It implements regexp
|
This code was excerpted from the PEAR class XML_DTD. It implements regexp
|
||||||
checking.
|
checking.
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
||||||
// # This actually does the validation
|
// # This actually does the validation
|
||||||
|
|
||||||
// Validate the order of the children
|
// Validate the order of the children
|
||||||
if (!$was_error && count($dtd_children)) {
|
if (!$was_error && count($dtd_children)) {
|
||||||
$children_list = implode(',', $children);
|
$children_list = implode(',', $children);
|
||||||
$regex = $this->dtd->getPcreRegex($name);
|
$regex = $this->dtd->getPcreRegex($name);
|
||||||
if (!preg_match('/^'.$regex.'$/', $children_list)) {
|
if (!preg_match('/^'.$regex.'$/', $children_list)) {
|
||||||
$dtd_regex = $this->dtd->getDTDRegex($name);
|
$dtd_regex = $this->dtd->getDTDRegex($name);
|
||||||
$this->_errors("In element <$name> the children list found:\n'$children_list', ".
|
$this->_errors("In element <$name> the children list found:\n'$children_list', ".
|
||||||
"does not conform the DTD definition: '$dtd_regex'", $lineno);
|
"does not conform the DTD definition: '$dtd_regex'", $lineno);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
||||||
// # This figures out the PcreRegex
|
// # This figures out the PcreRegex
|
||||||
|
|
||||||
//$ch is a string of the allowed childs
|
//$ch is a string of the allowed childs
|
||||||
$children = preg_split('/([^#a-zA-Z0-9_.-]+)/', $ch, -1, PREG_SPLIT_NO_EMPTY);
|
$children = preg_split('/([^#a-zA-Z0-9_.-]+)/', $ch, -1, PREG_SPLIT_NO_EMPTY);
|
||||||
// check for parsed character data special case
|
// check for parsed character data special case
|
||||||
if (in_array('#PCDATA', $children)) {
|
if (in_array('#PCDATA', $children)) {
|
||||||
$content = '#PCDATA';
|
$content = '#PCDATA';
|
||||||
if (count($children) == 1) {
|
if (count($children) == 1) {
|
||||||
$children = array();
|
$children = array();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// $children is not used after this
|
// $children is not used after this
|
||||||
|
|
||||||
$this->dtd['elements'][$elem_name]['child_validation_dtd_regex'] = $ch;
|
$this->dtd['elements'][$elem_name]['child_validation_dtd_regex'] = $ch;
|
||||||
// Convert the DTD regex language into PCRE regex format
|
// Convert the DTD regex language into PCRE regex format
|
||||||
$reg = str_replace(',', ',?', $ch);
|
$reg = str_replace(',', ',?', $ch);
|
||||||
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
|
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
|
||||||
$this->dtd['elements'][$elem_name]['child_validation_pcre_regex'] = $reg;
|
$this->dtd['elements'][$elem_name]['child_validation_pcre_regex'] = $reg;
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
||||||
We can probably loot and steal all of this. The brilliance of this code is
|
We can probably loot and steal all of this. The brilliance of this code is
|
||||||
amazing. I'm lovin' it!
|
amazing. I'm lovin' it!
|
||||||
|
|
||||||
So, the way we define these cases should work like this:
|
So, the way we define these cases should work like this:
|
||||||
|
|
||||||
class ChildDef with validateChildren($children_tags)
|
class ChildDef with validateChildren($children_tags)
|
||||||
|
|
||||||
The function needs to parse into nodes, then into the regex array.
|
The function needs to parse into nodes, then into the regex array.
|
||||||
It can result in one of three actions: the removal of the entire parent node,
|
It can result in one of three actions: the removal of the entire parent node,
|
||||||
replacement of all of the original child tags with a new set of child
|
replacement of all of the original child tags with a new set of child
|
||||||
tags which it returns, or no changes. They shall be denoted as, respectively,
|
tags which it returns, or no changes. They shall be denoted as, respectively,
|
||||||
|
|
||||||
Remove entire parent node = false
|
Remove entire parent node = false
|
||||||
Replace child tags with this = array of tags
|
Replace child tags with this = array of tags
|
||||||
No changes = true
|
No changes = true
|
||||||
|
|
||||||
If we remove the entire parent node, we must scroll back to the parent of the
|
If we remove the entire parent node, we must scroll back to the parent of the
|
||||||
parent.
|
parent.
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
||||||
Another few problems: EXCLUSIONS!
|
Another few problems: EXCLUSIONS!
|
||||||
|
|
||||||
a
|
a
|
||||||
must not contain other a elements.
|
must not contain other a elements.
|
||||||
pre
|
pre
|
||||||
must not contain the img, object, big, small, sub, or sup elements.
|
must not contain the img, object, big, small, sub, or sup elements.
|
||||||
button
|
button
|
||||||
must not contain the input, select, textarea, label, button, form, fieldset,
|
must not contain the input, select, textarea, label, button, form, fieldset,
|
||||||
iframe or isindex elements.
|
iframe or isindex elements.
|
||||||
label
|
label
|
||||||
must not contain other label elements.
|
must not contain other label elements.
|
||||||
form
|
form
|
||||||
must not contain other form elements.
|
must not contain other form elements.
|
||||||
|
|
||||||
Normative exclusions straight from the horse's mouth. These are SGML style,
|
Normative exclusions straight from the horse's mouth. These are SGML style,
|
||||||
not XML style, so we need to modify the ruleset slightly. However, the DTD
|
not XML style, so we need to modify the ruleset slightly. However, the DTD
|
||||||
may have done this for us already.
|
may have done this for us already.
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
||||||
Also, what do we do with elements if they're not allowed somewhere? We need
|
Also, what do we do with elements if they're not allowed somewhere? We need
|
||||||
some sort of default behavior. I reckon that we should be allowed to:
|
some sort of default behavior. I reckon that we should be allowed to:
|
||||||
|
|
||||||
1. Delete the node
|
1. Delete the node
|
||||||
2. Translate it into text (not okay for areas that don't allow #PCDATA)
|
2. Translate it into text (not okay for areas that don't allow #PCDATA)
|
||||||
3. Move the node to somewhere where it is okay
|
3. Move the node to somewhere where it is okay
|
||||||
|
|
||||||
What complicates the matter is that Firefox has the ability to construct
|
What complicates the matter is that Firefox has the ability to construct
|
||||||
DOMs and render invalid nestings of elements (like <b><div>asdf</div></b>).
|
DOMs and render invalid nestings of elements (like <b><div>asdf</div></b>).
|
||||||
This means that behavior for stray pcdata in ul/ol is undefined. Behavior
|
This means that behavior for stray pcdata in ul/ol is undefined. Behavior
|
||||||
with data in a table gets bubbled to the start of the table (assuming
|
with data in a table gets bubbled to the start of the table (assuming
|
||||||
that we actually custom-make the table child validation class).
|
that we actually custom-make the table child validation class).
|
||||||
|
|
||||||
So... I say delete the node when PCDATA isn't allowed (or the regex is too
|
So... I say delete the node when PCDATA isn't allowed (or the regex is too
|
||||||
complicated to determine where PCDATA could be inserted), and translate the node
|
complicated to determine where PCDATA could be inserted), and translate the node
|
||||||
to text when PCDATA is allowed.
|
to text when PCDATA is allowed.
|
||||||
|
|
||||||
--
|
--
|
||||||
|
|
||||||
Note that generic child definitions are not usually desirable: we should
|
Note that generic child definitions are not usually desirable: we should
|
||||||
implement custom handlers for each one that specify the stuff correctly.
|
implement custom handlers for each one that specify the stuff correctly.
|
||||||
|
|
||||||
== STAGE 5 - check attributes ==
|
== STAGE 5 - check attributes ==
|
||||||
|
|
||||||
STATUS: N (not started)
|
STATUS: N (not started)
|
||||||
|
|
||||||
While we're doing all this nesting hocus-pocus, attributes are also being
|
While we're doing all this nesting hocus-pocus, attributes are also being
|
||||||
checked. The reason why we need this to be done with the nesting stuff
|
checked. The reason why we need this to be done with the nesting stuff
|
||||||
is if a REQUIRED attribute is not there, we might need to kill the tag (or
|
is if a REQUIRED attribute is not there, we might need to kill the tag (or
|
||||||
replace it with data). Fortunately, this is rare enough that we only have
|
replace it with data). Fortunately, this is rare enough that we only have
|
||||||
to worry about it for certain things:
|
to worry about it for certain things:
|
||||||
|
|
||||||
* ! bdo - dir > replace with span, preserve attributes
|
* ! bdo - dir > replace with span, preserve attributes
|
||||||
* basefont - size
|
* basefont - size
|
||||||
* param - name
|
* param - name
|
||||||
* applet - width, height
|
* applet - width, height
|
||||||
* ! img - src, alt > if only alt is missing, insert filename, else remove img
|
* ! img - src, alt > if only alt is missing, insert filename, else remove img
|
||||||
* map - id
|
* map - id
|
||||||
* area - alt
|
* area - alt
|
||||||
* form - action
|
* form - action
|
||||||
* optgroup - label
|
* optgroup - label
|
||||||
* textarea - rows, cols
|
* textarea - rows, cols
|
||||||
|
|
||||||
As you can see, only two of them we would remotely consider for our simplified
|
As you can see, only two of them we would remotely consider for our simplified
|
||||||
tag set. But each has a different set of challenges. For the img tag, we'd
|
tag set. But each has a different set of challenges. For the img tag, we'd
|
||||||
have to be careful about deleting it. If we do hit a snag, we can supply
|
have to be careful about deleting it. If we do hit a snag, we can supply
|
||||||
a default "blank" image.
|
a default "blank" image.
|
||||||
|
|
||||||
So after that's all said and done, each of the different types of content
|
So after that's all said and done, each of the different types of content
|
||||||
inside the attributes needs to be handled differently.
|
inside the attributes needs to be handled differently.
|
||||||
|
|
||||||
ContentType(s) [RFC2045]
|
ContentType(s) [RFC2045]
|
||||||
Charset(s) [RFC2045]
|
Charset(s) [RFC2045]
|
||||||
LanguageCode [RFC3066] (NMTOKEN)
|
LanguageCode [RFC3066] (NMTOKEN)
|
||||||
Character [XML][2.2] (a single character)
|
Character [XML][2.2] (a single character)
|
||||||
Number /^\d+$/
|
Number /^\d+$/
|
||||||
LinkTypes [HTML][6.12] <space>
|
LinkTypes [HTML][6.12] <space>
|
||||||
MediaDesc [HTML][6.13] <comma>
|
MediaDesc [HTML][6.13] <comma>
|
||||||
URI/UriList [RFC2396] <space>
|
URI/UriList [RFC2396] <space>
|
||||||
Datetime (ISO date format)
|
Datetime (ISO date format)
|
||||||
Script ...
|
Script ...
|
||||||
StyleSheet [CSS] (complex)
|
StyleSheet [CSS] (complex)
|
||||||
Text CDATA
|
Text CDATA
|
||||||
FrameTarget NMTOKEN
|
FrameTarget NMTOKEN
|
||||||
Length (pixel, percentage) (?:px suffix allowed?)
|
Length (pixel, percentage) (?:px suffix allowed?)
|
||||||
MultiLength (pixel, percentage, or relative)
|
MultiLength (pixel, percentage, or relative)
|
||||||
Pixels (integer)
|
Pixels (integer)
|
||||||
// map attributes omitted
|
// map attributes omitted
|
||||||
ImgAlign (top|middle|bottom|left|right)
|
ImgAlign (top|middle|bottom|left|right)
|
||||||
Color #NNNNNN, #NNN or color name (translate names to hex using the table below)
|
Color #NNNNNN, #NNN or color name (translate names to hex using the table below)
|
||||||
Black = #000000 Green = #008000
|
Black = #000000 Green = #008000
|
||||||
Silver = #C0C0C0 Lime = #00FF00
|
Silver = #C0C0C0 Lime = #00FF00
|
||||||
Gray = #808080 Olive = #808000
|
Gray = #808080 Olive = #808000
|
||||||
White = #FFFFFF Yellow = #FFFF00
|
White = #FFFFFF Yellow = #FFFF00
|
||||||
Maroon = #800000 Navy = #000080
|
Maroon = #800000 Navy = #000080
|
||||||
Red = #FF0000 Blue = #0000FF
|
Red = #FF0000 Blue = #0000FF
|
||||||
Purple = #800080 Teal = #008080
|
Purple = #800080 Teal = #008080
|
||||||
Fuchsia= #FF00FF Aqua = #00FFFF
|
Fuchsia= #FF00FF Aqua = #00FFFF
|
||||||
// plus some directly defined in the spec
|
// plus some directly defined in the spec
|
||||||
|
|
||||||
Everything else is either ID, or defined as a certain set of values.
|
Everything else is either ID, or defined as a certain set of values.
|
||||||
|
|
||||||
Unless we use reflection (in which case we have to make sure the attribute exists),
|
Unless we use reflection (in which case we have to make sure the attribute exists),
|
||||||
we probably want to have a function like...
|
we probably want to have a function like...
|
||||||
|
|
||||||
validate($type, $value) where $type is like ContentType or Number
|
validate($type, $value) where $type is like ContentType or Number
|
||||||
|
|
||||||
and then pass it to a switch.
|
and then pass it to a switch.
|
||||||
|
|
||||||
The final problem is CSS. Get intimate with the syntax here:
|
The final problem is CSS. Get intimate with the syntax here:
|
||||||
http://www.w3.org/TR/CSS21/syndata.html and also note the "bad" CSS elements
|
http://www.w3.org/TR/CSS21/syndata.html and also note the "bad" CSS elements
|
||||||
that HTML_Safe defines to help determine a whitelist.
|
that HTML_Safe defines to help determine a whitelist.
|
||||||
|
|
||||||
== PART 5 - stringify ==
|
== PART 5 - stringify ==
|
||||||
|
|
||||||
Status: A+ (done completely!)
|
Status: A+ (done completely!)
|
||||||
|
|
||||||
Should be fairly simple as long as we delegate to appropriate functions.
|
Should be fairly simple as long as we delegate to appropriate functions.
|
||||||
It's probably too much trouble to indent the stuff properly, so just output
|
It's probably too much trouble to indent the stuff properly, so just output
|
||||||
stuff raw.
|
stuff raw.
|
||||||
|
@ -1,28 +1,28 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
require_once 'HTMLPurifier/Definition.php';
|
require_once 'HTMLPurifier/Definition.php';
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
|
||||||
// Facade tying the three processing stages together: a lexer, a definition
// and a generator. Callers only need purify().
class HTMLPurifier
{

    // object whose tokenizeHTML() is applied to the incoming HTML
    var $lexer;

    // object whose purifyTokens() is applied to the token list
    var $definition;

    // object whose generateFromTokens() produces the return value
    var $generator;

    // Constructor (PHP 4 style): instantiates one object per stage.
    function HTMLPurifier()
    {
        $this->lexer      = new HTMLPurifier_Lexer();
        $this->definition = new HTMLPurifier_Definition();
        $this->generator  = new HTMLPurifier_Generator();
    }

    // Runs $html through lexer -> definition -> generator and returns
    // whatever the generator produces for the purified token list.
    function purify($html)
    {
        $raw_tokens   = $this->lexer->tokenizeHTML($html);
        $clean_tokens = $this->definition->purifyTokens($raw_tokens);

        return $this->generator->generateFromTokens($clean_tokens);
    }

}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,11 +1,11 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
// Minimal holder for an attribute definition: it stores the value it is
// given and performs no validation of its own.
class HTMLPurifier_AttrDef
{

    // the definition value handed to the constructor, stored untouched
    var $def;

    // Constructor (PHP 4 style).
    //   $def - definition value to remember
    function HTMLPurifier_AttrDef($def)
    {
        $this->def = $def;
    }

}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,169 +1,169 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
// HTMLPurifier_ChildDef and inheritance have three types of output:
|
// HTMLPurifier_ChildDef and inheritance have three types of output:
|
||||||
// true = leave nodes as is
|
// true = leave nodes as is
|
||||||
// false = delete parent node and all children
|
// false = delete parent node and all children
|
||||||
// array(...) = replace children nodes with these
|
// array(...) = replace children nodes with these
|
||||||
|
|
||||||
// this is the hardest one to implement. We'll use fancy regexp tricks
|
// this is the hardest one to implement. We'll use fancy regexp tricks
|
||||||
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
||||||
// to fix the tree)
|
// to fix the tree)
|
||||||
|
|
||||||
// we may end up writing custom code for each HTML case
|
// we may end up writing custom code for each HTML case
|
||||||
// in order to make it self correcting
|
// in order to make it self correcting
|
||||||
// Content model driven by a DTD-style expression (for example "a, b").
// The expression is compiled once into a PCRE fragment; validateChildren()
// then matches the comma separated names of the direct children against it.
// This implementation only answers true/false, it never repairs the tree.
class HTMLPurifier_ChildDef
{
    // discriminator read by other code; subclasses override it
    var $type = 'custom';

    // the DTD expression exactly as passed to the constructor
    var $dtd_regex;

    // PCRE fragment derived from $dtd_regex (no delimiters, no anchors)
    var $_pcre_regex;

    // Constructor (PHP 4 style).
    //   $dtd_regex - DTD content model expression
    function HTMLPurifier_ChildDef($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    // Translates $this->dtd_regex into $this->_pcre_regex.
    function _compileRegex() {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Wrap the expression in a group unless it already starts with one.
        // Square-bracket offsets are used because the curly-brace form is no
        // longer accepted by current PHP, and the empty-string check avoids
        // reading offset 0 of an empty expression.
        if ($raw === '' || $raw[0] != '(') {
            $raw = "($raw)";
        }
        // the child list is joined with commas, so every DTD sequence comma
        // becomes an optional literal comma...
        $reg = str_replace(',', ',?', $raw);
        // ...and every element name may be preceded by one as well
        $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
        $this->_pcre_regex = $reg;
    }

    // Checks the direct children against the compiled expression.
    //   $tokens_of_children - list of token objects (type / name, and
    //                         optionally is_whitespace)
    // Returns true when the child names match, false otherwise.
    function validateChildren($tokens_of_children) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) continue;

            // must be decided BEFORE the depth changes: a start tag seen at
            // depth 0 is a direct child, its matching end tag is not
            $is_child = ($nesting == 0); // direct

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                $list_of_children .= $token->name . ',';
            }
        }
        $list_of_children = rtrim($list_of_children, ',');

        $okay =
            preg_match(
                '/^'.$this->_pcre_regex.'$/',
                $list_of_children
            );

        return (bool) $okay;
    }
}
|
||||||
// Base class for content models that are a flat set of allowed element
// names. It only prepares the lookup table and a generator; the actual
// validation is supplied by subclasses.
class HTMLPurifier_ChildDef_Simple extends HTMLPurifier_ChildDef
{
    // lookup table: allowed element name => true
    var $elements = array();

    // generator instance created in the constructor; declared here because
    // the constructor assigns it
    var $gen;

    // Constructor (PHP 4 style).
    //   $elements - either an array of names or a "a | b | c" string
    function HTMLPurifier_ChildDef_Simple($elements) {
        if (is_string($elements)) {
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
        // turn the list into name => true so membership is an isset() check
        $elements = array_flip($elements);
        foreach ($elements as $i => $x) $elements[$i] = true;
        $this->elements = $elements;
        $this->gen = new HTMLPurifier_Generator();
    }

    // Stand-in for an abstract method: always raises E_USER_ERROR.
    // The parameter is declared so the signature matches the one used by
    // the parent class and by the subclasses that override this method.
    function validateChildren($tokens_of_children) {
        trigger_error('Cannot call abstract function!', E_USER_ERROR);
    }
}
|
||||||
// Content model that needs at least one allowed child.
// validateChildren() returns:
//   false  - no usable children (the caller should drop the parent)
//   true   - the children are fine as they are
//   array  - replacement list of children
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef_Simple
{
    var $type = 'required';

    function validateChildren($tokens_of_children) {
        // no tokens at all: nothing to keep
        if (empty($tokens_of_children)) return false;

        // when #PCDATA is allowed, rejected tokens are re-emitted as text
        // built by the generator; otherwise they are dropped silently
        $allow_text = isset($this->elements['#PCDATA']);

        $kept            = array(); // the new set of children
        $depth           = 0;       // current depth into the nest
        $dropping        = false;   // inside a rejected direct child?
        $only_whitespace = true;    // flips once a real token shows up

        foreach ($tokens_of_children as $token) {
            // whitespace tokens are always carried over untouched
            if (!empty($token->is_whitespace)) {
                $kept[] = $token;
                continue;
            }
            $only_whitespace = false;

            // decided before the depth is adjusted, so the start tag of a
            // direct child counts as top level while its end tag does not
            $at_top_level = ($depth == 0);

            if ($token->type == 'start') {
                $depth++;
            } elseif ($token->type == 'end') {
                $depth--;
            }

            // each direct child re-decides whether we are dropping; nested
            // tokens inherit the decision of the child they belong to
            if ($at_top_level) {
                $dropping = !isset($this->elements[$token->name]);
            }

            if (!$dropping) {
                $kept[] = $token;
                continue;
            }

            if ($allow_text) {
                $kept[] = new HTMLPurifier_Token_Text(
                    $this->gen->generateFromToken($token)
                );
            }
        }

        if (empty($kept) || $only_whitespace) return false;
        if ($tokens_of_children == $kept) return true;
        return $kept;
    }
}
|
||||||
|
|
||||||
// only altered behavior is that it returns an empty array
|
// only altered behavior is that it returns an empty array
|
||||||
// instead of a false (to delete the node)
|
// instead of a false (to delete the node)
|
||||||
// Same filtering as HTMLPurifier_ChildDef_Required, except that "nothing
// left" is reported as an empty child list rather than as false.
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    var $type = 'optional';

    function validateChildren($tokens_of_children) {
        $outcome = parent::validateChildren($tokens_of_children);
        // strict check: only a literal false is rewritten, true and arrays
        // pass through unchanged
        return ($outcome === false) ? array() : $outcome;
    }
}
|
||||||
|
|
||||||
// placeholder
|
// placeholder
|
||||||
// Content model marker for elements without children. Other code
// recognises it through $type == 'empty'.
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    var $type = 'empty';

    // Deliberately empty: there is no DTD expression to compile, so the
    // parent constructor logic is not wanted here.
    function HTMLPurifier_ChildDef_Empty() {}

    // Always returns false, whatever it is given.
    // The parameter exists so the signature is compatible with the parent
    // class; it is optional so calls without arguments keep working.
    function validateChildren($tokens_of_children = array()) {
        return false;
    }
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,445 +1,445 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/ChildDef.php';
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
require_once 'HTMLPurifier/Token.php';
|
require_once 'HTMLPurifier/Token.php';
|
||||||
|
|
||||||
class HTMLPurifier_Definition
|
class HTMLPurifier_Definition
|
||||||
{
|
{
|
||||||
|
|
||||||
// generator instance, created in the constructor
var $generator;

// element name => element definition; stays empty until loadData() runs
var $info = array();

// start tags that implicitly close a currently open <p>
// (these are all block elements: blocks aren't allowed in P)
var $info_closes_p = array(
    'address'    => true,
    'blockquote' => true,
    'dd'         => true,
    'dir'        => true,
    'div'        => true,
    'dl'         => true,
    'dt'         => true,
    'h1'         => true,
    'h2'         => true,
    'h3'         => true,
    'h4'         => true,
    'h5'         => true,
    'h6'         => true,
    'hr'         => true,
    'ol'         => true,
    'p'          => true,
    'pre'        => true,
    'table'      => true,
    'ul'         => true
);
|
||||||
|
|
||||||
// Constructor (PHP 4 style): only creates the generator. The element
// table is not built here; the token-processing methods call loadData()
// themselves when $this->info is still empty.
function HTMLPurifier_Definition()
{
    $this->generator = new HTMLPurifier_Generator();
}
|
||||||
|
|
||||||
// Fills $this->info with one HTMLPurifier_ElementDef per supported element,
// keyed by element name. Takes no arguments and returns nothing.
function loadData() {
    // emulates the structure of the DTD

    // entities: prefixed with e_ and _ replaces .
    // we don't use an array because that complicates interpolation
    // strings are used instead of arrays because if you use arrays,
    // you have to do some hideous manipulation with array_merge()

    // these are condensed, remember, with bad stuff taken out

    // transforms: font, menu, dir, center

    // DON'T MONKEY AROUND THIS unless you know what you are doing
    // and also know the assumptions the code makes about what this
    // contains for optimization purposes (see fixNesting)

    $e_special_extra = 'img';
    $e_special_basic = 'br | span | bdo';
    $e_special = "$e_special_basic | $e_special_extra";
    $e_fontstyle_extra = 'big | small';
    $e_fontstyle_basic = 'tt | i | b | u | s | strike';
    $e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra";
    $e_phrase_extra = 'sub | sup';
    $e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
                      ' | cite | abbr | acronym';
    $e_phrase = "$e_phrase_basic | $e_phrase_extra";
    $e_inline_forms = ''; // humor the dtd
    $e_misc_inline = 'ins | del';
    $e_misc = "$e_misc_inline";
    $e_inline = "a | $e_special | $e_fontstyle | $e_phrase".
                " | $e_inline_forms";
    // note the casing: $e_Inline / $e_Flow are content model OBJECTS,
    // $e_inline / $e_block are the name lists they are built from
    $e_Inline = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_inline".
                " | $e_misc_inline");
    $e_heading = 'h1|h2|h3|h4|h5|h6';
    $e_lists = 'ul | ol | dl';
    $e_blocktext = 'pre | hr | blockquote | address';
    $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
    $e_Flow = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block".
              " | $e_inline | $e_misc");
    $e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_special".
                   " | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline");
    $e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
                     " | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
                     " | $e_inline_forms | $e_misc_inline");
    // the DTD's form.content and button.content models are not built:
    // nothing in this method uses them

    // elements whose children follow the Flow model
    $this->info['ins'] =
    $this->info['del'] =
    $this->info['blockquote'] =
    $this->info['dd'] =
    $this->info['li'] =
    $this->info['div'] = new HTMLPurifier_ElementDef($e_Flow);

    // elements whose children follow the Inline model
    // (every name is listed exactly once)
    $this->info['em'] =
    $this->info['strong'] =
    $this->info['dfn'] =
    $this->info['code'] =
    $this->info['samp'] =
    $this->info['kbd'] =
    $this->info['var'] =
    $this->info['cite'] =
    $this->info['abbr'] =
    $this->info['acronym'] =
    $this->info['q'] =
    $this->info['sub'] =
    $this->info['tt'] =
    $this->info['sup'] =
    $this->info['i'] =
    $this->info['b'] =
    $this->info['big'] =
    $this->info['small'] =
    $this->info['u'] =
    $this->info['s'] =
    $this->info['strike'] =
    $this->info['bdo'] =
    $this->info['span'] =
    $this->info['dt'] =
    $this->info['p'] =
    $this->info['h1'] =
    $this->info['h2'] =
    $this->info['h3'] =
    $this->info['h4'] =
    $this->info['h5'] =
    $this->info['h6'] = new HTMLPurifier_ElementDef($e_Inline);

    // lists: at least one item is required
    $this->info['ol'] =
    $this->info['ul'] =
        new HTMLPurifier_ElementDef(
            new HTMLPurifier_ChildDef_Required('li')
        );

    $this->info['dl'] =
        new HTMLPurifier_ElementDef(
            new HTMLPurifier_ChildDef_Required('dt|dd')
        );
    $this->info['address'] =
        new HTMLPurifier_ElementDef(
            new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
            " | $e_misc_inline")
        );

    // elements without children
    $this->info['img'] =
    $this->info['br'] =
    $this->info['hr'] = new HTMLPurifier_ElementDef(new HTMLPurifier_ChildDef_Empty());

    $this->info['pre'] = new HTMLPurifier_ElementDef($e_pre_content);

    $this->info['a'] = new HTMLPurifier_ElementDef($e_a_content);

}
|
||||||
|
|
||||||
// Runs a token list through the four processing stages, in this order:
// removeForeignElements, makeWellFormed, fixNesting, validateAttributes.
//   $tokens - list of token objects
// Returns the token list produced by the last stage.
function purifyTokens($tokens)
{
    // build the element table on first use
    if (empty($this->info)) {
        $this->loadData();
    }

    $known      = $this->removeForeignElements($tokens);
    $wellformed = $this->makeWellFormed($known);
    $nested     = $this->fixNesting($wellformed);

    return $this->validateAttributes($nested);
}
|
||||||
|
|
||||||
// Filters a token list down to known tags and text.
//   - tag tokens whose name is in $this->info are kept as they are
//   - tag tokens with an unknown name are replaced by a text token holding
//     the generator's output for that tag
//   - text tokens are kept
//   - comments and any other token type are removed
//   $tokens - list of token objects
// Returns the filtered list.
function removeForeignElements($tokens)
{
    // build the element table on first use
    if (empty($this->info)) {
        $this->loadData();
    }

    $kept = array();
    foreach ($tokens as $token) {
        if (empty($token->is_tag)) {
            // of the non-tag tokens only text survives; comments and
            // unrecognised types are stripped
            if ($token->type != 'text') {
                continue;
            }
        } elseif (!isset($this->info[$token->name])) {
            // invalid tag, generate HTML and insert in
            $token = new HTMLPurifier_Token_Text(
                $this->generator->generateFromToken($token)
            );
        }
        $kept[] = $token;
    }

    return $kept;
}
|
||||||
|
|
||||||
function makeWellFormed($tokens) {
|
function makeWellFormed($tokens) {
|
||||||
if (empty($this->info)) $this->loadData();
|
if (empty($this->info)) $this->loadData();
|
||||||
$result = array();
|
$result = array();
|
||||||
$current_nesting = array();
|
$current_nesting = array();
|
||||||
foreach ($tokens as $token) {
|
foreach ($tokens as $token) {
|
||||||
if (empty( $token->is_tag )) {
|
if (empty( $token->is_tag )) {
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$info = $this->info[$token->name]; // assumption but valid
|
$info = $this->info[$token->name]; // assumption but valid
|
||||||
|
|
||||||
// test if it claims to be a start tag but is empty
|
// test if it claims to be a start tag but is empty
|
||||||
if ($info->child_def->type == 'empty' &&
|
if ($info->child_def->type == 'empty' &&
|
||||||
$token->type == 'start' ) {
|
$token->type == 'start' ) {
|
||||||
|
|
||||||
$result[] = new HTMLPurifier_Token_Empty($token->name,
|
$result[] = new HTMLPurifier_Token_Empty($token->name,
|
||||||
$token->attributes);
|
$token->attributes);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// test if it claims to be empty but really is a start tag
|
// test if it claims to be empty but really is a start tag
|
||||||
if ($info->child_def->type != 'empty' &&
|
if ($info->child_def->type != 'empty' &&
|
||||||
$token->type == 'empty' ) {
|
$token->type == 'empty' ) {
|
||||||
|
|
||||||
$result[] = new HTMLPurifier_Token_Start($token->name,
|
$result[] = new HTMLPurifier_Token_Start($token->name,
|
||||||
$token->attributes);
|
$token->attributes);
|
||||||
$result[] = new HTMLPurifier_Token_End($token->name);
|
$result[] = new HTMLPurifier_Token_End($token->name);
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// automatically insert empty tags
|
// automatically insert empty tags
|
||||||
if ($token->type == 'empty') {
|
if ($token->type == 'empty') {
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// we give start tags precedence, so automatically accept unless...
|
// we give start tags precedence, so automatically accept unless...
|
||||||
// it's one of those special cases
|
// it's one of those special cases
|
||||||
if ($token->type == 'start') {
|
if ($token->type == 'start') {
|
||||||
|
|
||||||
// if there's a parent, check for special case
|
// if there's a parent, check for special case
|
||||||
if (!empty($current_nesting)) {
|
if (!empty($current_nesting)) {
|
||||||
$current_parent = array_pop($current_nesting);
|
$current_parent = array_pop($current_nesting);
|
||||||
|
|
||||||
// check if we're closing a P tag
|
// check if we're closing a P tag
|
||||||
if ($current_parent->name == 'p' &&
|
if ($current_parent->name == 'p' &&
|
||||||
isset($this->info_closes_p[$token->name])
|
isset($this->info_closes_p[$token->name])
|
||||||
) {
|
) {
|
||||||
$result[] = new HTMLPurifier_Token_End('p');
|
$result[] = new HTMLPurifier_Token_End('p');
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
$current_nesting[] = $token;
|
$current_nesting[] = $token;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we're closing a LI tag
|
// check if we're closing a LI tag
|
||||||
if ($current_parent->name == 'li' &&
|
if ($current_parent->name == 'li' &&
|
||||||
$token->name == 'li'
|
$token->name == 'li'
|
||||||
) {
|
) {
|
||||||
$result[] = new HTMLPurifier_Token_End('li');
|
$result[] = new HTMLPurifier_Token_End('li');
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
$current_nesting[] = $token;
|
$current_nesting[] = $token;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// this is more TIDY stuff
|
// this is more TIDY stuff
|
||||||
// we should also get some TABLE related code
|
// we should also get some TABLE related code
|
||||||
// mismatched h#
|
// mismatched h#
|
||||||
|
|
||||||
$current_nesting[] = $current_parent; // undo the pop
|
$current_nesting[] = $current_parent; // undo the pop
|
||||||
}
|
}
|
||||||
|
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
$current_nesting[] = $token;
|
$current_nesting[] = $token;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// sanity check
|
// sanity check
|
||||||
if ($token->type != 'end') continue;
|
if ($token->type != 'end') continue;
|
||||||
|
|
||||||
// okay, we're dealing with a closing tag
|
// okay, we're dealing with a closing tag
|
||||||
|
|
||||||
// make sure that we have something open
|
// make sure that we have something open
|
||||||
if (empty($current_nesting)) {
|
if (empty($current_nesting)) {
|
||||||
$result[] = new HTMLPurifier_Token_Text(
|
$result[] = new HTMLPurifier_Token_Text(
|
||||||
$this->generator->generateFromToken($token)
|
$this->generator->generateFromToken($token)
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// first, check for the simplest case: everything closes neatly
|
// first, check for the simplest case: everything closes neatly
|
||||||
|
|
||||||
// current_nesting is modified
|
// current_nesting is modified
|
||||||
$current_parent = array_pop($current_nesting);
|
$current_parent = array_pop($current_nesting);
|
||||||
if ($current_parent->name == $token->name) {
|
if ($current_parent->name == $token->name) {
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// undo the array_pop
|
// undo the array_pop
|
||||||
$current_nesting[] = $current_parent;
|
$current_nesting[] = $current_parent;
|
||||||
|
|
||||||
// okay, so we're trying to close the wrong tag
|
// okay, so we're trying to close the wrong tag
|
||||||
|
|
||||||
// scroll back the entire nest, trying to find our tag
|
// scroll back the entire nest, trying to find our tag
|
||||||
// feature could be to specify how far you'd like to go
|
// feature could be to specify how far you'd like to go
|
||||||
$size = count($current_nesting);
|
$size = count($current_nesting);
|
||||||
// -2 because -1 is the last element, but we already checked that
|
// -2 because -1 is the last element, but we already checked that
|
||||||
$skipped_tags = false;
|
$skipped_tags = false;
|
||||||
for ($i = $size - 2; $i >= 0; $i--) {
|
for ($i = $size - 2; $i >= 0; $i--) {
|
||||||
if ($current_nesting[$i]->name == $token->name) {
|
if ($current_nesting[$i]->name == $token->name) {
|
||||||
// current nesting is modified
|
// current nesting is modified
|
||||||
$skipped_tags = array_splice($current_nesting, $i);
|
$skipped_tags = array_splice($current_nesting, $i);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// we still didn't find the tag, so translate to text
|
// we still didn't find the tag, so translate to text
|
||||||
if ($skipped_tags === false) {
|
if ($skipped_tags === false) {
|
||||||
$result[] = new HTMLPurifier_Token_Text(
|
$result[] = new HTMLPurifier_Token_Text(
|
||||||
$this->generator->generateFromToken($token)
|
$this->generator->generateFromToken($token)
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// okay, we found it, close all the skipped tags
|
// okay, we found it, close all the skipped tags
|
||||||
// note that skipped tags contains the element we need closed
|
// note that skipped tags contains the element we need closed
|
||||||
$size = count($skipped_tags);
|
$size = count($skipped_tags);
|
||||||
for ($i = $size - 1; $i >= 0; $i--) {
|
for ($i = $size - 1; $i >= 0; $i--) {
|
||||||
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
||||||
}
|
}
|
||||||
|
|
||||||
// done!
|
// done!
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// we're at the end now, fix all still unclosed tags
|
// we're at the end now, fix all still unclosed tags
|
||||||
|
|
||||||
if (!empty($current_nesting)) {
|
if (!empty($current_nesting)) {
|
||||||
$size = count($current_nesting);
|
$size = count($current_nesting);
|
||||||
for ($i = $size - 1; $i >= 0; $i--) {
|
for ($i = $size - 1; $i >= 0; $i--) {
|
||||||
$result[] =
|
$result[] =
|
||||||
new HTMLPurifier_Token_End($current_nesting[$i]->name);
|
new HTMLPurifier_Token_End($current_nesting[$i]->name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Checks the children of every element against that element's child
 * definition and repairs the token stream accordingly.
 *
 * The stream is temporarily wrapped in an implicit <div> so top-level
 * tokens have a parent to be validated against. For each start tag the
 * tokens up to its matching end tag are handed to
 * $this->info[name]->child_def->validateChildren(), whose result decides
 * what happens:
 *   - true:  the node is left untouched
 *   - false: the whole node (start tag, children, end tag) is removed
 *   - other: the children are replaced by the returned value
 *
 * @param array $tokens token objects exposing ->type and ->name
 * @return array the corrected tokens, without the implicit wrapper
 */
function fixNesting($tokens) {
    if (empty($this->info)) {
        $this->loadData();
    }

    // implicit "parent" node; stripped again before returning
    array_unshift($tokens, new HTMLPurifier_Token_Start('div'));
    $tokens[] = new HTMLPurifier_Token_End('div');

    $total = count($tokens);
    $open  = 0; // index of the start tag currently being examined

    while ($open < $total) {

        // walk forward to the end tag matching $tokens[$open], gathering
        // everything strictly between the two as the node's children
        $children = array();
        $level    = 0;
        $close    = $open;
        while (true) {
            $is_outer_start = false;
            if ($tokens[$close]->type == 'start') {
                $level++;
                // the node's own start tag is not one of its children
                $is_outer_start = ($level == 1);
            } elseif ($tokens[$close]->type == 'end') {
                $level--;
                // the node's own end tag terminates the scan
                if ($level == 0) break;
            }
            if (!$is_outer_start) {
                $children[] = $tokens[$close];
            }
            $close++;
        }

        // $open is the index of the start token, $close of the end token;
        // let the child definition for this element judge the children
        $definition = $this->info[$tokens[$open]->name];
        $verdict    = $definition->child_def->validateChildren($children);

        if ($verdict === false) {

            // WARNING: the original DTD never triggers cascading removal,
            // but if the info array is changed so that an element requiring
            // children may contain another element requiring children, the
            // parent node would need to be re-checked here as well.

            // drop the entire node, tags included
            $span = $close - $open + 1;
            array_splice($tokens, $open, $span);
            $total -= $span;

            // step back so the scroll below resumes at the token that
            // slid into the removed node's position
            $open--;

        } elseif ($verdict !== true) {

            // swap the children (everything between the tags) for $verdict
            $span = $close - $open - 1;
            array_splice($tokens, $open + 1, $span, $verdict);
            $total = $total - $span + count($verdict);

        }
        // $verdict === true: nothing to do

        // scroll to the next start tag
        $open++;
        while ($open < $total and $tokens[$open]->type != 'start') {
            $open++;
        }
    }

    // remove the implicit div pair
    array_shift($tokens);
    array_pop($tokens);

    return $tokens;
}
|
||||||
|
|
||||||
/**
 * Attribute validation stub: it only makes sure the definition data has
 * been loaded. The tokens are not inspected and nothing is returned.
 *
 * @param array $tokens currently unused
 */
function validateAttributes($tokens) {
    if (!empty($this->info)) {
        return;
    }
    $this->loadData();
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Plain value holder describing one element: the definition used to
 * validate its children plus its attribute definitions.
 */
class HTMLPurifier_ElementDef
{

    // child definition; fixNesting() calls ->validateChildren() on it
    var $child_def;

    // attribute definitions, stored exactly as passed to the constructor
    var $attr_def = array();

    /**
     * Constructor used by PHP 5 and later. PHP 8 no longer treats a method
     * named after the class as a constructor, so without this the
     * properties would silently keep their defaults. It is declared before
     * the legacy constructor on purpose: PHP 5 raises E_STRICT when
     * __construct follows an already registered old-style constructor.
     *
     * @param mixed $child_def child definition for the element
     * @param array $attr_def  attribute definitions (defaults to none)
     */
    function __construct($child_def, $attr_def = array()) {
        $this->child_def = $child_def;
        $this->attr_def  = $attr_def;
    }

    /**
     * PHP 4 constructor, kept so the class still initialises there and so
     * any explicit call to this method keeps working.
     */
    function HTMLPurifier_ElementDef($child_def, $attr_def = array()) {
        $this->__construct($child_def, $attr_def);
    }

}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,45 +1,45 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
/**
 * Turns token objects back into markup.
 */
class HTMLPurifier_Generator
{

    /**
     * Serialises a list of tokens by concatenating the markup of each one.
     *
     * @param array $tokens token objects accepted by generateFromToken()
     * @return string
     */
    function generateFromTokens($tokens) {
        $pieces = array();
        foreach ($tokens as $token) {
            $pieces[] = $this->generateFromToken($token);
        }
        return implode('', $pieces);
    }

    /**
     * Serialises a single token according to its ->type:
     * 'start' and 'empty' use ->name and ->attributes, 'end' uses ->name,
     * 'text' uses ->data (entity-escaped). Any other type yields ''.
     *
     * @param object $token
     * @return string
     */
    function generateFromToken($token) {
        switch ($token->type) {

            case 'start':
                $attr = $this->generateAttributes($token->attributes);
                if ($attr) {
                    $attr = ' ' . $attr;
                }
                return '<' . $token->name . $attr . '>';

            case 'end':
                return '</' . $token->name . '>';

            case 'empty':
                $attr = $this->generateAttributes($token->attributes);
                if ($attr) {
                    $attr = ' ' . $attr;
                }
                return '<' . $token->name . $attr . ' />';

            case 'text':
                return htmlentities($token->data, ENT_COMPAT, 'UTF-8');

            default:
                return '';
        }
    }

    /**
     * Renders an associative array as space-separated key="value" pairs.
     * Values are entity-escaped; keys are emitted as given.
     *
     * @param array $assoc_array_of_attributes attribute name => value
     * @return string no leading or trailing space; '' for an empty array
     */
    function generateAttributes($assoc_array_of_attributes) {
        $pairs = array();
        foreach ($assoc_array_of_attributes as $name => $raw) {
            $pairs[] = $name.'="'.htmlentities($raw, ENT_COMPAT, 'UTF-8').'"';
        }
        return implode(' ', $pairs);
    }

}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,354 +1,354 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
* Reread the XML spec and make sure I got everything right
|
* Reread the XML spec and make sure I got everything right
|
||||||
* Add support for CDATA sections
|
* Add support for CDATA sections
|
||||||
* Have comments output with the leading and trailing --s
|
* Have comments output with the leading and trailing --s
|
||||||
* Optimize and benchmark
|
* Optimize and benchmark
|
||||||
* Check MF_Text behavior: shouldn't the info in there be raw (entities parsed?)
|
* Check MF_Text behavior: shouldn't the info in there be raw (entities parsed?)
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
|
|
||||||
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||||
{
|
{
|
||||||
|
|
||||||
// does this version of PHP support utf8 as entity function charset?
|
// does this version of PHP support utf8 as entity function charset?
|
||||||
var $_entity_utf8;
|
var $_entity_utf8;
|
||||||
|
|
||||||
/**
 * Constructor for PHP 5 and later: records whether the entity functions
 * can be asked for UTF-8 (true on PHP 5+), which parseData() relies on.
 *
 * The initialiser used to exist only under the name HTMLPurifier_Lexer(),
 * i.e. the parent class's name. From PHP 5 on, a class's own constructor
 * must be __construct() or (before PHP 8) be named after the class itself,
 * so that method was not this class's constructor and $_entity_utf8
 * stayed unset.
 *
 * NOTE(review): the parent class is not visible from here; if it gains a
 * constructor of its own it is not chained to - confirm that is intended.
 */
function __construct() {
    $this->_entity_utf8 = version_compare(PHP_VERSION, '5', '>=');
}

/**
 * PHP 4 constructor (named after this class). Declared after
 * __construct() so PHP 5 does not complain about a redefined constructor.
 */
function HTMLPurifier_Lexer_DirectLex() {
    $this->__construct();
}

/**
 * Original entry point, kept so existing explicit calls keep working.
 */
function HTMLPurifier_Lexer() {
    $this->__construct();
}
|
||||||
|
|
||||||
// this is QUITE a knotty problem
|
// this is QUITE a knotty problem
|
||||||
//
|
//
|
||||||
// The main trouble is that, even while assuming UTF-8 is what we're
|
// The main trouble is that, even while assuming UTF-8 is what we're
|
||||||
// using, we've got to deal with HTML entities (like &mdash;)
|
// using, we've got to deal with HTML entities (like &mdash;)
|
||||||
// Not even sure if the PHP 5 decoding function does that. Plus,
|
// Not even sure if the PHP 5 decoding function does that. Plus,
|
||||||
// SimpleTest doesn't use UTF-8!
|
// SimpleTest doesn't use UTF-8!
|
||||||
//
|
//
|
||||||
// However, we MUST parse everything possible, because once you get
|
// However, we MUST parse everything possible, because once you get
|
||||||
// to the HTML generator, it will escape everything possible (although
|
// to the HTML generator, it will escape everything possible (although
|
||||||
// that may not be correct, and we should be using htmlspecialchars() ).
|
// that may not be correct, and we should be using htmlspecialchars() ).
|
||||||
//
|
//
|
||||||
// Nevertheless, strictly XML speaking, we cannot assume any character
|
// Nevertheless, strictly XML speaking, we cannot assume any character
|
||||||
// entities are defined except the htmlspecialchars() ones, so leaving
|
// entities are defined except the htmlspecialchars() ones, so leaving
|
||||||
// the entities inside HERE is not acceptable. (plus, htmlspecialchars
|
// the entities inside HERE is not acceptable. (plus, htmlspecialchars
|
||||||
// might convert them anyway). So EVERYTHING must get parsed.
|
// might convert them anyway). So EVERYTHING must get parsed.
|
||||||
//
|
//
|
||||||
// We may need to roll our own character entity lookup table. It's only
|
// We may need to roll our own character entity lookup table. It's only
|
||||||
// about 250; fortunately, the decimal/hex ones map cleanly to UTF-8.
|
// about 250; fortunately, the decimal/hex ones map cleanly to UTF-8.
|
||||||
/**
 * Decodes the character entities in a piece of text.
 *
 * When $this->_entity_utf8 is set, html_entity_decode() is asked for UTF-8
 * directly. Otherwise (the PHP 4 compatibility path) named entities are
 * decoded as ISO-8859-1 and any remaining numeric entities are replaced by
 * hand.
 *
 * The numeric replacement used to rely on the /e (eval) modifier of
 * preg_replace(), which PHP 7 removed. It also evaluated the digits as
 * PHP source, so a leading zero made them octal ("&#065;" became "5", not
 * "A"). Callbacks avoid both problems and work from PHP 4.0.5 onwards.
 *
 * NOTE(review): as before, numeric entities become a single byte via
 * chr(), so code points above 255 wrap around instead of becoming real
 * UTF-8 sequences - confirm whether that is acceptable for this path.
 *
 * @param string $string text possibly containing entities
 * @return string decoded text
 */
function parseData($string) {
    // we may want to let the user do a different char encoding,
    // although there is NO REASON why they shouldn't be able
    // to convert it to UTF-8 before they pass it to us

    // no support for less than PHP 4.3
    if ($this->_entity_utf8) {
        // PHP 5+, UTF-8 is nicely supported
        return @html_entity_decode($string, ENT_QUOTES, 'UTF-8');
    }

    // PHP 4, do compat stuff
    $string = html_entity_decode($string, ENT_QUOTES, 'ISO-8859-1');

    // decimal, then hexadecimal numeric entities
    $string = preg_replace_callback(
        '/&#(\d+);/',
        array($this, '_decimalEntityToChar'),
        $string
    );
    $string = preg_replace_callback(
        '/&#x([a-f0-9]+);/i',
        array($this, '_hexEntityToChar'),
        $string
    );

    return $string;
}

/**
 * preg_replace_callback() helper: turns the digits of a decimal entity
 * into a character. The digits are always read as base 10.
 */
function _decimalEntityToChar($matches) {
    return chr((int) $matches[1]);
}

/**
 * preg_replace_callback() helper: turns the digits of a hexadecimal entity
 * into a character. chr() only looks at the low byte, which is exactly the
 * last two hex digits, so arbitrarily long digit runs stay well defined.
 */
function _hexEntityToChar($matches) {
    return chr(hexdec(substr($matches[1], -2)));
}
|
||||||
|
|
||||||
/**
 * Finds the next single or double quote at or after $offset.
 *
 * @param string $string haystack
 * @param int    $offset position to start searching from
 * @return int|false index of the quote, or false when there is none
 */
function nextQuote($string, $offset = 0) {
    // strcspn() counts the characters before the first quote
    $position = $offset + strcspn($string, '"\'', $offset);
    if ($position == strlen($string)) {
        // ran off the end without meeting a quote
        return false;
    }
    return $position;
}
|
||||||
|
|
||||||
/**
 * Finds the next whitespace character (space, tab, CR or LF) at or after
 * $offset.
 *
 * @param string $string haystack
 * @param int    $offset position to start searching from
 * @return int|false index of the whitespace, or false when there is none
 */
function nextWhiteSpace($string, $offset = 0) {
    // strcspn() counts the characters before the first whitespace
    $position = $offset + strcspn($string, "\x20\x09\x0D\x0A", $offset);
    if ($position == strlen($string)) {
        // ran off the end without meeting whitespace
        return false;
    }
    return $position;
}
|
||||||
|
|
||||||
/**
 * Splits an HTML string into Text, Comment, Start, End and Empty tokens.
 *
 * The scanner alternates between two states: outside a tag it emits the
 * text up to the next '<'; inside a tag it takes everything up to the next
 * '>' and classifies it as comment, end tag, or start/empty tag (with
 * attributes parsed by tokenizeAttributeString()). A '<' with no closing
 * '>' is emitted as text together with the rest of the input.
 *
 * A tag is self-closing when its LAST character is '/'. Looking for the
 * first '/' instead (as this method once did) misreads tags whose
 * attributes contain a slash, e.g. <img src="a/b.png" />.
 *
 * @param string $string HTML to tokenize
 * @return array list of HTMLPurifier_Token_* objects; empty for empty
 *               input or when the loop guard trips
 */
function tokenizeHTML($string) {

    // some quick checking (if empty, return empty)
    $string = @ (string) $string;
    if ($string == '') return array();

    $cursor = 0; // our location in the text
    $inside_tag = false; // whether or not we're parsing the inside of a tag
    $array = array(); // result array

    // infinite loop protection
    // has to be pretty big, since html docs can be big
    // we allow two hundred thousand iterations... more than enough?
    $loops = 0;

    while (true) {

        // infinite loop protection
        if (++$loops > 200000) return array();

        $position_next_lt = strpos($string, '<', $cursor);
        $position_next_gt = strpos($string, '>', $cursor);

        // triggers on "<b>asdf</b>" but not "asdf <b></b>"
        if ($position_next_lt === $cursor) {
            $inside_tag = true;
            $cursor++;
        }

        if (!$inside_tag && $position_next_lt !== false) {
            // We are not inside tag and there still is another tag to parse
            $array[] = new
                HTMLPurifier_Token_Text(
                    html_entity_decode(
                        substr(
                            $string, $cursor, $position_next_lt - $cursor
                        ),
                        ENT_QUOTES
                    )
                );
            $cursor = $position_next_lt + 1;
            $inside_tag = true;
            continue;
        } elseif (!$inside_tag) {
            // We are not inside tag but there are no more tags
            // If we're already at the end, break
            if ($cursor === strlen($string)) break;
            // Create Text of rest of string
            $array[] = new
                HTMLPurifier_Token_Text(
                    html_entity_decode(
                        substr(
                            $string, $cursor
                        ),
                        ENT_QUOTES
                    )
                );
            break;
        } elseif ($inside_tag && $position_next_gt !== false) {
            // We are in tag and it is well formed
            // Grab the internals of the tag
            $segment = substr($string, $cursor, $position_next_gt-$cursor);

            // Check if it's a comment
            if (
                substr($segment,0,3) == '!--' &&
                substr($segment,strlen($segment)-2,2) == '--'
            ) {
                $array[] = new
                    HTMLPurifier_Token_Comment(
                        substr(
                            $segment, 3, strlen($segment) - 5
                        )
                    );
                $inside_tag = false;
                $cursor = $position_next_gt + 1;
                continue;
            }

            // Check if it's an end tag
            $is_end_tag = (strpos($segment,'/') === 0);
            if ($is_end_tag) {
                $type = substr($segment, 1);
                $array[] = new HTMLPurifier_Token_End($type);
                $inside_tag = false;
                $cursor = $position_next_gt + 1;
                continue;
            }

            // Check if it is explicitly self closing, if so, remove
            // trailing slash. Remember, we could have a tag like <br>, so
            // any later token processing scripts must convert improperly
            // classified EmptyTags from StartTags.
            // Only the final character counts: a '/' earlier in the tag
            // belongs to an attribute value.
            $is_self_closing = (substr($segment, -1) == '/');
            if ($is_self_closing) {
                $segment = substr($segment, 0, strlen($segment) - 1);
            }

            // Check if there are any attributes
            $position_first_space = $this->nextWhiteSpace($segment);
            if ($position_first_space === false) {
                if ($is_self_closing) {
                    $array[] = new HTMLPurifier_Token_Empty($segment);
                } else {
                    $array[] = new HTMLPurifier_Token_Start($segment);
                }
                $inside_tag = false;
                $cursor = $position_next_gt + 1;
                continue;
            }

            // Grab out all the data
            $type = substr($segment, 0, $position_first_space);
            $attribute_string =
                trim(
                    substr(
                        $segment, $position_first_space
                    )
                );
            if ($attribute_string) {
                $attributes = $this->tokenizeAttributeString(
                                        $attribute_string
                                      );
            } else {
                $attributes = array();
            }

            if ($is_self_closing) {
                $array[] = new HTMLPurifier_Token_Empty($type, $attributes);
            } else {
                $array[] = new HTMLPurifier_Token_Start($type, $attributes);
            }
            $cursor = $position_next_gt + 1;
            $inside_tag = false;
            continue;
        } else {
            // inside a tag that never closes: emit it, and everything
            // after it, as text
            $array[] = new
                HTMLPurifier_Token_Text(
                    '<' .
                    html_entity_decode(
                        substr($string, $cursor),
                        ENT_QUOTES
                    )
                );
            break;
        }
    }
    return $array;
}
|
||||||
|
|
||||||
/**
 * Parses the attribute portion of a tag into an associative array.
 *
 * Understands key="value", key='value', unquoted key=value and bare
 * boolean attributes (stored as key => key). Whitespace means space, tab,
 * CR or LF throughout.
 *
 * Fixes over the earlier version:
 *  - values and keys are compared against '' instead of being tested for
 *    truthiness, so width=0 yields '0' rather than '';
 *  - the single-attribute shortcut is skipped for any whitespace, not only
 *    a space character found after position 0;
 *  - a quote that is never closed takes the rest of the string as its
 *    value, matching what the shortcut does for one attribute;
 *  - a stray '=' with no key in front of it is skipped.
 * The last two previously stalled the cursor until the loop guard fired,
 * discarding every attribute.
 *
 * @param string $string raw attribute text, e.g. 'href="a" target=_blank'
 * @return array attribute name => value; empty when nothing usable is found
 */
function tokenizeAttributeString($string) {
    $string = (string) $string; // quick typecast

    if ($string == '') return array(); // no attributes

    $whitespace = "\x20\x09\x0D\x0A";

    // let's see if we can abort as quickly as possible
    // one equal sign, no whitespace => one attribute
    $num_equal = substr_count($string, '=');
    $has_space = (strcspn($string, $whitespace) < strlen($string));
    if ($num_equal === 0 && !$has_space) {
        // bool attribute
        return array($string => $string);
    } elseif ($num_equal === 1 && !$has_space) {
        // only one attribute
        list($key, $quoted_value) = explode('=', $string);
        if ($key === '') return array();
        if ($quoted_value === '') return array($key => '');

        $first_char = $quoted_value[0];
        $last_char  = $quoted_value[strlen($quoted_value)-1];

        $same_quote = ($first_char == $last_char);
        $open_quote = ($first_char == '"' || $first_char == "'");

        if ($same_quote && $open_quote) {
            // well behaved: strip the surrounding quotes
            $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
        } elseif ($open_quote) {
            // opening quote without its partner: drop just the opener
            $value = substr($quoted_value, 1);
        } else {
            // not quoted at all
            $value = $quoted_value;
        }
        // substr() returns false rather than '' on some PHP versions
        return array($key => (string) $value);
    }

    // setup loop environment
    $array  = array(); // return assoc array of attributes
    $cursor = 0; // current position in string (moves forward)
    $size   = strlen($string); // size of the string (stays the same)

    // if we have unquoted attributes, the parser expects a terminating
    // space, so let's guarantee that there's always a terminating space.
    $string .= ' ';

    // infinite loop protection
    $loops = 0;

    while (true) {

        // infinite loop protection
        if (++$loops > 1000) return array();

        if ($cursor >= $size) {
            break;
        }

        // skip whitespace in front of the key
        $cursor += strspn($string, $whitespace, $cursor);

        // grab the key: everything up to whitespace or '='
        $key_begin = $cursor;
        $cursor += strcspn($string, $whitespace . '=', $cursor);
        $key = substr($string, $key_begin, $cursor - $key_begin);

        if ($key === false || $key === '') {
            // either we ran out of input or we are sitting on a stray
            // '='; step over it so the cursor always advances
            $cursor++;
            continue;
        }

        // scroll past all whitespace
        $cursor += strspn($string, $whitespace, $cursor);

        if ($cursor >= $size) {
            // the key was the last thing in the string: bool attribute
            $array[$key] = $key;
            break;
        }

        // if the next character is an equal sign, we've got a regular
        // pair, otherwise, it's a bool attribute
        if ($string[$cursor] != '=') {
            $array[$key] = $key;
            continue;
        }

        // key="value"
        $cursor++;
        $cursor += strspn($string, $whitespace, $cursor);

        if ($cursor >= $size) {
            // "key=" with nothing after it
            $array[$key] = '';
            break;
        }

        // we might be in front of a quote right now
        $char = $string[$cursor];

        if ($char == '"' || $char == "'") {
            // it's quoted, end bound is the matching quote
            $cursor++;
            $value_begin = $cursor;
            $value_end = strpos($string, $char, $cursor);
            if ($value_end === false) {
                // never closed: the value is the rest of the input
                // (without the terminating space added above)
                $value_end = $size;
            }
        } else {
            // it's not quoted, end bound is whitespace
            $value_begin = $cursor;
            $value_end = $cursor + strcspn($string, $whitespace, $cursor);
        }

        $array[$key] = (string) substr(
            $string, $value_begin, $value_end - $value_begin
        );

        // continue after the closing quote / terminating whitespace
        $cursor = $value_end + 1;
    }
    return $array;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,58 +1,58 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'XML/HTMLSax3.php'; // PEAR
|
require_once 'XML/HTMLSax3.php'; // PEAR
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
|
|
||||||
// uses the PEAR class XML_HTMLSax3 to parse XML
|
// uses the PEAR class XML_HTMLSax3 to parse XML
|
||||||
/**
 * Lexer backed by the PEAR class XML_HTMLSax3.
 *
 * The SAX parser calls back into this object while it walks the markup, and
 * each callback appends the matching HTMLPurifier token to $tokens.
 */
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
{
    
    /** Tokens collected by the handlers during the current tokenizeHTML() run. */
    var $tokens;
    
    /**
     * Turns an HTML string into a list of tokens.
     *
     * @param string $html markup to tokenize
     * @return array tokens in the order the parser reported them
     */
    function tokenizeHTML($html) {
        // reset so a reused lexer does not return tokens of an earlier run
        $this->tokens = array();
        // NOTE(review): "=& new" only parses on PHP 4/5; left untouched
        // because the rest of this file is written for those versions
        $parser=& new XML_HTMLSax3();
        $parser->set_object($this);
        $parser->set_element_handler('openHandler','closeHandler');
        $parser->set_data_handler('dataHandler');
        $parser->set_escape_handler('escapeHandler');
        $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
        $parser->parse($html);
        return $this->tokens;
    }
    
    /**
     * Parser callback for an opening tag.
     *
     * @param object $parser the calling parser (unused)
     * @param string $name   tag name
     * @param array  $attrs  attributes of the tag
     * @param bool   $closed when truthy the tag is recorded as an empty
     *                       element instead of a start tag
     * @return bool always true
     */
    function openHandler(&$parser, $name, $attrs, $closed) {
        if ($closed) {
            $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
        } else {
            $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
        }
        return true;
    }
    
    /**
     * Parser callback for a closing tag.
     *
     * @param object $parser the calling parser (unused)
     * @param string $name   tag name
     * @return bool always true
     */
    function closeHandler(&$parser, $name) {
        // HTMLSax3 seems to always send empty tags an extra close tag.
        // Swallow that event only when it really belongs to the empty tag
        // emitted just before it: comparing the names keeps the "</p>" of
        // "<p><br /></p>", and the count check avoids indexing an empty
        // token list when the very first event is an end tag.
        $count = count($this->tokens);
        if ($count > 0) {
            $last = $this->tokens[$count - 1];
            // tag tokens store their name lower-cased, so normalise $name
            if ($last->type == 'empty' && $last->name == strtolower($name)) {
                return true;
            }
        }
        $this->tokens[] = new HTMLPurifier_Token_End($name);
        return true;
    }
    
    /**
     * Parser callback for character data.
     *
     * @param object $parser the calling parser (unused)
     * @param string $data   text content
     * @return bool always true
     */
    function dataHandler(&$parser, $data) {
        $this->tokens[] = new HTMLPurifier_Token_Text($data);
        return true;
    }
    
    /**
     * Parser callback for escapes.  Only payloads that start with "-" are
     * kept (as comment tokens); anything else is discarded.
     *
     * @param object $parser the calling parser (unused)
     * @param string $data   payload of the escape
     * @return bool always true
     */
    function escapeHandler(&$parser, $data) {
        if (strpos($data, '-') === 0) {
            $this->tokens[] = new HTMLPurifier_Token_Comment($data);
        }
        return true;
    }
    
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,60 +1,60 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
// all objects here are immutable
|
// all objects here are immutable
|
||||||
|
|
||||||
/**
 * Common ancestor of every token produced by the lexers.
 */
class HTMLPurifier_Token {} // abstract

/**
 * Shared state of tag tokens: a lower-cased element name plus attributes.
 *
 * Both __construct() and the PHP 4-style constructor are provided: PHP 8 no
 * longer treats a method named after its class as a constructor, while
 * PHP 4 only knows that form.  The old-named method simply forwards, so
 * existing parent::HTMLPurifier_Token_Tag() calls keep working.
 */
class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
{
    /** Marks the token as a tag. */
    var $is_tag = true;
    /** Element name, always lower case. */
    var $name;
    /** Attribute name => value pairs. */
    var $attributes = array();
    
    /**
     * @param string $name       element name in any case
     * @param array  $attributes attribute name => value pairs
     */
    function __construct($name, $attributes = array()) {
        // skip the strtolower() call when the name is already lower case
        $this->name = ctype_lower($name) ? $name : strtolower($name);
        $this->attributes = $attributes;
    }
    
    /** PHP 4-style constructor; forwards to __construct(). */
    function HTMLPurifier_Token_Tag($name, $attributes = array()) {
        $this->__construct($name, $attributes);
    }
}

// start CONCRETE ones

/** Opening tag. */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
    var $type = 'start';
}

/** Empty (self-closing) tag. */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
    var $type = 'empty';
}

// accepts attributes even though it really can't, for optimization reasons
/** Closing tag. */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
    var $type = 'end';
}

/**
 * Character data.
 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{
    var $name = '#PCDATA';
    var $type = 'text';
    /** The text itself. */
    var $data;
    /** True when $data passes ctype_space(). */
    var $is_whitespace = false;
    
    /**
     * @param string $data text content
     */
    function __construct($data) {
        $this->data = $data;
        if (ctype_space($data)) $this->is_whitespace = true;
    }
    
    /** PHP 4-style constructor; forwards to __construct(). */
    function HTMLPurifier_Token_Text($data) {
        $this->__construct($data);
    }
    
    /**
     * Concatenates another text token onto this one.
     *
     * @param HTMLPurifier_Token_Text $text token whose data is appended
     * @return HTMLPurifier_Token_Text a new token; neither operand is changed
     */
    function append($text) {
        return new HTMLPurifier_Token_Text($this->data . $text->data);
    }
}

/**
 * Comment.
 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    /** Comment payload exactly as handed to the constructor. */
    var $data;
    var $type = 'comment';
    
    /**
     * @param string $data comment payload
     */
    function __construct($data) {
        $this->data = $data;
    }
    
    /** PHP 4-style constructor; forwards to __construct(). */
    function HTMLPurifier_Token_Comment($data) {
        $this->__construct($data);
    }
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,132 +1,132 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ChildDef.php';
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
|
||||||
/**
 * Tests for the child definition classes: each case lexes an HTML fragment,
 * runs it through validateChildren() and compares the outcome with either a
 * boolean verdict or the HTML generated from the returned tokens.
 */
class HTMLPurifier_ChildDefTest extends UnitTestCase
{
    
    /** Lexer used to turn the fixture strings into tokens. */
    var $lex;
    /** Generator used to turn result tokens back into HTML. */
    var $gen;
    
    function HTMLPurifier_ChildDefTest() {
        $this->lex = HTMLPurifier_Lexer::create();
        $this->gen = new HTMLPurifier_Generator();
        parent::UnitTestCase();
    }
    
    /**
     * Runs every input through $def and checks it against its expectation.
     *
     * @param array  $inputs HTML fragments, keyed like $expect
     * @param array  $expect per input either a bool (compared identically
     *                       with the raw result) or the expected HTML
     * @param object $def    child definition under test
     */
    function assertSeries($inputs, $expect, $def) {
        foreach ($inputs as $index => $html) {
            $tokens = $this->lex->tokenizeHTML($html);
            $result = $def->validateChildren($tokens);
            $wanted = $expect[$index];
            if (is_bool($wanted)) {
                $this->assertIdentical($wanted, $result);
                continue;
            }
            // non-boolean expectation: compare the regenerated markup
            $result_html = $this->gen->generateFromTokens($result);
            $this->assertEqual($wanted, $result_html);
            paintIf($result_html, $result_html != $wanted);
        }
    }
    
    function test_complex() {
        
        // content model of a table
        $def = new HTMLPurifier_ChildDef(
            '(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))'
        );
        
        $inputs = array(
            '',
            // what sits inside the tr is irrelevant here: should the tr turn
            // out to be illegal, the parent node gets re-evaluated anyway
            '<tr></tr>',
            '<caption></caption><col></col><thead></thead>' .
                '<tfoot></tfoot><tbody></tbody>',
            '<col></col><col></col><col></col><tr></tr>',
        );
        $expect = array(
            false,
            true,
            true,
            true,
        );
        
        $this->assertSeries($inputs, $expect, $def);
        
    }
    
    function test_simple() {
        
        // Simple is really an abstract class; only the conversion of its
        // constructor argument into the elements lookup is exercised here
        
        $from_string = new HTMLPurifier_ChildDef_Simple('foobar | bang |gizmo');
        $this->assertEqual(
            $from_string->elements,
            array(
                'foobar' => true,
                'bang'   => true,
                'gizmo'  => true,
            )
        );
        
        $from_array = new HTMLPurifier_ChildDef_Simple(array('href', 'src'));
        $this->assertEqual(
            $from_array->elements,
            array(
                'href' => true,
                'src'  => true,
            )
        );
    }
    
    function test_required_pcdata_forbidden() {
        
        $def = new HTMLPurifier_ChildDef_Required('dt | dd');
        
        $inputs = array(
            '',
            '<dt>Term</dt>Text in an illegal location'.
                '<dd>Definition</dd><b>Illegal tag</b>',
            'How do you do!',
            // whitespace alone must not count as illegal text
            "\n<dd>Definition</dd> ",
            '<dd>Definition</dd> <b></b> ',
            "\t ",
        );
        $expect = array(
            false,
            '<dt>Term</dt><dd>Definition</dd>',
            false,
            true,
            '<dd>Definition</dd> ',
            false,
        );
        
        $this->assertSeries($inputs, $expect, $def);
        
    }
    
    function test_required_pcdata_allowed() {
        $def = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
        
        // NOTE(review): "img" is not part of '#PCDATA | b', yet it is
        // expected back unchanged while test_optional() expects it to be
        // dropped - confirm this expectation is what was intended
        $inputs = array('<b>Bold text</b><img />');
        $expect = array('<b>Bold text</b><img />');
        
        $this->assertSeries($inputs, $expect, $def);
    }
    
    function test_optional() {
        $def = new HTMLPurifier_ChildDef_Optional('b | i');
        
        $inputs = array(
            '<b>Bold text</b><img />',
            'Not allowed text',
        );
        $expect = array(
            '<b>Bold text</b>',
            '',
        );
        
        $this->assertSeries($inputs, $expect, $def);
    }
    
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,267 +1,267 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Definition.php';
|
require_once 'HTMLPurifier/Definition.php';
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
|
|
||||||
/**
 * Tests for the token-level passes of HTMLPurifier_Definition.  Every test
 * builds parallel $inputs / $expect lists of token arrays, feeds each input
 * to the pass under test and compares the returned tokens.
 */
class HTMLPurifier_DefinitionTest extends UnitTestCase
{
    
    /** Definition instance whose passes are exercised. */
    var $def;
    /** Lexer instance created alongside the definition. */
    var $lex;
    
    function HTMLPurifier_DefinitionTest() {
        $this->UnitTestCase();
        $definition = new HTMLPurifier_Definition();
        $definition->loadData();
        $this->def = $definition;
        $this->lex = new HTMLPurifier_Lexer();
    }
    
    function test_removeForeignElements() {
        
        $inputs = array();
        $expect = array();
        
        // nothing in, nothing out
        $inputs[0] = array();
        $expect[0] = $inputs[0];
        
        // expected to come back unchanged
        $inputs[1] = array(
            new HTMLPurifier_Token_Text('This is '),
            new HTMLPurifier_Token_Start('b', array()),
            new HTMLPurifier_Token_Text('bold'),
            new HTMLPurifier_Token_End('b'),
            new HTMLPurifier_Token_Text(' text'),
        );
        $expect[1] = $inputs[1];
        
        // every tag token is expected to be replaced by a text token
        $inputs[2] = array(
            new HTMLPurifier_Token_Start('asdf'),
            new HTMLPurifier_Token_End('asdf'),
            new HTMLPurifier_Token_Start('d', array('href' => 'bang!')),
            new HTMLPurifier_Token_End('d'),
            new HTMLPurifier_Token_Start('pooloka'),
            new HTMLPurifier_Token_Start('poolasdf'),
            new HTMLPurifier_Token_Start('ds', array('moogle' => '&')),
            new HTMLPurifier_Token_End('asdf'),
            new HTMLPurifier_Token_End('asdf'),
        );
        $expect[2] = array(
            new HTMLPurifier_Token_Text('<asdf>'),
            new HTMLPurifier_Token_Text('</asdf>'),
            new HTMLPurifier_Token_Text('<d href="bang!">'),
            new HTMLPurifier_Token_Text('</d>'),
            new HTMLPurifier_Token_Text('<pooloka>'),
            new HTMLPurifier_Token_Text('<poolasdf>'),
            new HTMLPurifier_Token_Text('<ds moogle="&">'),
            new HTMLPurifier_Token_Text('</asdf>'),
            new HTMLPurifier_Token_Text('</asdf>'),
        );
        
        foreach ($inputs as $index => $tokens) {
            $actual = $this->def->removeForeignElements($tokens);
            $this->assertEqual($expect[$index], $actual);
            paintIf($actual, $actual != $expect[$index]);
        }
        
    }
    
    function test_makeWellFormed() {
        
        $inputs = array();
        $expect = array();
        
        // nothing in, nothing out
        $inputs[0] = array();
        $expect[0] = $inputs[0];
        
        // already well formed
        $inputs[1] = array(
            new HTMLPurifier_Token_Text('This is '),
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('bold'),
            new HTMLPurifier_Token_End('b'),
            new HTMLPurifier_Token_Text(' text'),
            new HTMLPurifier_Token_Empty('br'),
        );
        $expect[1] = $inputs[1];
        
        // missing end tag at the end of the input
        $inputs[2] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('Unclosed tag, gasp!'),
        );
        $expect[2] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('Unclosed tag, gasp!'),
            new HTMLPurifier_Token_End('b'),
        );
        
        // inner element left open when the outer one closes
        $inputs[3] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Start('i'),
            new HTMLPurifier_Token_Text('The b is closed, but the i is not'),
            new HTMLPurifier_Token_End('b'),
        );
        $expect[3] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Start('i'),
            new HTMLPurifier_Token_Text('The b is closed, but the i is not'),
            new HTMLPurifier_Token_End('i'),
            new HTMLPurifier_Token_End('b'),
        );
        
        // end tag without a start tag becomes text
        $inputs[4] = array(
            new HTMLPurifier_Token_Text('Hey, recycle unused end tags!'),
            new HTMLPurifier_Token_End('b'),
        );
        $expect[4] = array(
            new HTMLPurifier_Token_Text('Hey, recycle unused end tags!'),
            new HTMLPurifier_Token_Text('</b>'),
        );
        
        // start tag "br" is expected back as an empty tag
        $inputs[5] = array(
            new HTMLPurifier_Token_Start('br', array('style' => 'clear:both;')),
        );
        $expect[5] = array(
            new HTMLPurifier_Token_Empty('br', array('style' => 'clear:both;')),
        );
        
        // empty tag "div" is expected back as a start/end pair
        $inputs[6] = array(
            new HTMLPurifier_Token_Empty('div', array('style' => 'clear:both;')),
        );
        $expect[6] = array(
            new HTMLPurifier_Token_Start('div', array('style' => 'clear:both;')),
            new HTMLPurifier_Token_End('div'),
        );
        
        // automatic paragraph closing
        
        $inputs[7] = array(
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 1'),
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 2'),
        );
        $expect[7] = array(
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 1'),
            new HTMLPurifier_Token_End('p'),
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 2'),
            new HTMLPurifier_Token_End('p'),
        );
        
        $inputs[8] = array(
            new HTMLPurifier_Token_Start('div'),
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 1 in a div'),
            new HTMLPurifier_Token_End('div'),
        );
        $expect[8] = array(
            new HTMLPurifier_Token_Start('div'),
            new HTMLPurifier_Token_Start('p'),
            new HTMLPurifier_Token_Text('Paragraph 1 in a div'),
            new HTMLPurifier_Token_End('p'),
            new HTMLPurifier_Token_End('div'),
        );
        
        // automatic list item closing
        
        $inputs[9] = array(
            new HTMLPurifier_Token_Start('ol'),
            
            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Item 1'),
            
            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Item 2'),
            
            new HTMLPurifier_Token_End('ol'),
        );
        $expect[9] = array(
            new HTMLPurifier_Token_Start('ol'),
            
            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Item 1'),
            new HTMLPurifier_Token_End('li'),
            
            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Item 2'),
            new HTMLPurifier_Token_End('li'),
            
            new HTMLPurifier_Token_End('ol'),
        );
        
        foreach ($inputs as $index => $tokens) {
            $actual = $this->def->makeWellFormed($tokens);
            $this->assertEqual($expect[$index], $actual);
            paintIf($actual, $actual != $expect[$index]);
        }
        
    }
    
    function test_fixNesting() {
        $inputs = array();
        $expect = array();
        
        // next id = 5
        
        // legal inline nesting
        $inputs[0] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('Bold text'),
            new HTMLPurifier_Token_End('b'),
        );
        $expect[0] = $inputs[0];
        
        // inline followed by block is legal as well, because the parent
        // element is considered FLOW
        $inputs[1] = array(
            new HTMLPurifier_Token_Start('a', array('href' => 'http://www.example.com/')),
            new HTMLPurifier_Token_Text('Linky'),
            new HTMLPurifier_Token_End('a'),
            new HTMLPurifier_Token_Start('div'),
            new HTMLPurifier_Token_Text('Block element'),
            new HTMLPurifier_Token_End('div'),
        );
        $expect[1] = $inputs[1];
        
        // illegal block inside inline: the element turns into text
        $inputs[2] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Start('div'),
            new HTMLPurifier_Token_Text('Illegal Div'),
            new HTMLPurifier_Token_End('div'),
            new HTMLPurifier_Token_End('b'),
        );
        $expect[2] = array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('<div>'),
            new HTMLPurifier_Token_Text('Illegal Div'),
            new HTMLPurifier_Token_Text('</div>'),
            new HTMLPurifier_Token_End('b'),
        );
        
        // required children missing: the whole node is removed
        $inputs[3] = array(
            new HTMLPurifier_Token_Start('ul'),
            new HTMLPurifier_Token_End('ul'),
        );
        $expect[3] = array();
        
        // illegal text gets removed
        $inputs[4] = array(
            new HTMLPurifier_Token_Start('ul'),
            new HTMLPurifier_Token_Text('Illegal Text'),
            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Legal item'),
            new HTMLPurifier_Token_End('li'),
            new HTMLPurifier_Token_End('ul'),
        );
        $expect[4] = array(
            new HTMLPurifier_Token_Start('ul'),
            new HTMLPurifier_Token_Start('li'),
            new HTMLPurifier_Token_Text('Legal item'),
            new HTMLPurifier_Token_End('li'),
            new HTMLPurifier_Token_End('ul'),
        );
        
        foreach ($inputs as $index => $tokens) {
            $actual = $this->def->fixNesting($tokens);
            $this->assertEqual($expect[$index], $actual);
            paintIf($actual, $actual != $expect[$index]);
        }
    }
    
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,89 +1,89 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
|
||||||
class HTMLPurifier_GeneratorTest extends UnitTestCase
|
class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
var $gen;
|
var $gen;
|
||||||
|
|
||||||
function HTMLPurifier_GeneratorTest() {
|
function HTMLPurifier_GeneratorTest() {
|
||||||
$this->UnitTestCase();
|
$this->UnitTestCase();
|
||||||
$this->gen = new HTMLPurifier_Generator();
|
$this->gen = new HTMLPurifier_Generator();
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_generateFromToken() {
|
function test_generateFromToken() {
|
||||||
|
|
||||||
$inputs = array();
|
$inputs = array();
|
||||||
$expect = array();
|
$expect = array();
|
||||||
|
|
||||||
$inputs[0] = new HTMLPurifier_Token_Text('Foobar.<>');
|
$inputs[0] = new HTMLPurifier_Token_Text('Foobar.<>');
|
||||||
$expect[0] = 'Foobar.&lt;&gt;';
|
$expect[0] = 'Foobar.&lt;&gt;';
|
||||||
|
|
||||||
$inputs[1] = new HTMLPurifier_Token_Start('a',
|
$inputs[1] = new HTMLPurifier_Token_Start('a',
|
||||||
array('href' => 'dyn?a=foo&b=bar')
|
array('href' => 'dyn?a=foo&b=bar')
|
||||||
);
|
);
|
||||||
$expect[1] = '<a href="dyn?a=foo&amp;b=bar">';
|
$expect[1] = '<a href="dyn?a=foo&amp;b=bar">';
|
||||||
|
|
||||||
$inputs[2] = new HTMLPurifier_Token_End('b');
|
$inputs[2] = new HTMLPurifier_Token_End('b');
|
||||||
$expect[2] = '</b>';
|
$expect[2] = '</b>';
|
||||||
|
|
||||||
$inputs[3] = new HTMLPurifier_Token_Empty('br',
|
$inputs[3] = new HTMLPurifier_Token_Empty('br',
|
||||||
array('style' => 'font-family:"Courier New";')
|
array('style' => 'font-family:"Courier New";')
|
||||||
);
|
);
|
||||||
$expect[3] = '<br style="font-family:&quot;Courier New&quot;;" />';
|
$expect[3] = '<br style="font-family:&quot;Courier New&quot;;" />';
|
||||||
|
|
||||||
$inputs[4] = new HTMLPurifier_Token_Start('asdf');
|
$inputs[4] = new HTMLPurifier_Token_Start('asdf');
|
||||||
$expect[4] = '<asdf>';
|
$expect[4] = '<asdf>';
|
||||||
|
|
||||||
$inputs[5] = new HTMLPurifier_Token_Empty('br');
|
$inputs[5] = new HTMLPurifier_Token_Empty('br');
|
||||||
$expect[5] = '<br />';
|
$expect[5] = '<br />';
|
||||||
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
$result = $this->gen->generateFromToken($input);
|
$result = $this->gen->generateFromToken($input);
|
||||||
$this->assertEqual($result, $expect[$i]);
|
$this->assertEqual($result, $expect[$i]);
|
||||||
paintIf($result, $result != $expect[$i]);
|
paintIf($result, $result != $expect[$i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_generateAttributes() {
|
function test_generateAttributes() {
|
||||||
|
|
||||||
$inputs = array();
|
$inputs = array();
|
||||||
$expect = array();
|
$expect = array();
|
||||||
|
|
||||||
$inputs[0] = array();
|
$inputs[0] = array();
|
||||||
$expect[0] = '';
|
$expect[0] = '';
|
||||||
|
|
||||||
$inputs[1] = array('href' => 'dyn?a=foo&b=bar');
|
$inputs[1] = array('href' => 'dyn?a=foo&b=bar');
|
||||||
$expect[1] = 'href="dyn?a=foo&amp;b=bar"';
|
$expect[1] = 'href="dyn?a=foo&amp;b=bar"';
|
||||||
|
|
||||||
$inputs[2] = array('style' => 'font-family:"Courier New";');
|
$inputs[2] = array('style' => 'font-family:"Courier New";');
|
||||||
$expect[2] = 'style="font-family:&quot;Courier New&quot;;"';
|
$expect[2] = 'style="font-family:&quot;Courier New&quot;;"';
|
||||||
|
|
||||||
$inputs[3] = array('src' => 'picture.jpg', 'alt' => 'Short & interesting');
|
$inputs[3] = array('src' => 'picture.jpg', 'alt' => 'Short & interesting');
|
||||||
$expect[3] = 'src="picture.jpg" alt="Short &amp; interesting"';
|
$expect[3] = 'src="picture.jpg" alt="Short &amp; interesting"';
|
||||||
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
$result = $this->gen->generateAttributes($input);
|
$result = $this->gen->generateAttributes($input);
|
||||||
$this->assertEqual($result, $expect[$i]);
|
$this->assertEqual($result, $expect[$i]);
|
||||||
paintIf($result, $result != $expect[$i]);
|
paintIf($result, $result != $expect[$i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_generateFromTokens() {
|
function test_generateFromTokens() {
|
||||||
|
|
||||||
$tokens = array(
|
$tokens = array(
|
||||||
new HTMLPurifier_Token_Start('b'),
|
new HTMLPurifier_Token_Start('b'),
|
||||||
new HTMLPurifier_Token_Text('Foobar!'),
|
new HTMLPurifier_Token_Text('Foobar!'),
|
||||||
new HTMLPurifier_Token_End('b')
|
new HTMLPurifier_Token_End('b')
|
||||||
);
|
);
|
||||||
$expect = '<b>Foobar!</b>';
|
$expect = '<b>Foobar!</b>';
|
||||||
$this->assertEqual($expect, $this->gen->generateFromTokens($tokens));
|
$this->assertEqual($expect, $this->gen->generateFromTokens($tokens));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,83 +1,83 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
|
||||||
class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
|
class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
var $DirectLex;
|
var $DirectLex;
|
||||||
|
|
||||||
function setUp() {
|
function setUp() {
|
||||||
$this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
|
$this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_nextWhiteSpace() {
|
function test_nextWhiteSpace() {
|
||||||
$HP =& $this->DirectLex;
|
$HP =& $this->DirectLex;
|
||||||
$this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
|
$this->assertIdentical(false, $HP->nextWhiteSpace('asdf'));
|
||||||
$this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
|
$this->assertIdentical(0, $HP->nextWhiteSpace(' asdf'));
|
||||||
$this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
|
$this->assertIdentical(0, $HP->nextWhiteSpace("\nasdf"));
|
||||||
$this->assertIdentical(1, $HP->nextWhiteSpace("a\tsdf"));
|
$this->assertIdentical(1, $HP->nextWhiteSpace("a\tsdf"));
|
||||||
$this->assertIdentical(4, $HP->nextWhiteSpace("asdf\r"));
|
$this->assertIdentical(4, $HP->nextWhiteSpace("asdf\r"));
|
||||||
$this->assertIdentical(2, $HP->nextWhiteSpace("as\t\r\nasdf as"));
|
$this->assertIdentical(2, $HP->nextWhiteSpace("as\t\r\nasdf as"));
|
||||||
$this->assertIdentical(3, $HP->nextWhiteSpace('a a ', 2));
|
$this->assertIdentical(3, $HP->nextWhiteSpace('a a ', 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_parseData() {
|
function test_parseData() {
|
||||||
$HP =& $this->DirectLex;
|
$HP =& $this->DirectLex;
|
||||||
$this->assertIdentical('asdf', $HP->parseData('asdf'));
|
$this->assertIdentical('asdf', $HP->parseData('asdf'));
|
||||||
$this->assertIdentical('&', $HP->parseData('&amp;'));
|
$this->assertIdentical('&', $HP->parseData('&amp;'));
|
||||||
$this->assertIdentical('"', $HP->parseData('&quot;'));
|
$this->assertIdentical('"', $HP->parseData('&quot;'));
|
||||||
$this->assertIdentical("'", $HP->parseData('&#039;'));
|
$this->assertIdentical("'", $HP->parseData('&#039;'));
|
||||||
$this->assertIdentical('-', $HP->parseData('&#x2D;'));
|
$this->assertIdentical('-', $HP->parseData('&#x2D;'));
|
||||||
// UTF-8 needed!!!
|
// UTF-8 needed!!!
|
||||||
}
|
}
|
||||||
|
|
||||||
// internals testing
|
// internals testing
|
||||||
function test_tokenizeAttributeString() {
|
function test_tokenizeAttributeString() {
|
||||||
|
|
||||||
$input[0] = 'href="asdf" boom="assdf"';
|
$input[0] = 'href="asdf" boom="assdf"';
|
||||||
$expect[0] = array('href'=>'asdf', 'boom'=>'assdf');
|
$expect[0] = array('href'=>'asdf', 'boom'=>'assdf');
|
||||||
|
|
||||||
$input[1] = "href='r'";
|
$input[1] = "href='r'";
|
||||||
$expect[1] = array('href'=>'r');
|
$expect[1] = array('href'=>'r');
|
||||||
|
|
||||||
$input[2] = 'onclick="javascript:alert(\'asdf\');"';
|
$input[2] = 'onclick="javascript:alert(\'asdf\');"';
|
||||||
$expect[2] = array('onclick' => "javascript:alert('asdf');");
|
$expect[2] = array('onclick' => "javascript:alert('asdf');");
|
||||||
|
|
||||||
$input[3] = 'selected';
|
$input[3] = 'selected';
|
||||||
$expect[3] = array('selected'=>'selected');
|
$expect[3] = array('selected'=>'selected');
|
||||||
|
|
||||||
$input[4] = '="asdf"';
|
$input[4] = '="asdf"';
|
||||||
$expect[4] = array();
|
$expect[4] = array();
|
||||||
|
|
||||||
$input[5] = 'missile=launch';
|
$input[5] = 'missile=launch';
|
||||||
$expect[5] = array('missile' => 'launch');
|
$expect[5] = array('missile' => 'launch');
|
||||||
|
|
||||||
$input[6] = 'href="foo';
|
$input[6] = 'href="foo';
|
||||||
$expect[6] = array('href' => 'foo');
|
$expect[6] = array('href' => 'foo');
|
||||||
|
|
||||||
$input[7] = '"=';
|
$input[7] = '"=';
|
||||||
$expect[7] = array('"' => '');
|
$expect[7] = array('"' => '');
|
||||||
|
|
||||||
$input[8] = 'href ="about:blank"rel ="nofollow"';
|
$input[8] = 'href ="about:blank"rel ="nofollow"';
|
||||||
$expect[8] = array('href' => 'about:blank', 'rel' => 'nofollow');
|
$expect[8] = array('href' => 'about:blank', 'rel' => 'nofollow');
|
||||||
|
|
||||||
$input[9] = 'foo bar';
|
$input[9] = 'foo bar';
|
||||||
$expect[9] = array('foo' => 'foo', 'bar' => 'bar');
|
$expect[9] = array('foo' => 'foo', 'bar' => 'bar');
|
||||||
|
|
||||||
$input[10] = 'foo="bar" blue';
|
$input[10] = 'foo="bar" blue';
|
||||||
$expect[10] = array('foo' => 'bar', 'blue' => 'blue');
|
$expect[10] = array('foo' => 'bar', 'blue' => 'blue');
|
||||||
|
|
||||||
$size = count($input);
|
$size = count($input);
|
||||||
for($i = 0; $i < $size; $i++) {
|
for($i = 0; $i < $size; $i++) {
|
||||||
$result = $this->DirectLex->tokenizeAttributeString($input[$i]);
|
$result = $this->DirectLex->tokenizeAttributeString($input[$i]);
|
||||||
$this->assertEqual($expect[$i], $result, 'Test ' . $i . ': %s');
|
$this->assertEqual($expect[$i], $result, 'Test ' . $i . ': %s');
|
||||||
paintIf($result, $expect[$i] != $result);
|
paintIf($result, $expect[$i] != $result);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,197 +1,197 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||||
|
|
||||||
class HTMLPurifier_LexerTest extends UnitTestCase
|
class HTMLPurifier_LexerTest extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
var $DirectLex, $PEARSax3, $DOMLex;
|
var $DirectLex, $PEARSax3, $DOMLex;
|
||||||
var $_has_dom;
|
var $_has_dom;
|
||||||
|
|
||||||
function setUp() {
|
function setUp() {
|
||||||
$this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
|
$this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
|
||||||
$this->PEARSax3 = new HTMLPurifier_Lexer_PEARSax3();
|
$this->PEARSax3 = new HTMLPurifier_Lexer_PEARSax3();
|
||||||
|
|
||||||
$this->_has_dom = version_compare(PHP_VERSION, '5', '>=');
|
$this->_has_dom = version_compare(PHP_VERSION, '5', '>=');
|
||||||
|
|
||||||
if ($this->_has_dom) {
|
if ($this->_has_dom) {
|
||||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||||
$this->DOMLex = new HTMLPurifier_Lexer_DOMLex();
|
$this->DOMLex = new HTMLPurifier_Lexer_DOMLex();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_tokenizeHTML() {
|
function test_tokenizeHTML() {
|
||||||
|
|
||||||
$input = array();
|
$input = array();
|
||||||
$expect = array();
|
$expect = array();
|
||||||
$sax_expect = array();
|
$sax_expect = array();
|
||||||
|
|
||||||
$input[0] = '';
|
$input[0] = '';
|
||||||
$expect[0] = array();
|
$expect[0] = array();
|
||||||
|
|
||||||
$input[1] = 'This is regular text.';
|
$input[1] = 'This is regular text.';
|
||||||
$expect[1] = array(
|
$expect[1] = array(
|
||||||
new HTMLPurifier_Token_Text('This is regular text.')
|
new HTMLPurifier_Token_Text('This is regular text.')
|
||||||
);
|
);
|
||||||
|
|
||||||
$input[2] = 'This is <b>bold</b> text';
|
$input[2] = 'This is <b>bold</b> text';
|
||||||
$expect[2] = array(
|
$expect[2] = array(
|
||||||
new HTMLPurifier_Token_Text('This is ')
|
new HTMLPurifier_Token_Text('This is ')
|
||||||
,new HTMLPurifier_Token_Start('b', array())
|
,new HTMLPurifier_Token_Start('b', array())
|
||||||
,new HTMLPurifier_Token_Text('bold')
|
,new HTMLPurifier_Token_Text('bold')
|
||||||
,new HTMLPurifier_Token_End('b')
|
,new HTMLPurifier_Token_End('b')
|
||||||
,new HTMLPurifier_Token_Text(' text')
|
,new HTMLPurifier_Token_Text(' text')
|
||||||
);
|
);
|
||||||
|
|
||||||
$input[3] = '<DIV>Totally rad dude. <b>asdf</b></div>';
|
$input[3] = '<DIV>Totally rad dude. <b>asdf</b></div>';
|
||||||
$expect[3] = array(
|
$expect[3] = array(
|
||||||
new HTMLPurifier_Token_Start('DIV', array())
|
new HTMLPurifier_Token_Start('DIV', array())
|
||||||
,new HTMLPurifier_Token_Text('Totally rad dude. ')
|
,new HTMLPurifier_Token_Text('Totally rad dude. ')
|
||||||
,new HTMLPurifier_Token_Start('b', array())
|
,new HTMLPurifier_Token_Start('b', array())
|
||||||
,new HTMLPurifier_Token_Text('asdf')
|
,new HTMLPurifier_Token_Text('asdf')
|
||||||
,new HTMLPurifier_Token_End('b')
|
,new HTMLPurifier_Token_End('b')
|
||||||
,new HTMLPurifier_Token_End('div')
|
,new HTMLPurifier_Token_End('div')
|
||||||
);
|
);
|
||||||
|
|
||||||
// [XML-INVALID]
|
// [XML-INVALID]
|
||||||
$input[4] = '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>';
|
$input[4] = '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>';
|
||||||
$expect[4] = array(
|
$expect[4] = array(
|
||||||
new HTMLPurifier_Token_Start('asdf')
|
new HTMLPurifier_Token_Start('asdf')
|
||||||
,new HTMLPurifier_Token_End('asdf')
|
,new HTMLPurifier_Token_End('asdf')
|
||||||
,new HTMLPurifier_Token_Start('d')
|
,new HTMLPurifier_Token_Start('d')
|
||||||
,new HTMLPurifier_Token_End('d')
|
,new HTMLPurifier_Token_End('d')
|
||||||
,new HTMLPurifier_Token_Start('poOloka')
|
,new HTMLPurifier_Token_Start('poOloka')
|
||||||
,new HTMLPurifier_Token_Start('poolasdf')
|
,new HTMLPurifier_Token_Start('poolasdf')
|
||||||
,new HTMLPurifier_Token_Start('ds')
|
,new HTMLPurifier_Token_Start('ds')
|
||||||
,new HTMLPurifier_Token_End('asdf')
|
,new HTMLPurifier_Token_End('asdf')
|
||||||
,new HTMLPurifier_Token_End('ASDF')
|
,new HTMLPurifier_Token_End('ASDF')
|
||||||
);
|
);
|
||||||
// DOM is different because it condenses empty tags into REAL empty ones
|
// DOM is different because it condenses empty tags into REAL empty ones
|
||||||
// as well as makes it well-formed
|
// as well as makes it well-formed
|
||||||
$dom_expect[4] = array(
|
$dom_expect[4] = array(
|
||||||
new HTMLPurifier_Token_Empty('asdf')
|
new HTMLPurifier_Token_Empty('asdf')
|
||||||
,new HTMLPurifier_Token_Empty('d')
|
,new HTMLPurifier_Token_Empty('d')
|
||||||
,new HTMLPurifier_Token_Start('pooloka')
|
,new HTMLPurifier_Token_Start('pooloka')
|
||||||
,new HTMLPurifier_Token_Start('poolasdf')
|
,new HTMLPurifier_Token_Start('poolasdf')
|
||||||
,new HTMLPurifier_Token_Empty('ds')
|
,new HTMLPurifier_Token_Empty('ds')
|
||||||
,new HTMLPurifier_Token_End('poolasdf')
|
,new HTMLPurifier_Token_End('poolasdf')
|
||||||
,new HTMLPurifier_Token_End('pooloka')
|
,new HTMLPurifier_Token_End('pooloka')
|
||||||
);
|
);
|
||||||
|
|
||||||
$input[5] = '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>';
|
$input[5] = '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>';
|
||||||
$expect[5] = array(
|
$expect[5] = array(
|
||||||
new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!'))
|
new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!'))
|
||||||
,new HTMLPurifier_Token_Text('Link to ')
|
,new HTMLPurifier_Token_Text('Link to ')
|
||||||
,new HTMLPurifier_Token_Start('b',array('id'=>'asdf'))
|
,new HTMLPurifier_Token_Start('b',array('id'=>'asdf'))
|
||||||
,new HTMLPurifier_Token_Text('foobar')
|
,new HTMLPurifier_Token_Text('foobar')
|
||||||
,new HTMLPurifier_Token_End('b')
|
,new HTMLPurifier_Token_End('b')
|
||||||
,new HTMLPurifier_Token_End('a')
|
,new HTMLPurifier_Token_End('a')
|
||||||
);
|
);
|
||||||
|
|
||||||
$input[6] = '<br />';
|
$input[6] = '<br />';
|
||||||
$expect[6] = array(
|
$expect[6] = array(
|
||||||
new HTMLPurifier_Token_Empty('br')
|
new HTMLPurifier_Token_Empty('br')
|
||||||
);
|
);
|
||||||
|
|
||||||
// [SGML-INVALID] [RECOVERABLE]
|
// [SGML-INVALID] [RECOVERABLE]
|
||||||
$input[7] = '<!-- Comment --> <!-- not so well formed --->';
|
$input[7] = '<!-- Comment --> <!-- not so well formed --->';
|
||||||
$expect[7] = array(
|
$expect[7] = array(
|
||||||
new HTMLPurifier_Token_Comment(' Comment ')
|
new HTMLPurifier_Token_Comment(' Comment ')
|
||||||
,new HTMLPurifier_Token_Text(' ')
|
,new HTMLPurifier_Token_Text(' ')
|
||||||
,new HTMLPurifier_Token_Comment(' not so well formed -')
|
,new HTMLPurifier_Token_Comment(' not so well formed -')
|
||||||
);
|
);
|
||||||
$sax_expect[7] = false; // we need to figure out proper comment output
|
$sax_expect[7] = false; // we need to figure out proper comment output
|
||||||
|
|
||||||
// [SGML-INVALID]
|
// [SGML-INVALID]
|
||||||
$input[8] = '<a href=""';
|
$input[8] = '<a href=""';
|
||||||
$expect[8] = array(
|
$expect[8] = array(
|
||||||
new HTMLPurifier_Token_Text('<a href=""')
|
new HTMLPurifier_Token_Text('<a href=""')
|
||||||
);
|
);
|
||||||
// SAX parses it into a tag
|
// SAX parses it into a tag
|
||||||
$sax_expect[8] = array(
|
$sax_expect[8] = array(
|
||||||
new HTMLPurifier_Token_Start('a', array('href'=>''))
|
new HTMLPurifier_Token_Start('a', array('href'=>''))
|
||||||
);
|
);
|
||||||
// DOM parses it into an empty tag
|
// DOM parses it into an empty tag
|
||||||
$dom_expect[8] = array(
|
$dom_expect[8] = array(
|
||||||
new HTMLPurifier_Token_Empty('a', array('href'=>''))
|
new HTMLPurifier_Token_Empty('a', array('href'=>''))
|
||||||
);
|
);
|
||||||
|
|
||||||
$input[9] = '&lt;b&gt;';
|
$input[9] = '&lt;b&gt;';
|
||||||
$expect[9] = array(
|
$expect[9] = array(
|
||||||
new HTMLPurifier_Token_Text('<b>')
|
new HTMLPurifier_Token_Text('<b>')
|
||||||
);
|
);
|
||||||
$sax_expect[9] = array(
|
$sax_expect[9] = array(
|
||||||
new HTMLPurifier_Token_Text('<')
|
new HTMLPurifier_Token_Text('<')
|
||||||
,new HTMLPurifier_Token_Text('b')
|
,new HTMLPurifier_Token_Text('b')
|
||||||
,new HTMLPurifier_Token_Text('>')
|
,new HTMLPurifier_Token_Text('>')
|
||||||
);
|
);
|
||||||
// note that SAX can clump text nodes together. We won't be
|
// note that SAX can clump text nodes together. We won't be
|
||||||
// too picky though
|
// too picky though
|
||||||
|
|
||||||
// [SGML-INVALID]
|
// [SGML-INVALID]
|
||||||
$input[10] = '<a "=>';
|
$input[10] = '<a "=>';
|
||||||
// We barf on this, aim for no attributes
|
// We barf on this, aim for no attributes
|
||||||
$expect[10] = array(
|
$expect[10] = array(
|
||||||
new HTMLPurifier_Token_Start('a', array('"' => ''))
|
new HTMLPurifier_Token_Start('a', array('"' => ''))
|
||||||
);
|
);
|
||||||
// DOM correctly has no attributes, but also closes the tag
|
// DOM correctly has no attributes, but also closes the tag
|
||||||
$dom_expect[10] = array(
|
$dom_expect[10] = array(
|
||||||
new HTMLPurifier_Token_Empty('a')
|
new HTMLPurifier_Token_Empty('a')
|
||||||
);
|
);
|
||||||
// SAX barfs on this
|
// SAX barfs on this
|
||||||
$sax_expect[10] = array(
|
$sax_expect[10] = array(
|
||||||
new HTMLPurifier_Token_Start('a', array('"' => ''))
|
new HTMLPurifier_Token_Start('a', array('"' => ''))
|
||||||
);
|
);
|
||||||
|
|
||||||
// [INVALID] [RECOVERABLE]
|
// [INVALID] [RECOVERABLE]
|
||||||
$input[11] = '"';
|
$input[11] = '"';
|
||||||
$expect[11] = array( new HTMLPurifier_Token_Text('"') );
|
$expect[11] = array( new HTMLPurifier_Token_Text('"') );
|
||||||
|
|
||||||
// compare with this valid one:
|
// compare with this valid one:
|
||||||
$input[12] = '&quot;';
|
$input[12] = '&quot;';
|
||||||
$expect[12] = array( new HTMLPurifier_Token_Text('"') );
|
$expect[12] = array( new HTMLPurifier_Token_Text('"') );
|
||||||
$sax_expect[12] = false;
|
$sax_expect[12] = false;
|
||||||
// SAX chokes on this? We do have entity parsing on, so it should work!
|
// SAX chokes on this? We do have entity parsing on, so it should work!
|
||||||
|
|
||||||
foreach($input as $i => $discard) {
|
foreach($input as $i => $discard) {
|
||||||
$result = $this->DirectLex->tokenizeHTML($input[$i]);
|
$result = $this->DirectLex->tokenizeHTML($input[$i]);
|
||||||
$this->assertEqual($expect[$i], $result, 'Test '.$i.': %s');
|
$this->assertEqual($expect[$i], $result, 'Test '.$i.': %s');
|
||||||
paintIf($result, $expect[$i] != $result);
|
paintIf($result, $expect[$i] != $result);
|
||||||
|
|
||||||
// assert unless I say otherwise
|
// assert unless I say otherwise
|
||||||
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i]);
|
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i]);
|
||||||
if (!isset($sax_expect[$i])) {
|
if (!isset($sax_expect[$i])) {
|
||||||
// by default, assert with normal result
|
// by default, assert with normal result
|
||||||
$this->assertEqual($expect[$i], $sax_result, 'Test '.$i.': %s');
|
$this->assertEqual($expect[$i], $sax_result, 'Test '.$i.': %s');
|
||||||
paintIf($sax_result, $expect[$i] != $sax_result);
|
paintIf($sax_result, $expect[$i] != $sax_result);
|
||||||
} elseif ($sax_expect[$i] === false) {
|
} elseif ($sax_expect[$i] === false) {
|
||||||
// assertions were turned off, optionally dump
|
// assertions were turned off, optionally dump
|
||||||
// paintIf($sax_expect, $i == NUMBER);
|
// paintIf($sax_expect, $i == NUMBER);
|
||||||
} else {
|
} else {
|
||||||
// match with a custom SAX result array
|
// match with a custom SAX result array
|
||||||
$this->assertEqual($sax_expect[$i], $sax_result, 'Test '.$i.': %s');
|
$this->assertEqual($sax_expect[$i], $sax_result, 'Test '.$i.': %s');
|
||||||
paintIf($sax_result, $sax_expect[$i] != $sax_result);
|
paintIf($sax_result, $sax_expect[$i] != $sax_result);
|
||||||
}
|
}
|
||||||
if ($this->_has_dom) {
|
if ($this->_has_dom) {
|
||||||
$dom_result = $this->DOMLex->tokenizeHTML($input[$i]);
|
$dom_result = $this->DOMLex->tokenizeHTML($input[$i]);
|
||||||
// same structure as SAX
|
// same structure as SAX
|
||||||
if (!isset($dom_expect[$i])) {
|
if (!isset($dom_expect[$i])) {
|
||||||
$this->assertEqual($expect[$i], $dom_result, 'Test '.$i.': %s');
|
$this->assertEqual($expect[$i], $dom_result, 'Test '.$i.': %s');
|
||||||
paintIf($dom_result, $expect[$i] != $dom_result);
|
paintIf($dom_result, $expect[$i] != $dom_result);
|
||||||
} elseif ($dom_expect[$i] === false) {
|
} elseif ($dom_expect[$i] === false) {
|
||||||
// paintIf($dom_result, $i == NUMBER);
|
// paintIf($dom_result, $i == NUMBER);
|
||||||
} else {
|
} else {
|
||||||
$this->assertEqual($dom_expect[$i], $dom_result, 'Test '.$i.': %s');
|
$this->assertEqual($dom_expect[$i], $dom_result, 'Test '.$i.': %s');
|
||||||
paintIf($dom_result, $dom_expect[$i] != $dom_result);
|
paintIf($dom_result, $dom_expect[$i] != $dom_result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@ -1,25 +1,25 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
error_reporting(E_ALL);
|
error_reporting(E_ALL);
|
||||||
|
|
||||||
require_once 'simpletest/unit_tester.php';
|
require_once 'simpletest/unit_tester.php';
|
||||||
require_once 'simpletest/reporter.php';
|
require_once 'simpletest/reporter.php';
|
||||||
require_once 'simpletest/mock_objects.php';
|
require_once 'simpletest/mock_objects.php';
|
||||||
|
|
||||||
require_once 'Debugger.php';
|
require_once 'Debugger.php';
|
||||||
|
|
||||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library');
|
||||||
|
|
||||||
$test = new GroupTest('HTMLPurifier');
|
$test = new GroupTest('HTMLPurifier');
|
||||||
|
|
||||||
$test->addTestFile('HTMLPurifier/LexerTest.php');
|
$test->addTestFile('HTMLPurifier/LexerTest.php');
|
||||||
$test->addTestFile('HTMLPurifier/Lexer/DirectLexTest.php');
|
$test->addTestFile('HTMLPurifier/Lexer/DirectLexTest.php');
|
||||||
//$test->addTestFile('TokenTest.php');
|
//$test->addTestFile('TokenTest.php');
|
||||||
$test->addTestFile('HTMLPurifier/DefinitionTest.php');
|
$test->addTestFile('HTMLPurifier/DefinitionTest.php');
|
||||||
$test->addTestFile('HTMLPurifier/ChildDefTest.php');
|
$test->addTestFile('HTMLPurifier/ChildDefTest.php');
|
||||||
$test->addTestFile('HTMLPurifier/GeneratorTest.php');
|
$test->addTestFile('HTMLPurifier/GeneratorTest.php');
|
||||||
|
|
||||||
$test->run( new HtmlReporter() );
|
$test->run( new HtmlReporter() );
|
||||||
|
|
||||||
?>
|
?>
|
Loading…
Reference in New Issue
Block a user