mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 16:31:53 +00:00
- XHTML generation can now be turned off, allowing things like <br>
- Docs updated in preparation for 1.1 release git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@422 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
6a33945499
commit
6740ba61af
28
INSTALL
28
INSTALL
@ -26,7 +26,7 @@ any earlier versions.
|
|||||||
|
|
||||||
I have been unable to get PHP 5.0.5 working on my computer, so if someone
|
I have been unable to get PHP 5.0.5 working on my computer, so if someone
|
||||||
wants to test that, be my guest. All tests were done on Windows XP Home,
|
wants to test that, be my guest. All tests were done on Windows XP Home,
|
||||||
but operating system is quite irrelevant in this particular case.
|
but the operating system should not be a major factor for the library.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -70,21 +70,36 @@ I cannot stress the importance of these two bullets enough. Omitting either
|
|||||||
of them could have dire consequences not only for security but for plain
|
of them could have dire consequences not only for security but for plain
|
||||||
old usability. You can find a more in-depth discussion of why this is needed
|
old usability. You can find a more in-depth discussion of why this is needed
|
||||||
in docs/security.txt, in the meantime, try to change your output so this is
|
in docs/security.txt, in the meantime, try to change your output so this is
|
||||||
the case.
|
the case. If you can't, well, we might be able to accommodate you (read
|
||||||
|
section 3).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
3. Configuring HTML Purifier
|
||||||
|
|
||||||
|
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||||
|
Purifier needs to be told what to do.
|
||||||
|
|
||||||
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
||||||
switch HTML Purifier's encoding. Note that the availability of encodings is
|
switch HTML Purifier's encoding. Note that the availability of encodings is
|
||||||
dependent on iconv, and you'll be missing characters if the charset you
|
dependent on iconv, and you'll be missing characters if the charset you
|
||||||
choose doesn't have them.
|
choose doesn't have them.
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('Core', 'Encoding', /* put your encoding here */);
|
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||||
|
|
||||||
An example usage for Latin-1 websites:
|
An example usage for Latin-1 websites:
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||||
|
|
||||||
|
For those of you stuck using HTML 4.01 Transitional, you can disable
|
||||||
|
XHTML output like this:
|
||||||
|
|
||||||
|
$config->set('Core', 'XHTML', false);
|
||||||
|
|
||||||
|
However, I strongly recommend that you use XHTML. Currently, we can only
|
||||||
|
guarantee transitional-compliant output; future versions will also allow strict
|
||||||
|
output.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
3. Using the code
|
3. Using the code
|
||||||
@ -106,7 +121,7 @@ advice on what to do if HTML Purifier is slowing down your application.
|
|||||||
|
|
||||||
4. Quick install
|
4. Quick install
|
||||||
|
|
||||||
If your website is in UTF-8, use this code:
|
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
set_include_path('/path/to/htmlpurifier/library'
|
set_include_path('/path/to/htmlpurifier/library'
|
||||||
@ -116,7 +131,7 @@ If your website is in UTF-8, use this code:
|
|||||||
|
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
|
||||||
If your website is in a different encoding, use this code:
|
If your website is in a different encoding or doctype, use this code:
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
set_include_path('/path/to/htmlpurifier/library'
|
set_include_path('/path/to/htmlpurifier/library'
|
||||||
@ -125,6 +140,7 @@ If your website is in a different encoding, use this code:
|
|||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
||||||
|
$config->set('Core', 'XHTML', true); //replace with false if HTML 4.01
|
||||||
$purifier = new HTMLPurifier($config);
|
$purifier = new HTMLPurifier($config);
|
||||||
|
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
6
NEWS
6
NEWS
@ -1,15 +1,13 @@
|
|||||||
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||
|
|
||||||
1.1.0, unknown release date
|
1.1.0, projected 2006-09-16
|
||||||
- Made URI validator more forgiving: will ignore leading and trailing
|
- Made URI validator more forgiving: will ignore leading and trailing
|
||||||
quotes, apostrophes and less than or greater than signs.
|
quotes, apostrophes and less than or greater than signs.
|
||||||
- Enforce alphanumeric namespace and directive names for configuration.
|
- Enforce alphanumeric namespace and directive names for configuration.
|
||||||
- Directive documentation generation using XSLT
|
- Directive documentation generation using XSLT
|
||||||
- Table child definition made more flexible, will fix up poorly ordered elements
|
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||||
|
- XHTML generation can now be turned off, allowing things like <br>
|
||||||
1.0.2, unknown release date
|
|
||||||
(bugfix release may be dropped if no bugs found)
|
|
||||||
|
|
||||||
1.0.1, released 2006-09-04
|
1.0.1, released 2006-09-04
|
||||||
- Fixed slight bug in DOMLex attribute parsing
|
- Fixed slight bug in DOMLex attribute parsing
|
||||||
|
3
TODO
3
TODO
@ -5,9 +5,6 @@ Ongoing
|
|||||||
- Lots of profiling, make it faster!
|
- Lots of profiling, make it faster!
|
||||||
- Plugins for major CMSes (very tricky issue)
|
- Plugins for major CMSes (very tricky issue)
|
||||||
|
|
||||||
1.1 release
|
|
||||||
- Allow HTML 4.01 output (cosmetic changes to the generator)
|
|
||||||
|
|
||||||
1.2 release
|
1.2 release
|
||||||
- Additional support for poorly written HTML
|
- Additional support for poorly written HTML
|
||||||
- Implement all non-essential attribute transforms
|
- Implement all non-essential attribute transforms
|
||||||
|
7
WYSIWYG
7
WYSIWYG
@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
WYSIWYG - What You See Is What You Get
|
WYSIWYG - What You See Is What You Get
|
||||||
HTMLPurifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
HTML Purifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
||||||
|
|
||||||
Javascript-based WYSIWYG editors, simply stated, are quite amazing. But I've
|
Javascript-based WYSIWYG editors, simply stated, are quite amazing. But I've
|
||||||
always been wary about using them due to security issues: they handle the
|
always been wary about using them due to security issues: they handle the
|
||||||
@ -13,6 +13,9 @@ other markup languages still reign supreme. Put simply: filtering HTML is
|
|||||||
hard work, and these WYSIWYG authors don't offer anything to alleviate that
|
hard work, and these WYSIWYG authors don't offer anything to alleviate that
|
||||||
trouble. Therein lies the solution:
|
trouble. Therein lies the solution:
|
||||||
|
|
||||||
HTMLPurifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||||
|
|
||||||
Enough said.
|
Enough said.
|
||||||
|
|
||||||
|
There is a proof-of-concept integration of HTML Purifier with the Mantis
|
||||||
|
bugtracker at http://hp.jpsband.org/mantis/
|
||||||
|
@ -15,6 +15,12 @@ require_once 'HTMLPurifier/AttrContext.php';
|
|||||||
class HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tells us whether or not an HTML attribute is minimized. Only the
|
||||||
|
* boolean attribute vapourware would use this.
|
||||||
|
*/
|
||||||
|
var $minimized = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract function defined for functions that validate and clean strings.
|
* Abstract function defined for functions that validate and clean strings.
|
||||||
*
|
*
|
||||||
|
@ -15,6 +15,14 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
'generateFromTokens.'
|
'generateFromTokens.'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigDef::define(
|
||||||
|
'Core', 'XHTML', true, 'bool',
|
||||||
|
'Determines whether or not output is XHTML or not. When disabled, HTML '.
|
||||||
|
'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
|
||||||
|
'such as boolean attribute expansion and trailing slashes in empty tags. '.
|
||||||
|
'This directive was available since 1.1.'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates HTML from tokens.
|
* Generates HTML from tokens.
|
||||||
*/
|
*/
|
||||||
@ -22,11 +30,16 @@ class HTMLPurifier_Generator
|
|||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bool cache of the CleanUTF8DuringGeneration directive.
|
* Bool cache of %Core.CleanUTF8DuringGeneration
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
var $_clean_utf8 = false;
|
var $_clean_utf8 = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bool cache of %Core.XHTML
|
||||||
|
*/
|
||||||
|
var $_xhtml = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates HTML from an array of tokens.
|
* Generates HTML from an array of tokens.
|
||||||
* @param $tokens Array of HTMLPurifier_Token
|
* @param $tokens Array of HTMLPurifier_Token
|
||||||
@ -38,6 +51,7 @@ class HTMLPurifier_Generator
|
|||||||
$html = '';
|
$html = '';
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||||
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
||||||
|
$this->_xhtml = $config->get('Core', 'XHTML');
|
||||||
if (!$tokens) return '';
|
if (!$tokens) return '';
|
||||||
foreach ($tokens as $token) {
|
foreach ($tokens as $token) {
|
||||||
$html .= $this->generateFromToken($token);
|
$html .= $this->generateFromToken($token);
|
||||||
@ -61,7 +75,9 @@ class HTMLPurifier_Generator
|
|||||||
|
|
||||||
} elseif ($token->type == 'empty') {
|
} elseif ($token->type == 'empty') {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attributes);
|
||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||||
|
( $this->_xhtml ? ' /': '' )
|
||||||
|
. '>';
|
||||||
|
|
||||||
} elseif ($token->type == 'text') {
|
} elseif ($token->type == 'text') {
|
||||||
return $this->escape($token->data);
|
return $this->escape($token->data);
|
||||||
@ -80,6 +96,11 @@ class HTMLPurifier_Generator
|
|||||||
function generateAttributes($assoc_array_of_attributes) {
|
function generateAttributes($assoc_array_of_attributes) {
|
||||||
$html = '';
|
$html = '';
|
||||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||||
|
if (!$this->_xhtml) {
|
||||||
|
// remove namespaced attributes
|
||||||
|
if (strpos($key, ':') !== false) continue;
|
||||||
|
// also needed: check for attribute minimization
|
||||||
|
}
|
||||||
$html .= $key.'="'.$this->escape($value).'" ';
|
$html .= $key.'="'.$this->escape($value).'" ';
|
||||||
}
|
}
|
||||||
return rtrim($html);
|
return rtrim($html);
|
||||||
|
@ -52,10 +52,8 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
||||||
$expect[7] = $theta_char;
|
$expect[7] = $theta_char;
|
||||||
|
|
||||||
$default_config = HTMLPurifier_Config::createDefault();
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
$result = $this->gen->generateFromToken($input);
|
||||||
$result = $this->gen->generateFromToken($input, $config[$i]);
|
|
||||||
$this->assertEqual($result, $expect[$i]);
|
$this->assertEqual($result, $expect[$i]);
|
||||||
paintIf($result, $result != $expect[$i]);
|
paintIf($result, $result != $expect[$i]);
|
||||||
}
|
}
|
||||||
@ -122,6 +120,34 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var $config;
|
||||||
|
function assertGeneration($tokens, $expect) {
|
||||||
|
$result = $this->gen->generateFromTokens($tokens, $this->config);
|
||||||
|
$this->assertEqual($expect, $result);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_generateFromTokens_XHTMLoff() {
|
||||||
|
$this->config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->config->set('Core', 'XHTML', false);
|
||||||
|
|
||||||
|
// omit trailing slash
|
||||||
|
$this->assertGeneration(
|
||||||
|
array( new HTMLPurifier_Token_Empty('br') ),
|
||||||
|
'<br>'
|
||||||
|
);
|
||||||
|
|
||||||
|
// there should be a test for attribute minimization, but it is
|
||||||
|
// impossible for something like that to happen due to our current
|
||||||
|
// definitions! fix it later
|
||||||
|
|
||||||
|
// namespaced attributes must be dropped
|
||||||
|
$this->assertGeneration(
|
||||||
|
array( new HTMLPurifier_Token_Start('p', array('xml:lang'=>'fr')) ),
|
||||||
|
'<p>'
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
Loading…
Reference in New Issue
Block a user