mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 08:21:52 +00:00
- XHTML generation can now be turned off, allowing things like <br>
- Docs updated in preparation for 1.1 release git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@422 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
6a33945499
commit
6740ba61af
28
INSTALL
28
INSTALL
@ -26,7 +26,7 @@ any earlier versions.
|
||||
|
||||
I have been unable to get PHP 5.0.5 working on my computer, so if someone
|
||||
wants to test that, be my guest. All tests were done on Windows XP Home,
|
||||
but operating system is quite irrelevant in this particular case.
|
||||
but operating system should not be a major factor in the library.
|
||||
|
||||
|
||||
|
||||
@ -70,21 +70,36 @@ I cannot stress the importance of these two bullets enough. Omitting either
|
||||
of them could have dire consequences not only for security but for plain
|
||||
old usability. You can find a more in-depth discussion of why this is needed
|
||||
in docs/security.txt, in the meantime, try to change your output so this is
|
||||
the case.
|
||||
the case. If you can't, well, we might be able to accommodate you (read
|
||||
section 3).
|
||||
|
||||
|
||||
|
||||
3. Configuring HTML Purifier
|
||||
|
||||
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||
Purifier needs to be told what to do.
|
||||
|
||||
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
||||
switch HTML Purifier's encoding. Note that the availability of encodings is
|
||||
dependent on iconv, and you'll be missing characters if the charset you
|
||||
choose doesn't have them.
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||
|
||||
An example usage for Latin-1 websites:
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
|
||||
For those of you stuck using HTML 4.01 Transitional, you can disable
|
||||
XHTML output like this:
|
||||
|
||||
$config->set('Core', 'XHTML', false);
|
||||
|
||||
However, I strongly recommend that you use XHTML. Currently, we can only
|
||||
guarantee transitional-compliant output; future versions will also allow strict
|
||||
output.
|
||||
|
||||
|
||||
|
||||
3. Using the code
|
||||
@ -106,7 +121,7 @@ advice on what to do if HTML Purifier is slowing down your application.
|
||||
|
||||
4. Quick install
|
||||
|
||||
If your website is in UTF-8, use this code:
|
||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
|
||||
<?php
|
||||
set_include_path('/path/to/htmlpurifier/library'
|
||||
@ -116,7 +131,7 @@ If your website is in UTF-8, use this code:
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
|
||||
If your website is in a different encoding, use this code:
|
||||
If your website is in a different encoding or doctype, use this code:
|
||||
|
||||
<?php
|
||||
set_include_path('/path/to/htmlpurifier/library'
|
||||
@ -125,6 +140,7 @@ If your website is in a different encoding, use this code:
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
||||
$config->set('Core', 'XHTML', true); //replace with false if HTML 4.01
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
|
6
NEWS
6
NEWS
@ -1,15 +1,13 @@
|
||||
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||
|
||||
1.1.0, unknown release date
|
||||
1.1.0, projected 2006-09-16
|
||||
- Made URI validator more forgiving: will ignore leading and trailing
|
||||
quotes, apostrophes and less than or greater than signs.
|
||||
- Enforce alphanumeric namespace and directive names for configuration.
|
||||
- Directive documentation generation using XSLT
|
||||
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||
|
||||
1.0.2, unknown release date
|
||||
(bugfix release may be dropped if no bugs found)
|
||||
- XHTML generation can now be turned off, allowing things like <br>
|
||||
|
||||
1.0.1, released 2006-09-04
|
||||
- Fixed slight bug in DOMLex attribute parsing
|
||||
|
3
TODO
3
TODO
@ -5,9 +5,6 @@ Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (very tricky issue)
|
||||
|
||||
1.1 release
|
||||
- Allow HTML 4.01 output (cosmetic changes to the generator)
|
||||
|
||||
1.2 release
|
||||
- Additional support for poorly written HTML
|
||||
- Implement all non-essential attribute transforms
|
||||
|
7
WYSIWYG
7
WYSIWYG
@ -1,6 +1,6 @@
|
||||
|
||||
WYSIWYG - What You See Is What You Get
|
||||
HTMLPurifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
||||
HTML Purifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
||||
|
||||
Javascript-based WYSIWYG editors, simply stated, are quite amazing. But I've
|
||||
always been wary about using them due to security issues: they handle the
|
||||
@ -13,6 +13,9 @@ other markup languages still reign supreme. Put simply: filtering HTML is
|
||||
hard work, and these WYSIWYG authors don't offer anything to alleviate that
|
||||
trouble. Therein lies the solution:
|
||||
|
||||
HTMLPurifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||
|
||||
Enough said.
|
||||
|
||||
There is a proof-of-concept integration of HTML Purifier with the Mantis
|
||||
bugtracker at http://hp.jpsband.org/mantis/
|
||||
|
@ -15,6 +15,12 @@ require_once 'HTMLPurifier/AttrContext.php';
|
||||
class HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is minimized. Only the
|
||||
* boolean attribute vapourware would use this.
|
||||
*/
|
||||
var $minimized = false;
|
||||
|
||||
/**
|
||||
* Abstract function defined for functions that validate and clean strings.
|
||||
*
|
||||
|
@ -15,6 +15,14 @@ HTMLPurifier_ConfigDef::define(
|
||||
'generateFromTokens.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
'Core', 'XHTML', true, 'bool',
|
||||
'Determines whether or not output is XHTML or not. When disabled, HTML '.
|
||||
'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
|
||||
'such as boolean attribute expansion and trailing slashes in empty tags. '.
|
||||
'This directive was available since 1.1.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Generates HTML from tokens.
|
||||
*/
|
||||
@ -22,11 +30,16 @@ class HTMLPurifier_Generator
|
||||
{
|
||||
|
||||
/**
|
||||
* Bool cache of the CleanUTF8DuringGeneration directive.
|
||||
* Bool cache of %Core.CleanUTF8DuringGeneration
|
||||
* @private
|
||||
*/
|
||||
var $_clean_utf8 = false;
|
||||
|
||||
/**
|
||||
* Bool cache of %Core.XHTML
|
||||
*/
|
||||
var $_xhtml = true;
|
||||
|
||||
/**
|
||||
* Generates HTML from an array of tokens.
|
||||
* @param $tokens Array of HTMLPurifier_Token
|
||||
@ -38,6 +51,7 @@ class HTMLPurifier_Generator
|
||||
$html = '';
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
||||
$this->_xhtml = $config->get('Core', 'XHTML');
|
||||
if (!$tokens) return '';
|
||||
foreach ($tokens as $token) {
|
||||
$html .= $this->generateFromToken($token);
|
||||
@ -61,7 +75,9 @@ class HTMLPurifier_Generator
|
||||
|
||||
} elseif ($token->type == 'empty') {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||
( $this->_xhtml ? ' /': '' )
|
||||
. '>';
|
||||
|
||||
} elseif ($token->type == 'text') {
|
||||
return $this->escape($token->data);
|
||||
@ -80,6 +96,11 @@ class HTMLPurifier_Generator
|
||||
function generateAttributes($assoc_array_of_attributes) {
|
||||
$html = '';
|
||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||
if (!$this->_xhtml) {
|
||||
// remove namespaced attributes
|
||||
if (strpos($key, ':') !== false) continue;
|
||||
// also needed: check for attribute minimization
|
||||
}
|
||||
$html .= $key.'="'.$this->escape($value).'" ';
|
||||
}
|
||||
return rtrim($html);
|
||||
|
@ -52,10 +52,8 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
||||
$expect[7] = $theta_char;
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach ($inputs as $i => $input) {
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
$result = $this->gen->generateFromToken($input, $config[$i]);
|
||||
$result = $this->gen->generateFromToken($input);
|
||||
$this->assertEqual($result, $expect[$i]);
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
@ -122,6 +120,34 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
var $config;
|
||||
function assertGeneration($tokens, $expect) {
|
||||
$result = $this->gen->generateFromTokens($tokens, $this->config);
|
||||
$this->assertEqual($expect, $result);
|
||||
}
|
||||
|
||||
function test_generateFromTokens_XHTMLoff() {
|
||||
$this->config = HTMLPurifier_Config::createDefault();
|
||||
$this->config->set('Core', 'XHTML', false);
|
||||
|
||||
// omit trailing slash
|
||||
$this->assertGeneration(
|
||||
array( new HTMLPurifier_Token_Empty('br') ),
|
||||
'<br>'
|
||||
);
|
||||
|
||||
// there should be a test for attribute minimization, but it is
|
||||
// impossible for something like that to happen due to our current
|
||||
// definitions! fix it later
|
||||
|
||||
// namespaced attributes must be dropped
|
||||
$this->assertGeneration(
|
||||
array( new HTMLPurifier_Token_Start('p', array('xml:lang'=>'fr')) ),
|
||||
'<p>'
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
Loading…
Reference in New Issue
Block a user