mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 08:21:52 +00:00
[3.0.0] Fully implement CSS extraction and cleaning. See NEWS for more information, it is now a Filter.
- Some Lexer things were moved around git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1469 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
831f552ec5
commit
5b3431d889
6
NEWS
6
NEWS
@ -18,8 +18,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
+ PHP4 reference/foreach cruft removed (in progress)
|
||||
! CSS properties are now case-insensitive
|
||||
! DefinitionCacheFactory now can register new implementations
|
||||
! <style> tags can now be extracted from input HTML using %HTML.ExtractStyleBlocks.
|
||||
These contents can be retrieved from $context->get('StyleBlocks');
|
||||
! New HTMLPurifier_Filter_ExtractStyleBlocks for extracting <style> from
|
||||
documents and cleaning their contents up. Requires the CSSTidy library
|
||||
<http://csstidy.sourceforge.net/>. You can access the blocks with the
|
||||
'StyleBlocks' Context variable ($purifier->context->get('StyleBlocks'))
|
||||
. Unit tests for Injector improved
|
||||
|
||||
2.1.3, released 2007-11-05
|
||||
|
104
library/HTMLPurifier/Filter/ExtractStyleBlocks.php
Normal file
104
library/HTMLPurifier/Filter/ExtractStyleBlocks.php
Normal file
@ -0,0 +1,104 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Filter.php';
|
||||
|
||||
/**
 * This filter extracts <style> blocks from input HTML, cleans them up
 * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
 * so they can be used elsewhere in the document.
 *
 * The extracted blocks are removed from the HTML that is handed on to the
 * rest of the purification pipeline.
 *
 * @note See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
 * @warning Cleaning requires the CSSTidy library
 *          <http://csstidy.sourceforge.net/> to be loaded by the caller.
 * @todo Allow for selectors to be munged/checked
 * @todo Expose CSSTidy configuration so that custom changes can be made
 */
class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
{

    /** Name of this filter. */
    public $name = 'ExtractStyleBlocks';

    /** Scratch list filled by styleCallback() during a single preFilter() run. */
    private $_styleMatches = array();

    /** csstidy instance used for cleaning, or false when cleaning is disabled. */
    private $_tidy;

    /** When true, <, > and & in the cleaned CSS are left unescaped. */
    private $_disableCharacterEscaping;

    /**
     * @param $tidy Instance of csstidy to use, false to turn off cleaning,
     *        and null to automatically instantiate
     * @param $disable_character_escaping Whether or not to stop munging
     *        <, > and &. This can be set to true if the CSS will
     *        be placed in an external style and not inline.
     */
    public function __construct($tidy = null, $disable_character_escaping = false) {
        if ($tidy === null) $tidy = new csstidy();
        $this->_tidy = $tidy;
        $this->_disableCharacterEscaping = $disable_character_escaping;
    }

    /**
     * Save the contents of CSS blocks to style matches
     * @param $matches preg_replace style $matches array; index 1 holds the
     *        text captured between <style ...> and </style>
     * @return Empty string, so the matched <style> element is removed from
     *         the HTML being filtered
     */
    protected function styleCallback($matches) {
        $this->_styleMatches[] = $matches[1];
        // Explicit replacement text; previously the implicit null return
        // was what caused the match to be replaced with nothing.
        return '';
    }

    /**
     * Removes inline <style> tags from HTML, saves them for later use
     *
     * The collected blocks (cleaned with cleanCSS() unless cleaning was
     * turned off in the constructor) are registered in $context under the
     * name 'StyleBlocks'.
     *
     * @param $html HTML to filter
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context; must not already
     *        contain a 'StyleBlocks' variable
     * @return HTML with the matched <style> blocks stripped out
     * @todo Extend to indicate non-text/css style blocks
     */
    public function preFilter($html, $config, &$context) {
        // i: case-insensitive tag names, s: dot matches newlines,
        // U: ungreedy, so each <style>...</style> pair is matched on its own
        $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
        $style_blocks = $this->_styleMatches;
        $this->_styleMatches = array(); // reset
        // Clean BEFORE registering: the published value is then correct
        // regardless of whether register() binds by reference or by value,
        // and no dangling foreach reference is left behind.
        if ($this->_tidy) {
            foreach ($style_blocks as $i => $style) {
                $style_blocks[$i] = $this->cleanCSS($style, $config, $context);
            }
        }
        $context->register('StyleBlocks', $style_blocks); // $context must not be reused
        return $html;
    }

    /**
     * Takes CSS (the stuff found in <style>) and cleans it.
     *
     * Every declaration whose property is not present in the CSS definition,
     * or whose value fails that property's validator, is dropped; accepted
     * values are replaced by the validator's return value.
     *
     * @warning Requires CSSTidy <http://csstidy.sourceforge.net/>
     * @param $css     CSS styling to clean
     * @param $config  Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Cleaned CSS
     */
    public function cleanCSS($css, $config, &$context) {
        $this->_tidy->parse($css);
        $css_definition = $config->getDefinition('CSS');
        foreach ($this->_tidy->css as &$decls) {
            // $decls are all CSS declarations inside an @ selector
            foreach ($decls as &$style) {
                // iterating by value here, so unsetting entries of $style
                // inside the loop does not disturb the iteration
                foreach ($style as $name => $value) {
                    if (!isset($css_definition->info[$name])) {
                        unset($style[$name]);
                        continue;
                    }
                    $def = $css_definition->info[$name];
                    $ret = $def->validate($value, $config, $context);
                    if ($ret === false) unset($style[$name]);
                    else $style[$name] = $ret;
                }
            }
            unset($style); // break the reference before the next outer pass
        }
        unset($decls); // break the reference into $this->_tidy->css
        // remove stuff that shouldn't be used, could be reenabled
        // after security risks are analyzed
        $this->_tidy->import = array();
        $this->_tidy->charset = null;
        $this->_tidy->namespace = null;
        $printer = new csstidy_print($this->_tidy);
        $css = $printer->plain();
        // we are going to escape any special characters <>& to ensure
        // that no funny business occurs (i.e. </style> in a font-family prop).
        // The replacements are CSS hexadecimal escapes; the trailing space
        // terminates each escape sequence.
        if (!$this->_disableCharacterEscaping) {
            $css = str_replace(
                array('<',    '>',    '&'),
                array('\3C ', '\3E ', '\26 '),
                $css
            );
        }
        return $css;
    }

}
|
||||
|
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/Filter.php';
|
||||
class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
|
||||
{
|
||||
|
||||
public $name = 'YouTube preservation';
|
||||
public $name = 'YouTube';
|
||||
|
||||
public function preFilter($html, $config, &$context) {
|
||||
$pre_regex = '#<object[^>]+>.+?'.
|
||||
|
@ -79,14 +79,6 @@ It is not necessary and will have no effect for PHP 4.
|
||||
This directive has been available since 2.1.0.
|
||||
');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'ExtractStyleBlocks', false, 'bool', '
|
||||
This directive enables extraction of <code>style</code> tags contents so
|
||||
that they can be incorporated in the <code>head</code> of the document,
|
||||
after sufficient validation.
|
||||
This directive has been available since 3.0.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||
*
|
||||
@ -346,34 +338,6 @@ class HTMLPurifier_Lexer
|
||||
// represent non-SGML characters (horror, horror!)
|
||||
$html = HTMLPurifier_Encoder::cleanUTF8($html);
|
||||
|
||||
if ($config->get('HTML', 'ExtractStyleBlocks')) {
|
||||
// extract <style> CSS blocks
|
||||
$html = $this->extractStyleBlocks($html, $config, $context);
|
||||
}
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
private $_styleMatches = array();
|
||||
|
||||
/**
|
||||
* Save the contents of CSS blocks to style matches
|
||||
*/
|
||||
protected function styleCallback($matches) {
|
||||
$this->_styleMatches[] = $matches[1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes inline <style> tags from HTML, saves them for later use
|
||||
* @todo Extend to indicate non-text/css style blocks
|
||||
*/
|
||||
public function extractStyleBlocks($html, $config, $context) {
|
||||
$html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
|
||||
$style_blocks = $this->_styleMatches;
|
||||
$this->_styleMatches = array(); // reset
|
||||
// this is a persistent context, so we have to overwrite it with every call
|
||||
if ($context->exists('StyleBlocks')) $context->destroy('StyleBlocks');
|
||||
$context->register('StyleBlocks', $style_blocks);
|
||||
return $html;
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,9 @@ set_time_limit(0);
|
||||
// Where is SimpleTest located?
|
||||
$simpletest_location = '/path/to/simpletest/';
|
||||
|
||||
// Where is CSSTidy located?
|
||||
$csstidy_location = '/path/to/csstidy/';
|
||||
|
||||
// How many times should profiling scripts iterate over the function? More runs
|
||||
// means more accurate results, but they'll take longer to perform.
|
||||
$GLOBALS['HTMLPurifierTest']['Runs'] = 2;
|
||||
|
112
tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
Normal file
112
tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
Normal file
@ -0,0 +1,112 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Filter/ExtractStyleBlocks.php';
|
||||
|
||||
/**
 * Tests for HTMLPurifier_Filter_ExtractStyleBlocks: extraction of <style>
 * blocks through preFilter() and CSS cleaning through cleanCSS().
 * @todo Assimilate CSSTidy into our library
 */
class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
{

    // usual use case: filter attached to a purifier, blocks read from its context
    function test_tokenizeHTML_extractStyleBlocks() {
        $purifier = new HTMLPurifier($this->config);
        $filter = new HTMLPurifier_Filter_ExtractStyleBlocks();
        $purifier->addFilter($filter);
        $purified = $purifier->purify('<style type="text/css">.foo {text-align:center;bogus:remove-me;}</style>Test<style>* {font-size:12pt;}</style>');
        $this->assertIdentical($purified, 'Test');
        $expected_blocks = array(
            ".foo {\ntext-align:center;\n}",
            "* {\nfont-size:12pt;\n}"
        );
        $this->assertIdentical($purifier->context->get('StyleBlocks'), $expected_blocks);
    }

    /**
     * Runs preFilter() with cleaning disabled and checks both outputs.
     * @param $html   Input HTML
     * @param $expect Expected filtered HTML, or true if it should equal $html
     * @param $styles Expected contents of the 'StyleBlocks' context variable
     */
    function assertExtractStyleBlocks($html, $expect = true, $styles = array()) {
        $expected_html = ($expect === true) ? $html : $expect;
        $extractor = new HTMLPurifier_Filter_ExtractStyleBlocks(false); // disable cleaning
        $filtered = $extractor->preFilter($html, $this->config, $this->context);
        $this->assertIdentical($filtered, $expected_html);
        $this->assertIdentical($this->context->get('StyleBlocks'), $styles);
    }

    function test_extractStyleBlocks_preserve() {
        $this->assertExtractStyleBlocks('Foobar');
    }

    function test_extractStyleBlocks_allStyle() {
        $this->assertExtractStyleBlocks('<style>foo</style>', '', array('foo'));
    }

    function test_extractStyleBlocks_multipleBlocks() {
        $input = "<style>1</style><style>2</style>NOP<style>4</style>";
        $this->assertExtractStyleBlocks($input, "NOP", array('1', '2', '4'));
    }

    function test_extractStyleBlocks_blockWithAttributes() {
        $input = '<style type="text/css">css</style>';
        $this->assertExtractStyleBlocks($input, '', array('css'));
    }

    function test_extractStyleBlocks_styleWithPadding() {
        $input    = "Alas<styled>Awesome</styled>\n<style>foo</style> Trendy!";
        $filtered = "Alas<styled>Awesome</styled>\n Trendy!";
        $this->assertExtractStyleBlocks($input, $filtered, array('foo'));
    }

    /**
     * Runs cleanCSS() on a default-constructed filter and checks the result.
     * @param $input  CSS to clean
     * @param $expect Expected cleaned CSS, or true if it should equal $input
     */
    function assertCleanCSS($input, $expect = true) {
        $expected_css = ($expect === true) ? $input : $expect;
        $cleaner = new HTMLPurifier_Filter_ExtractStyleBlocks();
        $cleaned = $cleaner->cleanCSS($input, $this->config, $this->context);
        $this->assertIdentical($cleaned, $expected_css);
    }

    function test_cleanCSS_malformed() {
        $this->assertCleanCSS('</style>', '');
    }

    function test_cleanCSS_selector() {
        $this->assertCleanCSS("a .foo #id div.cl#foo {\nfont-weight:700;\n}");
    }

    function test_cleanCSS_angledBrackets() {
        $raw     = ".class {\nfont-family:'</style>';\n}";
        $escaped = ".class {\nfont-family:'\\3C /style\\3E ';\n}";
        $this->assertCleanCSS($raw, $escaped);
    }

    function test_cleanCSS_angledBrackets2() {
        // Intentionally empty for now: CSSTidy's behavior in this case is
        // wrong, and should be fixed. The assertion to enable afterwards:
        //$this->assertCleanCSS(
        //    "span[title=\"</style>\"] {\nfont-size:12pt;\n}",
        //    "span[title=\"\\3C /style\\3E \"] {\nfont-size:12pt;\n}"
        //);
    }

    function test_cleanCSS_bogus() {
        $this->assertCleanCSS("div {bogus:tree;}", "div {\n}");
    }

    function test_cleanCSS_escapeCodes() {
        // already-escaped input must come back unchanged
        $this->assertCleanCSS(".class {\nfont-family:'\\3C /style\\3E ';\n}");
    }

    function test_cleanCSS_noEscapeCodes() {
        // second constructor argument turns character escaping off
        $cleaner = new HTMLPurifier_Filter_ExtractStyleBlocks(null, true);
        $input = ".class {\nfont-family:'</style>';\n}";
        $cleaned = $cleaner->cleanCSS($input, $this->config, $this->context);
        $this->assertIdentical($cleaned, $input);
    }

}
|
@ -31,48 +31,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
|
||||
}
|
||||
|
||||
// HTMLPurifier_Lexer->extractStyleBlocks() --------------------------------
|
||||
|
||||
function assertExtractStyleBlocks($html, $expect = true, $styles = array()) {
|
||||
$lexer = HTMLPurifier_Lexer::create($this->config);
|
||||
if ($expect === true) $expect = $html;
|
||||
$result = $lexer->extractStyleBlocks($html, $this->config, $this->context);
|
||||
$this->assertIdentical($result, $expect);
|
||||
$this->assertIdentical($this->context->get('StyleBlocks'), $styles);
|
||||
}
|
||||
|
||||
function test_extractStyleBlocks_preserve() {
|
||||
$this->assertExtractStyleBlocks('Foobar');
|
||||
}
|
||||
|
||||
function test_extractStyleBlocks_allStyle() {
|
||||
$this->assertExtractStyleBlocks('<style>foo</style>', '', array('foo'));
|
||||
}
|
||||
|
||||
function test_extractStyleBlocks_multipleBlocks() {
|
||||
$this->assertExtractStyleBlocks(
|
||||
"<style>1</style><style>2</style>NOP<style>4</style>",
|
||||
"NOP",
|
||||
array('1', '2', '4')
|
||||
);
|
||||
}
|
||||
|
||||
function test_extractStyleBlocks_blockWithAttributes() {
|
||||
$this->assertExtractStyleBlocks(
|
||||
'<style type="text/css">css</style>',
|
||||
'',
|
||||
array('css')
|
||||
);
|
||||
}
|
||||
|
||||
function test_extractStyleBlocks_styleWithPadding() {
|
||||
$this->assertExtractStyleBlocks(
|
||||
"Alas<styled>Awesome</styled>\n<style>foo</style> Trendy!",
|
||||
"Alas<styled>Awesome</styled>\n Trendy!",
|
||||
array('foo')
|
||||
);
|
||||
}
|
||||
|
||||
// HTMLPurifier_Lexer->parseData() -----------------------------------------
|
||||
|
||||
function assertParseData($input, $expect = true) {
|
||||
@ -553,17 +511,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_extractStyleBlocks() {
|
||||
$this->config->set('HTML', 'ExtractStyleBlocks', true);
|
||||
$this->assertTokenization(
|
||||
'<style type="text/css">.foo {text-align:center;}</style>Test',
|
||||
array(
|
||||
new HTMLPurifier_Token_Text('Test')
|
||||
)
|
||||
);
|
||||
$this->assertIdentical($this->context->get('StyleBlocks'), array('.foo {text-align:center;}'));
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
function test_tokenizeHTML_() {
|
||||
|
@ -16,6 +16,7 @@ $GLOBALS['HTMLPurifierTest'] = array();
|
||||
$GLOBALS['HTMLPurifierTest']['PEAR'] = false; // do PEAR tests
|
||||
$GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") && class_exists('DOMDocument');
|
||||
$simpletest_location = 'simpletest/'; // reasonable guess
|
||||
$csstidy_location = false;
|
||||
|
||||
// load SimpleTest
|
||||
if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php';
|
||||
@ -24,6 +25,11 @@ require_once $simpletest_location . 'unit_tester.php';
|
||||
require_once $simpletest_location . 'reporter.php';
|
||||
require_once $simpletest_location . 'mock_objects.php';
|
||||
|
||||
if ($csstidy_location !== false) {
|
||||
require_once $csstidy_location . 'class.csstidy.php';
|
||||
require_once $csstidy_location . 'class.csstidy_print.php';
|
||||
}
|
||||
|
||||
error_reporting(E_ALL | E_STRICT); // after SimpleTest is loaded, turn on compile time errors
|
||||
|
||||
// load Debugger
|
||||
|
@ -75,6 +75,7 @@ $test_files[] = 'HTMLPurifier/ErrorCollectorTest.php';
|
||||
$test_files[] = 'HTMLPurifier/EncoderTest.php';
|
||||
$test_files[] = 'HTMLPurifier/EntityLookupTest.php';
|
||||
$test_files[] = 'HTMLPurifier/EntityParserTest.php';
|
||||
$test_files[] = 'HTMLPurifier/Filter/ExtractStyleBlocksTest.php';
|
||||
$test_files[] = 'HTMLPurifier/GeneratorTest.php';
|
||||
$test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php';
|
||||
$test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';
|
||||
|
Loading…
Reference in New Issue
Block a user