mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 16:31:53 +00:00
[3.0.0] Fully implement CSS extraction and cleaning. See NEWS for more information, it is now a Filter.
- Some Lexer things were moved around git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1469 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
831f552ec5
commit
5b3431d889
6
NEWS
6
NEWS
@ -18,8 +18,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
+ PHP4 reference/foreach cruft removed (in progress)
|
+ PHP4 reference/foreach cruft removed (in progress)
|
||||||
! CSS properties are now case-insensitive
|
! CSS properties are now case-insensitive
|
||||||
! DefinitionCacheFactory now can register new implementations
|
! DefinitionCacheFactory now can register new implementations
|
||||||
! <style> tags can now be extracted from input HTML using %HTML.ExtractStyleBlocks.
|
! New HTMLPurifier_Filter_ExtractStyleBlocks for extracting <style> from
|
||||||
These contents can be retrieved from $context->get('StyleBlocks');
|
documents and cleaning their contents up. Requires the CSSTidy library
|
||||||
|
<http://csstidy.sourceforge.net/>. You can access the blocks with the
|
||||||
|
'StyleBlocks' Context variable ($purifier->context->get('StyleBlocks'))
|
||||||
. Unit tests for Injector improved
|
. Unit tests for Injector improved
|
||||||
|
|
||||||
2.1.3, released 2007-11-05
|
2.1.3, released 2007-11-05
|
||||||
|
104
library/HTMLPurifier/Filter/ExtractStyleBlocks.php
Normal file
104
library/HTMLPurifier/Filter/ExtractStyleBlocks.php
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Filter.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This filter extracts <style> blocks from input HTML, cleans them up
|
||||||
|
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
|
||||||
|
* so they can be used elsewhere in the document.
|
||||||
|
* @note See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
|
||||||
|
* @todo Allow for selectors to be munged/checked
|
||||||
|
* @todo Expose CSSTidy configuration so that custom changes can be made
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
{

    /** Name under which this filter identifies itself. */
    public $name = 'ExtractStyleBlocks';

    /**
     * Raw contents of the <style> blocks collected by styleCallback()
     * during the preFilter() call currently in progress.
     */
    private $_styleMatches = array();

    /**
     * $_tidy is the csstidy instance used for cleaning (or false when
     * cleaning is turned off); $_disableCharacterEscaping turns off the
     * escaping of <, > and & in cleaned CSS.
     */
    private $_tidy, $_disableCharacterEscaping;

    /**
     * @param $tidy Instance of csstidy to use, false to turn off cleaning,
     *        and null to automatically instantiate
     * @param $disable_character_escaping Whether or not to stop munging
     *        <, > and &.  This can be set to true if the CSS will
     *        be placed in an external style and not inline.
     */
    public function __construct($tidy = null, $disable_character_escaping = false) {
        if ($tidy === null) $tidy = new csstidy();
        $this->_tidy = $tidy;
        $this->_disableCharacterEscaping = $disable_character_escaping;
    }

    /**
     * Save the contents of CSS blocks to style matches
     * @param $matches preg_replace style $matches array
     * @return Empty string, so the matched <style> element is removed
     *         from the HTML being filtered
     */
    protected function styleCallback($matches) {
        $this->_styleMatches[] = $matches[1];
        return '';
    }

    /**
     * Removes inline <style> tags from HTML, saves them for later use
     * in the 'StyleBlocks' context variable. When a csstidy instance is
     * available, each block is run through cleanCSS() first.
     * @param $html HTML to extract the style blocks from
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context; 'StyleBlocks' is
     *        registered on it, so it must not be reused between calls
     * @return HTML with the <style> elements removed
     * @todo Extend to indicate non-text/css style blocks
     */
    public function preFilter($html, $config, &$context) {
        // (.*) rather than (.+): with a mandatory character an empty
        // <style></style> would force the ungreedy match to run on to
        // the NEXT </style>, swallowing the markup in between.
        $result = preg_replace_callback(
            '#<style(?:\s.*)?>(.*)</style>#isU',
            array($this, 'styleCallback'),
            $html
        );
        if ($result === null) {
            // PCRE failure (e.g. backtrack limit): keep the document
            // untouched and discard whatever was partially collected
            $this->_styleMatches = array();
        } else {
            $html = $result;
        }
        $style_blocks = $this->_styleMatches;
        $this->_styleMatches = array(); // reset
        if ($this->_tidy) {
            // write back by key; no reference is left dangling
            foreach ($style_blocks as $i => $style) {
                $style_blocks[$i] = $this->cleanCSS($style, $config, $context);
            }
        }
        // registered after cleaning, so the stored blocks are the clean ones
        $context->register('StyleBlocks', $style_blocks);
        return $html;
    }

    /**
     * Takes CSS (the stuff found in <style>) and cleans it.
     * Properties unknown to the CSS definition, or whose value fails
     * validation, are dropped; @import, @charset and @namespace
     * information is discarded.
     * @warning Requires CSSTidy <http://csstidy.sourceforge.net/>; this
     *          method must not be called when cleaning was turned off
     *          in the constructor.
     * @param $css     CSS styling to clean
     * @param $config  Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Cleaned CSS
     */
    public function cleanCSS($css, $config, &$context) {
        $this->_tidy->parse($css);
        $css_definition = $config->getDefinition('CSS');
        foreach ($this->_tidy->css as $at => $decls) {
            // $decls are all CSS declarations inside an @ selector
            foreach ($decls as $selector => $style) {
                $clean = array();
                foreach ($style as $name => $value) {
                    if (!isset($css_definition->info[$name])) continue;
                    $def = $css_definition->info[$name];
                    $ret = $def->validate($value, $config, $context);
                    if ($ret === false) continue;
                    $clean[$name] = $ret;
                }
                // a selector left with no valid properties is kept, empty
                $this->_tidy->css[$at][$selector] = $clean;
            }
        }
        // remove stuff that shouldn't be used, could be reenabled
        // after security risks are analyzed
        $this->_tidy->import = array();
        $this->_tidy->charset = null;
        $this->_tidy->namespace = null;
        $printer = new csstidy_print($this->_tidy);
        $css = $printer->plain();
        // we are going to escape any special characters <>& to ensure
        // that no funny business occurs (i.e. </style> in a font-family prop).
        if (!$this->_disableCharacterEscaping) {
            $css = str_replace(
                array('<',    '>',    '&'),
                array('\3C ', '\3E ', '\26 '),
                $css
            );
        }
        return $css;
    }

}
|
||||||
|
|
@ -5,7 +5,7 @@ require_once 'HTMLPurifier/Filter.php';
|
|||||||
class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
|
class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
|
||||||
{
|
{
|
||||||
|
|
||||||
public $name = 'YouTube preservation';
|
public $name = 'YouTube';
|
||||||
|
|
||||||
public function preFilter($html, $config, &$context) {
|
public function preFilter($html, $config, &$context) {
|
||||||
$pre_regex = '#<object[^>]+>.+?'.
|
$pre_regex = '#<object[^>]+>.+?'.
|
||||||
|
@ -79,14 +79,6 @@ It is not necessary and will have no effect for PHP 4.
|
|||||||
This directive has been available since 2.1.0.
|
This directive has been available since 2.1.0.
|
||||||
');
|
');
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'HTML', 'ExtractStyleBlocks', false, 'bool', '
|
|
||||||
This directive enables extraction of <code>style</code> tags contents so
|
|
||||||
that they can be incorporated in the <code>head</code> of the document,
|
|
||||||
after sufficient validation.
|
|
||||||
This directive has been available since 3.0.0.
|
|
||||||
');
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||||
*
|
*
|
||||||
@ -346,34 +338,6 @@ class HTMLPurifier_Lexer
|
|||||||
// represent non-SGML characters (horror, horror!)
|
// represent non-SGML characters (horror, horror!)
|
||||||
$html = HTMLPurifier_Encoder::cleanUTF8($html);
|
$html = HTMLPurifier_Encoder::cleanUTF8($html);
|
||||||
|
|
||||||
if ($config->get('HTML', 'ExtractStyleBlocks')) {
|
|
||||||
// extract <style> CSS blocks
|
|
||||||
$html = $this->extractStyleBlocks($html, $config, $context);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $html;
|
|
||||||
}
|
|
||||||
|
|
||||||
private $_styleMatches = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Save the contents of CSS blocks to style matches
|
|
||||||
*/
|
|
||||||
protected function styleCallback($matches) {
|
|
||||||
$this->_styleMatches[] = $matches[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes inline <style> tags from HTML, saves them for later use
|
|
||||||
* @todo Extend to indicate non-text/css style blocks
|
|
||||||
*/
|
|
||||||
public function extractStyleBlocks($html, $config, $context) {
|
|
||||||
$html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
|
|
||||||
$style_blocks = $this->_styleMatches;
|
|
||||||
$this->_styleMatches = array(); // reset
|
|
||||||
// this is a persistent context, so we have to overwrite it with every call
|
|
||||||
if ($context->exists('StyleBlocks')) $context->destroy('StyleBlocks');
|
|
||||||
$context->register('StyleBlocks', $style_blocks);
|
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,6 +14,9 @@ set_time_limit(0);
|
|||||||
// Where is SimpleTest located?
|
// Where is SimpleTest located?
|
||||||
$simpletest_location = '/path/to/simpletest/';
|
$simpletest_location = '/path/to/simpletest/';
|
||||||
|
|
||||||
|
// Where is CSSTidy located?
|
||||||
|
$csstidy_location = '/path/to/csstidy/';
|
||||||
|
|
||||||
// How many times should profiling scripts iterate over the function? More runs
|
// How many times should profiling scripts iterate over the function? More runs
|
||||||
// means more accurate results, but they'll take longer to perform.
|
// means more accurate results, but they'll take longer to perform.
|
||||||
$GLOBALS['HTMLPurifierTest']['Runs'] = 2;
|
$GLOBALS['HTMLPurifierTest']['Runs'] = 2;
|
||||||
|
112
tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
Normal file
112
tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Filter/ExtractStyleBlocks.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @todo Assimilate CSSTidy into our library
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
{

    /**
     * Usual use case: the filter is attached to a purifier, the <style>
     * elements vanish from the output and their cleaned contents end up
     * in the 'StyleBlocks' context variable.
     */
    public function test_tokenizeHTML_extractStyleBlocks() {
        $input = '<style type="text/css">.foo {text-align:center;bogus:remove-me;}</style>Test<style>* {font-size:12pt;}</style>';
        $expected_blocks = array(
            ".foo {\ntext-align:center;\n}",
            "* {\nfont-size:12pt;\n}"
        );
        $purifier = new HTMLPurifier($this->config);
        $purifier->addFilter(new HTMLPurifier_Filter_ExtractStyleBlocks());
        $output = $purifier->purify($input);
        $this->assertIdentical($output, 'Test');
        $this->assertIdentical($purifier->context->get('StyleBlocks'), $expected_blocks);
    }

    /**
     * Runs preFilter() with cleaning disabled and checks both the
     * remaining HTML and the extracted style blocks.
     * @param $html   Input HTML
     * @param $expect Expected HTML afterwards, true meaning "unchanged"
     * @param $styles Expected contents of the 'StyleBlocks' variable
     */
    public function assertExtractStyleBlocks($html, $expect = true, $styles = array()) {
        if ($expect === true) {
            $expect = $html;
        }
        // false: extraction only, no CSSTidy cleaning
        $extractor = new HTMLPurifier_Filter_ExtractStyleBlocks(false);
        $filtered = $extractor->preFilter($html, $this->config, $this->context);
        $this->assertIdentical($filtered, $expect);
        $this->assertIdentical($this->context->get('StyleBlocks'), $styles);
    }

    /** HTML without any style element passes through untouched. */
    public function test_extractStyleBlocks_preserve() {
        $this->assertExtractStyleBlocks('Foobar');
    }

    /** A document consisting solely of a style element becomes empty. */
    public function test_extractStyleBlocks_allStyle() {
        $this->assertExtractStyleBlocks('<style>foo</style>', '', array('foo'));
    }

    /** Several style elements are collected in document order. */
    public function test_extractStyleBlocks_multipleBlocks() {
        $html = "<style>1</style><style>2</style>NOP<style>4</style>";
        $this->assertExtractStyleBlocks($html, "NOP", array('1', '2', '4'));
    }

    /** Attributes on the style element do not prevent extraction. */
    public function test_extractStyleBlocks_blockWithAttributes() {
        $html = '<style type="text/css">css</style>';
        $this->assertExtractStyleBlocks($html, '', array('css'));
    }

    /** Look-alike tags and surrounding text are left in place. */
    public function test_extractStyleBlocks_styleWithPadding() {
        $before = "Alas<styled>Awesome</styled>\n<style>foo</style> Trendy!";
        $after  = "Alas<styled>Awesome</styled>\n Trendy!";
        $this->assertExtractStyleBlocks($before, $after, array('foo'));
    }

    /**
     * Runs cleanCSS() on a default-configured filter and compares the
     * result.
     * @param $input  CSS to clean
     * @param $expect Expected cleaned CSS, true meaning "same as input"
     */
    public function assertCleanCSS($input, $expect = true) {
        if ($expect === true) {
            $expect = $input;
        }
        $cleaner = new HTMLPurifier_Filter_ExtractStyleBlocks();
        $cleaned = $cleaner->cleanCSS($input, $this->config, $this->context);
        $this->assertIdentical($cleaned, $expect);
    }

    /** Input that is not CSS at all cleans to nothing. */
    public function test_cleanCSS_malformed() {
        $this->assertCleanCSS('</style>', '');
    }

    /** Compound selectors survive cleaning unchanged. */
    public function test_cleanCSS_selector() {
        $this->assertCleanCSS("a .foo #id div.cl#foo {\nfont-weight:700;\n}");
    }

    /** Angled brackets inside a property value get escaped. */
    public function test_cleanCSS_angledBrackets() {
        $dirty = ".class {\nfont-family:'</style>';\n}";
        $clean = ".class {\nfont-family:'\\3C /style\\3E ';\n}";
        $this->assertCleanCSS($dirty, $clean);
    }

    /** Angled brackets inside a selector; currently not asserted. */
    public function test_cleanCSS_angledBrackets2() {
        // CSSTidy's behavior in this case is wrong, and should be fixed
        //$this->assertCleanCSS(
        //    "span[title=\"</style>\"] {\nfont-size:12pt;\n}",
        //    "span[title=\"\\3C /style\\3E \"] {\nfont-size:12pt;\n}"
        //);
    }

    /** Unknown properties are removed, leaving an empty rule. */
    public function test_cleanCSS_bogus() {
        $this->assertCleanCSS("div {bogus:tree;}", "div {\n}");
    }

    /** Already-escaped values are not escaped a second time. */
    public function test_cleanCSS_escapeCodes() {
        $this->assertCleanCSS(
            ".class {\nfont-family:'\\3C /style\\3E ';\n}"
        );
    }

    /** With character escaping disabled the value is left verbatim. */
    public function test_cleanCSS_noEscapeCodes() {
        $css = ".class {\nfont-family:'</style>';\n}";
        $cleaner = new HTMLPurifier_Filter_ExtractStyleBlocks(null, true);
        $cleaned = $cleaner->cleanCSS($css, $this->config, $this->context);
        $this->assertIdentical($cleaned, $css);
    }

}
|
@ -31,48 +31,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
|||||||
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
|
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
|
||||||
}
|
}
|
||||||
|
|
||||||
// HTMLPurifier_Lexer->extractStyleBlocks() --------------------------------
|
|
||||||
|
|
||||||
function assertExtractStyleBlocks($html, $expect = true, $styles = array()) {
|
|
||||||
$lexer = HTMLPurifier_Lexer::create($this->config);
|
|
||||||
if ($expect === true) $expect = $html;
|
|
||||||
$result = $lexer->extractStyleBlocks($html, $this->config, $this->context);
|
|
||||||
$this->assertIdentical($result, $expect);
|
|
||||||
$this->assertIdentical($this->context->get('StyleBlocks'), $styles);
|
|
||||||
}
|
|
||||||
|
|
||||||
function test_extractStyleBlocks_preserve() {
|
|
||||||
$this->assertExtractStyleBlocks('Foobar');
|
|
||||||
}
|
|
||||||
|
|
||||||
function test_extractStyleBlocks_allStyle() {
|
|
||||||
$this->assertExtractStyleBlocks('<style>foo</style>', '', array('foo'));
|
|
||||||
}
|
|
||||||
|
|
||||||
function test_extractStyleBlocks_multipleBlocks() {
|
|
||||||
$this->assertExtractStyleBlocks(
|
|
||||||
"<style>1</style><style>2</style>NOP<style>4</style>",
|
|
||||||
"NOP",
|
|
||||||
array('1', '2', '4')
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function test_extractStyleBlocks_blockWithAttributes() {
|
|
||||||
$this->assertExtractStyleBlocks(
|
|
||||||
'<style type="text/css">css</style>',
|
|
||||||
'',
|
|
||||||
array('css')
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function test_extractStyleBlocks_styleWithPadding() {
|
|
||||||
$this->assertExtractStyleBlocks(
|
|
||||||
"Alas<styled>Awesome</styled>\n<style>foo</style> Trendy!",
|
|
||||||
"Alas<styled>Awesome</styled>\n Trendy!",
|
|
||||||
array('foo')
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// HTMLPurifier_Lexer->parseData() -----------------------------------------
|
// HTMLPurifier_Lexer->parseData() -----------------------------------------
|
||||||
|
|
||||||
function assertParseData($input, $expect = true) {
|
function assertParseData($input, $expect = true) {
|
||||||
@ -553,17 +511,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_tokenizeHTML_extractStyleBlocks() {
|
|
||||||
$this->config->set('HTML', 'ExtractStyleBlocks', true);
|
|
||||||
$this->assertTokenization(
|
|
||||||
'<style type="text/css">.foo {text-align:center;}</style>Test',
|
|
||||||
array(
|
|
||||||
new HTMLPurifier_Token_Text('Test')
|
|
||||||
)
|
|
||||||
);
|
|
||||||
$this->assertIdentical($this->context->get('StyleBlocks'), array('.foo {text-align:center;}'));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
function test_tokenizeHTML_() {
|
function test_tokenizeHTML_() {
|
||||||
|
@ -16,6 +16,7 @@ $GLOBALS['HTMLPurifierTest'] = array();
|
|||||||
$GLOBALS['HTMLPurifierTest']['PEAR'] = false; // do PEAR tests
|
$GLOBALS['HTMLPurifierTest']['PEAR'] = false; // do PEAR tests
|
||||||
$GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") && class_exists('DOMDocument');
|
$GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") && class_exists('DOMDocument');
|
||||||
$simpletest_location = 'simpletest/'; // reasonable guess
|
$simpletest_location = 'simpletest/'; // reasonable guess
|
||||||
|
$csstidy_location = false;
|
||||||
|
|
||||||
// load SimpleTest
|
// load SimpleTest
|
||||||
if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php';
|
if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php';
|
||||||
@ -24,6 +25,11 @@ require_once $simpletest_location . 'unit_tester.php';
|
|||||||
require_once $simpletest_location . 'reporter.php';
|
require_once $simpletest_location . 'reporter.php';
|
||||||
require_once $simpletest_location . 'mock_objects.php';
|
require_once $simpletest_location . 'mock_objects.php';
|
||||||
|
|
||||||
|
if ($csstidy_location !== false) {
|
||||||
|
require_once $csstidy_location . 'class.csstidy.php';
|
||||||
|
require_once $csstidy_location . 'class.csstidy_print.php';
|
||||||
|
}
|
||||||
|
|
||||||
error_reporting(E_ALL | E_STRICT); // after SimpleTest is loaded, turn on compile time errors
|
error_reporting(E_ALL | E_STRICT); // after SimpleTest is loaded, turn on compile time errors
|
||||||
|
|
||||||
// load Debugger
|
// load Debugger
|
||||||
|
@ -75,6 +75,7 @@ $test_files[] = 'HTMLPurifier/ErrorCollectorTest.php';
|
|||||||
$test_files[] = 'HTMLPurifier/EncoderTest.php';
|
$test_files[] = 'HTMLPurifier/EncoderTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/EntityLookupTest.php';
|
$test_files[] = 'HTMLPurifier/EntityLookupTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/EntityParserTest.php';
|
$test_files[] = 'HTMLPurifier/EntityParserTest.php';
|
||||||
|
$test_files[] = 'HTMLPurifier/Filter/ExtractStyleBlocksTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/GeneratorTest.php';
|
$test_files[] = 'HTMLPurifier/GeneratorTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php';
|
$test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php';
|
||||||
$test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';
|
$test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';
|
||||||
|
Loading…
Reference in New Issue
Block a user