mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-22 16:31:53 +00:00
[3.0.0] <style> tags can now be extracted from input HTML using %HTML.ExtractStyleBlocks. These contents can be retrieved from $context->get('StyleBlocks');
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1468 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
54b37674f1
commit
831f552ec5
2
NEWS
2
NEWS
@ -18,6 +18,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
+ PHP4 reference/foreach cruft removed (in progress)
|
||||
! CSS properties are now case-insensitive
|
||||
! DefinitionCacheFactory now can register new implementations
|
||||
! <style> tags can now be extracted from input HTML using %HTML.ExtractStyleBlocks.
|
||||
These contents can be retrieved from $context->get('StyleBlocks');
|
||||
. Unit tests for Injector improved
|
||||
|
||||
2.1.3, released 2007-11-05
|
||||
|
@ -79,6 +79,14 @@ It is not necessary and will have no effect for PHP 4.
|
||||
This directive has been available since 2.1.0.
|
||||
');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'ExtractStyleBlocks', false, 'bool', '
|
||||
This directive enables extraction of <code>style</code> tags contents so
|
||||
that they can be incorporated in the <code>head</code> of the document,
|
||||
after sufficient validation.
|
||||
This directive has been available since 3.0.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||
*
|
||||
@ -338,6 +346,34 @@ class HTMLPurifier_Lexer
|
||||
// represent non-SGML characters (horror, horror!)
|
||||
$html = HTMLPurifier_Encoder::cleanUTF8($html);
|
||||
|
||||
if ($config->get('HTML', 'ExtractStyleBlocks')) {
|
||||
// extract <style> CSS blocks
|
||||
$html = $this->extractStyleBlocks($html, $config, $context);
|
||||
}
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
private $_styleMatches = array();
|
||||
|
||||
/**
 * preg_replace_callback() handler that records one <style> block.
 *
 * Appends the captured text to $this->_styleMatches and returns an empty
 * string, so the entire matched element is removed from the HTML.
 * @param $matches Match array; index 1 holds the text captured between
 *                 the opening and closing style tags
 * @return Empty replacement string
 */
protected function styleCallback($matches) {
    $this->_styleMatches[] = $matches[1];
    // Return the replacement explicitly instead of relying on an implicit
    // null being coerced to an empty string by preg_replace_callback()
    return '';
}
|
||||
|
||||
/**
 * Removes inline <style> tags from HTML, saves them for later use
 *
 * Every <style>...</style> element is cut out of $html and its contents
 * are collected, in document order, into the 'StyleBlocks' context
 * variable. Empty blocks are removed as well and recorded as whatever the
 * callback captures for them.
 * @param $html    String of HTML to process
 * @param $config  Configuration object (not read by this method)
 * @param $context Context object that receives 'StyleBlocks'
 * @return HTML with the style elements removed; the unmodified input if
 *         the regular expression engine reports an error
 * @todo Extend to indicate non-text/css style blocks
 */
public function extractStyleBlocks($html, $config, $context) {
    // Start from a clean slate in case an earlier run was interrupted
    $this->_styleMatches = array();
    // U (ungreedy) keeps adjacent blocks separate. The capture is (.*)
    // rather than (.+) so that an empty <style></style> is matched on its
    // own; with (.+) the match would run on to a later closing tag and
    // capture markup as if it were CSS.
    $stripped = preg_replace_callback(
        '#<style(?:\s.*)?>(.*)</style>#isU',
        array($this, 'styleCallback'),
        $html
    );
    $style_blocks = $this->_styleMatches;
    $this->_styleMatches = array(); // reset
    if ($stripped === null) {
        // PCRE error (e.g. backtrack limit): do not hand null onwards as
        // the document; keep the input and report no extracted blocks
        $stripped = $html;
        $style_blocks = array();
    }
    // this is a persistent context, so we have to overwrite it with every call
    if ($context->exists('StyleBlocks')) {
        $context->destroy('StyleBlocks');
    }
    $context->register('StyleBlocks', $style_blocks);
    return $stripped;
}
|
||||
|
||||
|
@ -186,6 +186,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
|
||||
$ret .= '<html><head>';
|
||||
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
|
||||
// No protection if $html contains a stray </div>!
|
||||
$ret .= '</head><body><div>'.$html.'</div></body></html>';
|
||||
return $ret;
|
||||
}
|
||||
|
@ -31,6 +31,48 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
|
||||
}
|
||||
|
||||
// HTMLPurifier_Lexer->extractStyleBlocks() --------------------------------
|
||||
|
||||
/**
 * Runs extractStyleBlocks() on a snippet and verifies both of its outputs.
 * @param $html   Markup handed to the lexer
 * @param $expect Markup expected back; true means "identical to $html"
 * @param $styles Expected value of the 'StyleBlocks' context variable
 */
function assertExtractStyleBlocks($html, $expect = true, $styles = array()) {
    $expected_html = ($expect === true) ? $html : $expect;
    $lexer = HTMLPurifier_Lexer::create($this->config);
    $actual_html = $lexer->extractStyleBlocks($html, $this->config, $this->context);
    // Returned markup first, then what was stored in the context
    $this->assertIdentical($actual_html, $expected_html);
    $extracted = $this->context->get('StyleBlocks');
    $this->assertIdentical($extracted, $styles);
}
|
||||
|
||||
function test_extractStyleBlocks_preserve() {
    // Markup without any style element must come back unchanged and
    // yield no style blocks (the helper's defaults)
    $plain = 'Foobar';
    $this->assertExtractStyleBlocks($plain);
}
|
||||
|
||||
function test_extractStyleBlocks_allStyle() {
    // Input consisting solely of one style element: nothing is left over
    $input = '<style>foo</style>';
    $remaining = '';
    $blocks = array('foo');
    $this->assertExtractStyleBlocks($input, $remaining, $blocks);
}
|
||||
|
||||
function test_extractStyleBlocks_multipleBlocks() {
    // Several blocks, adjacent and separated by text, are each captured
    // on their own and in document order
    $input = "<style>1</style><style>2</style>NOP<style>4</style>";
    $remaining = "NOP";
    $blocks = array('1', '2', '4');
    $this->assertExtractStyleBlocks($input, $remaining, $blocks);
}
|
||||
|
||||
function test_extractStyleBlocks_blockWithAttributes() {
    // Attributes on the opening tag must not prevent extraction
    $input = '<style type="text/css">css</style>';
    $remaining = '';
    $blocks = array('css');
    $this->assertExtractStyleBlocks($input, $remaining, $blocks);
}
|
||||
|
||||
function test_extractStyleBlocks_styleWithPadding() {
    // A look-alike element (<styled>) and the text around the real block
    // must be left alone; only the genuine style element is removed
    $input = "Alas<styled>Awesome</styled>\n<style>foo</style> Trendy!";
    $remaining = "Alas<styled>Awesome</styled>\n Trendy!";
    $blocks = array('foo');
    $this->assertExtractStyleBlocks($input, $remaining, $blocks);
}
|
||||
|
||||
// HTMLPurifier_Lexer->parseData() -----------------------------------------
|
||||
|
||||
function assertParseData($input, $expect = true) {
|
||||
@ -511,6 +553,17 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_extractStyleBlocks() {
    // With the directive enabled, the style element is expected to be gone
    // from the token stream and its contents stored in the context
    $this->config->set('HTML', 'ExtractStyleBlocks', true);
    $input = '<style type="text/css">.foo {text-align:center;}</style>Test';
    $expected_tokens = array(
        new HTMLPurifier_Token_Text('Test')
    );
    $this->assertTokenization($input, $expected_tokens);
    $blocks = $this->context->get('StyleBlocks');
    $this->assertIdentical($blocks, array('.foo {text-align:center;}'));
}
|
||||
|
||||
/*
|
||||
|
||||
function test_tokenizeHTML_() {
|
||||
|
Loading…
Reference in New Issue
Block a user