mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2024-12-23 00:41:52 +00:00
[3.0.0] <style> tags can now be extracted from input HTML using %HTML.ExtractStyleBlocks. These contents can be retrieved from $context->get('StyleBlocks');
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1468 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
54b37674f1
commit
831f552ec5
2
NEWS
2
NEWS
@ -18,6 +18,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
+ PHP4 reference/foreach cruft removed (in progress)
|
+ PHP4 reference/foreach cruft removed (in progress)
|
||||||
! CSS properties are now case-insensitive
|
! CSS properties are now case-insensitive
|
||||||
! DefinitionCacheFactory now can register new implementations
|
! DefinitionCacheFactory now can register new implementations
|
||||||
|
! <style> tags can now be extracted from input HTML using %HTML.ExtractStyleBlocks.
|
||||||
|
These contents can be retrieved from $context->get('StyleBlocks');
|
||||||
. Unit tests for Injector improved
|
. Unit tests for Injector improved
|
||||||
|
|
||||||
2.1.3, released 2007-11-05
|
2.1.3, released 2007-11-05
|
||||||
|
@ -79,6 +79,14 @@ It is not necessary and will have no effect for PHP 4.
|
|||||||
This directive has been available since 2.1.0.
|
This directive has been available since 2.1.0.
|
||||||
');
|
');
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'HTML', 'ExtractStyleBlocks', false, 'bool', '
|
||||||
|
This directive enables extraction of <code>style</code> tags contents so
|
||||||
|
that they can be incorporated in the <code>head</code> of the document,
|
||||||
|
after sufficient validation.
|
||||||
|
This directive has been available since 3.0.0.
|
||||||
|
');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||||
*
|
*
|
||||||
@ -338,6 +346,34 @@ class HTMLPurifier_Lexer
|
|||||||
// represent non-SGML characters (horror, horror!)
|
// represent non-SGML characters (horror, horror!)
|
||||||
$html = HTMLPurifier_Encoder::cleanUTF8($html);
|
$html = HTMLPurifier_Encoder::cleanUTF8($html);
|
||||||
|
|
||||||
|
if ($config->get('HTML', 'ExtractStyleBlocks')) {
|
||||||
|
// extract <style> CSS blocks
|
||||||
|
$html = $this->extractStyleBlocks($html, $config, $context);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $html;
|
||||||
|
}
|
||||||
|
|
||||||
|
private $_styleMatches = array();
|
||||||
|
|
||||||
|
/**
 * Callback for preg_replace_callback(): saves the contents of a CSS block
 * to the style matches and removes the matched element from the HTML.
 *
 * @param array $matches Match data; index 1 is the first captured group,
 *                       which is the text appended to the style matches
 * @return string Always the empty string, so the whole match is replaced
 *                by nothing
 */
protected function styleCallback($matches) {
    $this->_styleMatches[] = $matches[1];
    // Be explicit about the replacement instead of relying on the implicit
    // null return value being converted to an empty string.
    return '';
}
|
||||||
|
|
||||||
|
/**
 * Removes inline <style> tags from HTML, saves them for later use.
 *
 * Every matched element is handed to styleCallback(), which collects the
 * captured contents. The collected list is then stored in the context
 * under 'StyleBlocks', replacing any entry left over from an earlier call.
 *
 * @param string $html HTML that may contain <style> elements
 * @param $config Configuration object (not consulted by this method)
 * @param $context Context object that receives the 'StyleBlocks' entry
 * @return string The HTML with all matched <style> elements removed; the
 *                unmodified input if the regular expression engine fails
 * @todo Extend to indicate non-text/css style blocks
 */
public function extractStyleBlocks($html, $config, $context) {
    // (.*) instead of (.+): an empty <style></style> must match on its own.
    // With (.+) the ungreedy match was forced to run on to a later
    // </style>, swallowing (and deleting) all markup in between.
    $stripped = preg_replace_callback(
        '#<style(?:\s.*)?>(.*)</style>#isU',
        array($this, 'styleCallback'),
        $html
    );
    if ($stripped === null) {
        // preg_replace_callback() returns null on a PCRE error. Keep the
        // input as it was and drop any partially collected matches rather
        // than passing null on as the HTML.
        $stripped = $html;
        $this->_styleMatches = array();
    }
    $style_blocks = $this->_styleMatches;
    $this->_styleMatches = array(); // reset
    // this is a persistent context, so we have to overwrite it with every call
    if ($context->exists('StyleBlocks')) $context->destroy('StyleBlocks');
    $context->register('StyleBlocks', $style_blocks);
    return $stripped;
}
|
||||||
|
|
||||||
|
@ -186,6 +186,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
|
|
||||||
$ret .= '<html><head>';
|
$ret .= '<html><head>';
|
||||||
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
|
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
|
||||||
|
// No protection if $html contains a stray </div>!
|
||||||
$ret .= '</head><body><div>'.$html.'</div></body></html>';
|
$ret .= '</head><body><div>'.$html.'</div></body></html>';
|
||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,48 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
|||||||
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
|
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HTMLPurifier_Lexer->extractStyleBlocks() --------------------------------
|
||||||
|
|
||||||
|
/**
 * Runs extractStyleBlocks() on the given HTML and checks both the returned
 * HTML and the 'StyleBlocks' entry left in the test context.
 *
 * @param string $html Input HTML
 * @param mixed $expect Expected output HTML; true means "same as input"
 * @param array $styles Expected list of extracted style block contents
 */
function assertExtractStyleBlocks($html, $expect = true, $styles = array()) {
    $expected_html = ($expect === true) ? $html : $expect;
    $lexer = HTMLPurifier_Lexer::create($this->config);
    $actual_html = $lexer->extractStyleBlocks($html, $this->config, $this->context);
    $this->assertIdentical($actual_html, $expected_html);
    $actual_styles = $this->context->get('StyleBlocks');
    $this->assertIdentical($actual_styles, $styles);
}
|
||||||
|
|
||||||
|
/**
 * Input without any <style> element: relies on the helper defaults, i.e.
 * the HTML comes back unchanged and the list of style blocks is empty.
 */
function test_extractStyleBlocks_preserve() {
    $plain = 'Foobar';
    $this->assertExtractStyleBlocks($plain);
}
|
||||||
|
|
||||||
|
/**
 * Input consisting solely of one <style> element: nothing is left of the
 * HTML and the element's contents are the only extracted block.
 */
function test_extractStyleBlocks_allStyle() {
    $input = '<style>foo</style>';
    $remaining = '';
    $blocks = array('foo');
    $this->assertExtractStyleBlocks($input, $remaining, $blocks);
}
|
||||||
|
|
||||||
|
/**
 * Several <style> elements around ordinary text: each element is extracted
 * separately, in document order, and only the text remains.
 */
function test_extractStyleBlocks_multipleBlocks() {
    $input = "<style>1</style><style>2</style>NOP<style>4</style>";
    $remaining = "NOP";
    $blocks = array('1', '2', '4');
    $this->assertExtractStyleBlocks($input, $remaining, $blocks);
}
|
||||||
|
|
||||||
|
/**
 * A <style> start tag carrying attributes is still recognised and its
 * contents extracted.
 */
function test_extractStyleBlocks_blockWithAttributes() {
    $input = '<style type="text/css">css</style>';
    $remaining = '';
    $blocks = array('css');
    $this->assertExtractStyleBlocks($input, $remaining, $blocks);
}
|
||||||
|
|
||||||
|
/**
 * A <style> element surrounded by other content: the similarly named
 * <styled> element and the surrounding text and whitespace are kept,
 * only the real <style> element is removed.
 */
function test_extractStyleBlocks_styleWithPadding() {
    $input = "Alas<styled>Awesome</styled>\n<style>foo</style> Trendy!";
    $remaining = "Alas<styled>Awesome</styled>\n Trendy!";
    $blocks = array('foo');
    $this->assertExtractStyleBlocks($input, $remaining, $blocks);
}
|
||||||
|
|
||||||
// HTMLPurifier_Lexer->parseData() -----------------------------------------
|
// HTMLPurifier_Lexer->parseData() -----------------------------------------
|
||||||
|
|
||||||
function assertParseData($input, $expect = true) {
|
function assertParseData($input, $expect = true) {
|
||||||
@ -511,6 +553,17 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * With %HTML.ExtractStyleBlocks enabled, tokenization yields only the text
 * following the <style> element, and the element's contents end up in the
 * context under 'StyleBlocks'.
 */
function test_tokenizeHTML_extractStyleBlocks() {
    $this->config->set('HTML', 'ExtractStyleBlocks', true);
    $input = '<style type="text/css">.foo {text-align:center;}</style>Test';
    $expected_tokens = array(
        new HTMLPurifier_Token_Text('Test')
    );
    $this->assertTokenization($input, $expected_tokens);
    $extracted = $this->context->get('StyleBlocks');
    $this->assertIdentical($extracted, array('.foo {text-align:center;}'));
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
function test_tokenizeHTML_() {
|
function test_tokenizeHTML_() {
|
||||||
|
Loading…
Reference in New Issue
Block a user