0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-23 00:41:52 +00:00

[3.0.0] <style> tags can now be extracted from input HTML using %HTML.ExtractStyleBlocks. These contents can be retrieved from $context->get('StyleBlocks');

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1468 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-12-12 03:29:12 +00:00
parent 54b37674f1
commit 831f552ec5
4 changed files with 92 additions and 0 deletions

2
NEWS
View File

@ -18,6 +18,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
+ PHP4 reference/foreach cruft removed (in progress) + PHP4 reference/foreach cruft removed (in progress)
! CSS properties are now case-insensitive ! CSS properties are now case-insensitive
! DefinitionCacheFactory now can register new implementations ! DefinitionCacheFactory now can register new implementations
! <style> tags can now be extracted from input HTML using %HTML.ExtractStyleBlocks.
These contents can be retrieved from $context->get('StyleBlocks');
. Unit tests for Injector improved . Unit tests for Injector improved
2.1.3, released 2007-11-05 2.1.3, released 2007-11-05

View File

@ -79,6 +79,14 @@ It is not necessary and will have no effect for PHP 4.
This directive has been available since 2.1.0. This directive has been available since 2.1.0.
'); ');
// Declares the ExtractStyleBlocks directive in the HTML namespace; the lexer
// reads it with $config->get('HTML', 'ExtractStyleBlocks').
// NOTE(review): `false` and 'bool' are presumably the default value and the
// value type of the directive -- confirm against ConfigSchema::define().
HTMLPurifier_ConfigSchema::define(
'HTML', 'ExtractStyleBlocks', false, 'bool', '
This directive enables extraction of <code>style</code> tags contents so
that they can be incorporated in the <code>head</code> of the document,
after sufficient validation.
This directive has been available since 3.0.0.
');
/** /**
* Forgivingly lexes HTML (SGML-style) markup into tokens. * Forgivingly lexes HTML (SGML-style) markup into tokens.
* *
@ -338,6 +346,34 @@ class HTMLPurifier_Lexer
// represent non-SGML characters (horror, horror!) // represent non-SGML characters (horror, horror!)
$html = HTMLPurifier_Encoder::cleanUTF8($html); $html = HTMLPurifier_Encoder::cleanUTF8($html);
if ($config->get('HTML', 'ExtractStyleBlocks')) {
// extract <style> CSS blocks
$html = $this->extractStyleBlocks($html, $config, $context);
}
return $html;
}
/**
 * Scratch buffer: styleCallback() appends the contents of each matched
 * <style> block here; extractStyleBlocks() copies the list out and resets
 * it to an empty array at the end of every call.
 */
private $_styleMatches = array();
/**
 * Save the contents of CSS blocks to style matches
 *
 * Used as the callback of preg_replace_callback() in extractStyleBlocks().
 *
 * @param array $matches Regex match; index 1 holds the text captured between
 *                       the opening and closing <style> tags
 * @return string Always the empty string, so the matched <style> element is
 *                deleted from the HTML being processed
 */
protected function styleCallback($matches) {
    $this->_styleMatches[] = $matches[1];
    // preg_replace_callback() expects the replacement text as return value;
    // be explicit instead of relying on an implicit null being coerced to ''.
    return '';
}
/**
 * Removes inline <style> tags from HTML, saves them for later use
 *
 * Every <style ...>...</style> element found in $html is cut out and its raw
 * contents are collected, in document order, into an array which is then
 * registered in $context under the key 'StyleBlocks' (any previously
 * registered 'StyleBlocks' entry is destroyed first).
 *
 * @param string $html    HTML to scan
 * @param object $config  Configuration object; not read by this method
 * @param object $context Context object; must provide exists(), destroy()
 *                        and register()
 * @return string $html with all matched <style> elements removed
 * @todo Extend to indicate non-text/css style blocks
 */
public function extractStyleBlocks($html, $config, $context) {
    // (.*) instead of (.+): an empty <style></style> must match on its own,
    // otherwise the lazy match runs on to the *next* </style> and swallows
    // everything in between (including unrelated markup).
    $stripped = preg_replace_callback(
        '#<style(?:\s.*)?>(.*)</style>#isU',
        array($this, 'styleCallback'),
        $html
    );
    if ($stripped === null) {
        // PCRE failure (e.g. backtrack limit hit): keep the input untouched
        // rather than silently replacing the whole document with null, and
        // discard whatever partial matches were collected.
        $stripped = $html;
        $style_blocks = array();
    } else {
        $style_blocks = $this->_styleMatches;
    }
    $this->_styleMatches = array(); // reset
    // this is a persistent context, so we have to overwrite it with every call
    if ($context->exists('StyleBlocks')) $context->destroy('StyleBlocks');
    $context->register('StyleBlocks', $style_blocks);
    return $stripped;
}

View File

@ -186,6 +186,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$ret .= '<html><head>'; $ret .= '<html><head>';
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'; $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
// No protection if $html contains a stray </div>!
$ret .= '</head><body><div>'.$html.'</div></body></html>'; $ret .= '</head><body><div>'.$html.'</div></body></html>';
return $ret; return $ret;
} }

View File

@ -31,6 +31,48 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
$this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex'); $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
} }
// HTMLPurifier_Lexer->extractStyleBlocks() --------------------------------
function assertExtractStyleBlocks($html, $expect = true, $styles = array()) {
    // Passing true for $expect is shorthand for "output equals input"
    $expected_html = ($expect === true) ? $html : $expect;
    $lexer = HTMLPurifier_Lexer::create($this->config);
    $actual_html = $lexer->extractStyleBlocks($html, $this->config, $this->context);
    // Check the remaining HTML first, then what was stored in the context
    $this->assertIdentical($actual_html, $expected_html);
    $extracted = $this->context->get('StyleBlocks');
    $this->assertIdentical($extracted, $styles);
}
function test_extractStyleBlocks_preserve() {
    // Input without any <style> element: expected unchanged, no blocks stored
    $plain = 'Foobar';
    $this->assertExtractStyleBlocks($plain);
}
function test_extractStyleBlocks_allStyle() {
    // Input that is nothing but a style block: empty HTML left, contents captured
    $input = '<style>foo</style>';
    $remaining = '';
    $captured = array('foo');
    $this->assertExtractStyleBlocks($input, $remaining, $captured);
}
function test_extractStyleBlocks_multipleBlocks() {
    // Several blocks, adjacent and separated by text: each is captured in order
    $input = "<style>1</style><style>2</style>NOP<style>4</style>";
    $remaining = "NOP";
    $captured = array('1', '2', '4');
    $this->assertExtractStyleBlocks($input, $remaining, $captured);
}
function test_extractStyleBlocks_blockWithAttributes() {
    // Attributes on the opening tag must not prevent extraction
    $input = '<style type="text/css">css</style>';
    $remaining = '';
    $captured = array('css');
    $this->assertExtractStyleBlocks($input, $remaining, $captured);
}
function test_extractStyleBlocks_styleWithPadding() {
    // A look-alike tag (<styled>) stays; surrounding text and whitespace are kept
    $input = "Alas<styled>Awesome</styled>\n<style>foo</style> Trendy!";
    $remaining = "Alas<styled>Awesome</styled>\n Trendy!";
    $captured = array('foo');
    $this->assertExtractStyleBlocks($input, $remaining, $captured);
}
// HTMLPurifier_Lexer->parseData() ----------------------------------------- // HTMLPurifier_Lexer->parseData() -----------------------------------------
function assertParseData($input, $expect = true) { function assertParseData($input, $expect = true) {
@ -511,6 +553,17 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
); );
} }
function test_tokenizeHTML_extractStyleBlocks() {
    // With the directive on, the style block must vanish from the token
    // stream and its contents must appear in the context instead.
    $this->config->set('HTML', 'ExtractStyleBlocks', true);
    $input = '<style type="text/css">.foo {text-align:center;}</style>Test';
    $expected_tokens = array(
        new HTMLPurifier_Token_Text('Test')
    );
    $this->assertTokenization($input, $expected_tokens);
    $extracted = $this->context->get('StyleBlocks');
    $this->assertIdentical($extracted, array('.foo {text-align:center;}'));
}
/* /*
function test_tokenizeHTML_() { function test_tokenizeHTML_() {