mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-18 11:41:52 +00:00
Outfit a bunch of other classes so they can accept a configuration object. Put in basic scaffolding for extractBody() functionality.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@257 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
24c64dbbac
commit
d7140f2e05
@ -9,3 +9,10 @@ are nevertheless error checking and a centralized configuration object.
|
||||
Directives are divided into namespaces, indicating the major portion of
|
||||
functionality they cover (although there may be overlaps). Please consult
|
||||
the documentation in ConfigDef for more information on these namespaces.
|
||||
|
||||
Since configuration is dependent on context, most of the internal classes
|
||||
require a configuration object to be passed as a parameter. However, a few
|
||||
make this optional: they will supply a default configuration object if none
|
||||
are passed. These classes are: HTMLPurifier::*, Generator::generateFromTokens
|
||||
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
||||
is defined, that object should be used.
|
||||
|
@ -59,8 +59,10 @@ class HTMLPurifier
|
||||
$generator = new HTMLPurifier_Generator();
|
||||
return $generator->generateFromTokens(
|
||||
$strategy->execute(
|
||||
$lexer->tokenizeHTML($html), $config
|
||||
)
|
||||
$lexer->tokenizeHTML($html, $config),
|
||||
$config
|
||||
),
|
||||
$config
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -137,7 +137,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
$is_deleting = true;
|
||||
if ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken($token)
|
||||
$this->gen->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
@ -148,7 +148,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] =
|
||||
new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken( $token )
|
||||
$this->gen->generateFromToken( $token, $config )
|
||||
);
|
||||
} else {
|
||||
// drop silently
|
||||
|
@ -5,26 +5,28 @@
|
||||
class HTMLPurifier_Generator
|
||||
{
|
||||
|
||||
function generateFromTokens($tokens) {
|
||||
// only unit tests may omit configuration: internals MUST pass config
|
||||
function generateFromTokens($tokens, $config = null) {
|
||||
$html = '';
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
if (!$tokens) return '';
|
||||
foreach ($tokens as $token) {
|
||||
$html .= $this->generateFromToken($token);
|
||||
$html .= $this->generateFromToken($token, $config);
|
||||
}
|
||||
return $html;
|
||||
}
|
||||
|
||||
function generateFromToken($token) {
|
||||
function generateFromToken($token, $config) {
|
||||
if (!isset($token->type)) return '';
|
||||
if ($token->type == 'start') {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
$attr = $this->generateAttributes($token->attributes, $config);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
||||
|
||||
} elseif ($token->type == 'end') {
|
||||
return '</' . $token->name . '>';
|
||||
|
||||
} elseif ($token->type == 'empty') {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
$attr = $this->generateAttributes($token->attributes, $config);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
||||
|
||||
} elseif ($token->type == 'text') {
|
||||
@ -36,7 +38,7 @@ class HTMLPurifier_Generator
|
||||
}
|
||||
}
|
||||
|
||||
function generateAttributes($assoc_array_of_attributes) {
|
||||
function generateAttributes($assoc_array_of_attributes, $config) {
|
||||
$html = '';
|
||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||
$html .= $key.'="'.htmlspecialchars($value, ENT_COMPAT, 'UTF-8').'" ';
|
||||
|
@ -2,6 +2,14 @@
|
||||
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
'Core', 'AcceptFullDocuments', true,
|
||||
'This parameter determines whether or not the filter should accept full '.
|
||||
'HTML documents, not just HTML fragments. When on, it will '.
|
||||
'drop all sections except the content between body. Depending on '.
|
||||
'the implementation in use, this may speed up document parse times.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||
*
|
||||
@ -52,7 +60,7 @@ class HTMLPurifier_Lexer
|
||||
* @param $string String HTML.
|
||||
* @return HTMLPurifier_Token array representation of HTML.
|
||||
*/
|
||||
function tokenizeHTML($string) {
|
||||
function tokenizeHTML($string, $config = null) {
|
||||
trigger_error('Call to abstract class', E_USER_ERROR);
|
||||
}
|
||||
|
||||
@ -228,6 +236,14 @@ class HTMLPurifier_Lexer
|
||||
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
|
||||
/**
|
||||
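* Takes a string of HTML (fragment or document) and returns the content
* of its body. Fragments (no '<html' found) are returned unchanged;
* extraction for full documents is not implemented yet.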
|
||||
*/
|
||||
function extractBody($html) {
|
||||
if (strpos($html, '<html') === false) return $html; // already fragment
|
||||
// ...
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@ -25,7 +25,9 @@ require_once 'HTMLPurifier/Lexer.php';
|
||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
public function tokenizeHTML($string) {
|
||||
public function tokenizeHTML($string, $config = null) {
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$doc = new DOMDocument();
|
||||
$doc->encoding = 'UTF-8'; // technically does nothing, but comprehensive
|
||||
|
||||
|
@ -106,7 +106,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
}
|
||||
}
|
||||
|
||||
function tokenizeHTML($string) {
|
||||
function tokenizeHTML($string, $config = null) {
|
||||
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
// some quick checking (if empty, return empty)
|
||||
$string = @ (string) $string;
|
||||
|
@ -29,7 +29,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
||||
*/
|
||||
var $tokens = array();
|
||||
|
||||
function tokenizeHTML($html) {
|
||||
function tokenizeHTML($html, $config = null) {
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$html = $this->escapeCDATA($html);
|
||||
$html = $this->substituteNonSpecialEntities($html);
|
||||
$parser=& new XML_HTMLSax3();
|
||||
|
@ -87,7 +87,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// make sure that we have something open
|
||||
if (empty($current_nesting)) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->generator->generateFromToken($token)
|
||||
$this->generator->generateFromToken($token, $config)
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@ -122,7 +122,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// we still didn't find the tag, so translate to text
|
||||
if ($skipped_tags === false) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->generator->generateFromToken($token)
|
||||
$this->generator->generateFromToken($token, $config)
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
} else {
|
||||
// invalid tag, generate HTML and insert in
|
||||
$token = new HTMLPurifier_Token_Text(
|
||||
$this->generator->generateFromToken($token)
|
||||
$this->generator->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
} elseif ($token->type == 'comment') {
|
||||
|
@ -19,7 +19,6 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||
|
||||
function assertSeries($inputs, $expect, $config, $context = array()) {
|
||||
foreach ($inputs as $i => $input) {
|
||||
$tokens = $this->lex->tokenizeHTML($input);
|
||||
|
||||
if (!isset($context[$i])) {
|
||||
$context[$i] = null;
|
||||
@ -28,12 +27,13 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||
$config[$i] = HTMLPurifier_Config::createDefault();
|
||||
}
|
||||
|
||||
$tokens = $this->lex->tokenizeHTML($input, $config[$i]);
|
||||
$result = $this->def->validateChildren($tokens, $config[$i], $context[$i]);
|
||||
|
||||
if (is_bool($expect[$i])) {
|
||||
$this->assertIdentical($expect[$i], $result, "Test $i: %s");
|
||||
} else {
|
||||
$result_html = $this->gen->generateFromTokens($result);
|
||||
$result_html = $this->gen->generateFromTokens($result, $config[$i]);
|
||||
$this->assertIdentical($expect[$i], $result_html, "Test $i: %s");
|
||||
paintIf($result_html, $result_html != $expect[$i]);
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
$config = array();
|
||||
|
||||
$inputs[0] = new HTMLPurifier_Token_Text('Foobar.<>');
|
||||
$expect[0] = 'Foobar.<>';
|
||||
@ -51,8 +52,10 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
||||
$expect[7] = $theta_char;
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach ($inputs as $i => $input) {
|
||||
$result = $this->gen->generateFromToken($input);
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
$result = $this->gen->generateFromToken($input, $config[$i]);
|
||||
$this->assertEqual($result, $expect[$i]);
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
@ -63,6 +66,7 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
$config = array();
|
||||
|
||||
$inputs[0] = array();
|
||||
$expect[0] = '';
|
||||
@ -81,8 +85,10 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
$inputs[4] = array('title' => 'Theta is ' . $theta_char);
|
||||
$expect[4] = 'title="Theta is ' . $theta_char . '"';
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach ($inputs as $i => $input) {
|
||||
$result = $this->gen->generateAttributes($input);
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
$result = $this->gen->generateAttributes($input, $config[$i]);
|
||||
$this->assertEqual($result, $expect[$i]);
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
@ -93,6 +99,7 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
$config = array();
|
||||
|
||||
$inputs[0] = array(
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
@ -104,8 +111,10 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
$inputs[1] = array();
|
||||
$expect[1] = '';
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach ($inputs as $i => $input) {
|
||||
$result = $this->gen->generateFromTokens($input);
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
$result = $this->gen->generateFromTokens($input, $config[$i]);
|
||||
$this->assertEqual($expect[$i], $result);
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
|
@ -40,11 +40,16 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
$this->Lexer->substituteNonSpecialEntities('"') );
|
||||
}
|
||||
|
||||
function test_extractBody() {
|
||||
|
||||
}
|
||||
|
||||
function test_tokenizeHTML() {
|
||||
|
||||
$input = array();
|
||||
$expect = array();
|
||||
$sax_expect = array();
|
||||
$config = array();
|
||||
|
||||
$input[0] = '';
|
||||
$expect[0] = array();
|
||||
@ -221,14 +226,17 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
$input[17] = $char_hearts;
|
||||
$expect[17] = array( new HTMLPurifier_Token_Text($char_hearts) );
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach($input as $i => $discard) {
|
||||
$result = $this->DirectLex->tokenizeHTML($input[$i]);
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
|
||||
$result = $this->DirectLex->tokenizeHTML($input[$i], $config[$i]);
|
||||
$this->assertEqual($expect[$i], $result, 'DirectLexTest '.$i.': %s');
|
||||
paintIf($result, $expect[$i] != $result);
|
||||
|
||||
if ($this->_has_pear) {
|
||||
// assert unless I say otherwise
|
||||
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i]);
|
||||
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i], $config[$i]);
|
||||
if (!isset($sax_expect[$i])) {
|
||||
// by default, assert with normal result
|
||||
$this->assertEqual($expect[$i], $sax_result, 'PEARSax3Test '.$i.': %s');
|
||||
@ -244,7 +252,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
}
|
||||
|
||||
if ($this->_has_dom) {
|
||||
$dom_result = $this->DOMLex->tokenizeHTML($input[$i]);
|
||||
$dom_result = $this->DOMLex->tokenizeHTML($input[$i], $config[$i]);
|
||||
// same structure as SAX
|
||||
if (!isset($dom_expect[$i])) {
|
||||
$this->assertEqual($expect[$i], $dom_result, 'DOMLexTest '.$i.': %s');
|
||||
|
@ -31,7 +31,7 @@ class HTMLPurifier_StrategyHarness extends UnitTestCase
|
||||
$config[$i] = HTMLPurifier_Config::createDefault();
|
||||
}
|
||||
$result_tokens = $strategy->execute($tokens, $config[$i]);
|
||||
$result = $this->gen->generateFromTokens($result_tokens);
|
||||
$result = $this->gen->generateFromTokens($result_tokens, $config[$i]);
|
||||
$this->assertEqual($expect[$i], $result, "Test $i: %s");
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user