0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-18 11:41:52 +00:00

Outfit a bunch of other classes so they can accept a configuration object. Put in basic scaffolding for extractBody() functionality.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@257 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-08-15 00:31:12 +00:00
parent 24c64dbbac
commit d7140f2e05
14 changed files with 75 additions and 26 deletions

View File

@ -9,3 +9,10 @@ are nevertheless error checking and a centralized configuration object.
Directives are divided into namespaces, indicating the major portion of
functionality they cover (although there may be overlaps). Please consult
the documentation in ConfigDef for more information on these namespaces.
Since configuration is dependent on context, most of the internal classes
require a configuration object to be passed as a parameter. However, a few
entry points make this optional: they will supply a default configuration
object if none is passed. These are: HTMLPurifier::*,
Generator::generateFromTokens and Lexer::tokenizeHTML. Nevertheless, whenever
a valid configuration object is available, that object should be passed.

View File

@ -59,8 +59,10 @@ class HTMLPurifier
$generator = new HTMLPurifier_Generator();
return $generator->generateFromTokens(
$strategy->execute(
$lexer->tokenizeHTML($html), $config
)
$lexer->tokenizeHTML($html, $config),
$config
),
$config
);
}

View File

@ -137,7 +137,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$is_deleting = true;
if ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$this->gen->generateFromToken($token)
$this->gen->generateFromToken($token, $config)
);
}
continue;
@ -148,7 +148,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$this->gen->generateFromToken( $token )
$this->gen->generateFromToken( $token, $config )
);
} else {
// drop silently

View File

@ -5,26 +5,28 @@
class HTMLPurifier_Generator
{
function generateFromTokens($tokens) {
// only unit tests may omit configuration: internals MUST pass config
function generateFromTokens($tokens, $config = null) {
$html = '';
if (!$config) $config = HTMLPurifier_Config::createDefault();
if (!$tokens) return '';
foreach ($tokens as $token) {
$html .= $this->generateFromToken($token);
$html .= $this->generateFromToken($token, $config);
}
return $html;
}
function generateFromToken($token) {
function generateFromToken($token, $config) {
if (!isset($token->type)) return '';
if ($token->type == 'start') {
$attr = $this->generateAttributes($token->attributes);
$attr = $this->generateAttributes($token->attributes, $config);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token->type == 'end') {
return '</' . $token->name . '>';
} elseif ($token->type == 'empty') {
$attr = $this->generateAttributes($token->attributes);
$attr = $this->generateAttributes($token->attributes, $config);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
} elseif ($token->type == 'text') {
@ -36,7 +38,7 @@ class HTMLPurifier_Generator
}
}
function generateAttributes($assoc_array_of_attributes) {
function generateAttributes($assoc_array_of_attributes, $config) {
$html = '';
foreach ($assoc_array_of_attributes as $key => $value) {
$html .= $key.'="'.htmlspecialchars($value, ENT_COMPAT, 'UTF-8').'" ';

View File

@ -2,6 +2,14 @@
require_once 'HTMLPurifier/Token.php';
// Registers the 'AcceptFullDocuments' directive under the 'Core' namespace.
// The third argument (true) is presumably the directive's default value, and
// the final argument is its human-readable description -- confirm against
// HTMLPurifier_ConfigDef::define().
HTMLPurifier_ConfigDef::define(
'Core', 'AcceptFullDocuments', true,
'This parameter determines whether or not the filter should accept full '.
'HTML documents, not just HTML fragments. When on, it will '.
'drop all sections except the content between body. Depending on '.
'the implementation in use, this may speed up document parse times.'
);
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*
@ -52,7 +60,7 @@ class HTMLPurifier_Lexer
* @param $string String HTML.
* @return HTMLPurifier_Token array representation of HTML.
*/
function tokenizeHTML($string) {
function tokenizeHTML($string, $config = null) {
trigger_error('Call to abstract class', E_USER_ERROR);
}
@ -228,6 +236,14 @@ class HTMLPurifier_Lexer
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
}
/**
 * Takes a string of HTML (fragment or document) and returns the content
 * that should be processed: the inside of the body element for a full
 * document, or the input itself for a fragment.
 *
 * @param $html String of HTML, either a fragment or a complete document.
 * @return String contents of the body element when $html is a document
 *         with a recognizable body; otherwise $html unchanged.
 */
function extractBody($html) {
// No <html> tag in any letter case: treat the input as a fragment.
if (!preg_match('/<html/i', $html)) return $html; // already fragment
// Capture everything between the opening body tag (with any attributes)
// and the last closing body tag; 's' lets '.' span newlines.
$matches = array();
if (preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches)) {
return $matches[1];
}
// Document without a usable body (or the regex engine gave up): hand the
// input back untouched rather than silently dropping it.
return $html;
}
}
?>

View File

@ -25,7 +25,9 @@ require_once 'HTMLPurifier/Lexer.php';
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
{
public function tokenizeHTML($string) {
public function tokenizeHTML($string, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
$doc = new DOMDocument();
$doc->encoding = 'UTF-8'; // technically does nothing, but comprehensive

View File

@ -106,7 +106,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
}
function tokenizeHTML($string) {
function tokenizeHTML($string, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
// some quick checking (if empty, return empty)
$string = @ (string) $string;

View File

@ -29,7 +29,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
*/
var $tokens = array();
function tokenizeHTML($html) {
function tokenizeHTML($html, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
$html = $this->escapeCDATA($html);
$html = $this->substituteNonSpecialEntities($html);
$parser=& new XML_HTMLSax3();

View File

@ -87,7 +87,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// make sure that we have something open
if (empty($current_nesting)) {
$result[] = new HTMLPurifier_Token_Text(
$this->generator->generateFromToken($token)
$this->generator->generateFromToken($token, $config)
);
continue;
}
@ -122,7 +122,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// we still didn't find the tag, so translate to text
if ($skipped_tags === false) {
$result[] = new HTMLPurifier_Token_Text(
$this->generator->generateFromToken($token)
$this->generator->generateFromToken($token, $config)
);
continue;
}

View File

@ -43,7 +43,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} else {
// invalid tag, generate HTML and insert in
$token = new HTMLPurifier_Token_Text(
$this->generator->generateFromToken($token)
$this->generator->generateFromToken($token, $config)
);
}
} elseif ($token->type == 'comment') {

View File

@ -19,7 +19,6 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
function assertSeries($inputs, $expect, $config, $context = array()) {
foreach ($inputs as $i => $input) {
$tokens = $this->lex->tokenizeHTML($input);
if (!isset($context[$i])) {
$context[$i] = null;
@ -28,12 +27,13 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
$config[$i] = HTMLPurifier_Config::createDefault();
}
$tokens = $this->lex->tokenizeHTML($input, $config[$i]);
$result = $this->def->validateChildren($tokens, $config[$i], $context[$i]);
if (is_bool($expect[$i])) {
$this->assertIdentical($expect[$i], $result, "Test $i: %s");
} else {
$result_html = $this->gen->generateFromTokens($result);
$result_html = $this->gen->generateFromTokens($result, $config[$i]);
$this->assertIdentical($expect[$i], $result_html, "Test $i: %s");
paintIf($result_html, $result_html != $expect[$i]);
}

View File

@ -19,6 +19,7 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
$inputs = array();
$expect = array();
$config = array();
$inputs[0] = new HTMLPurifier_Token_Text('Foobar.<>');
$expect[0] = 'Foobar.&lt;&gt;';
@ -51,8 +52,10 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
$expect[7] = $theta_char;
$default_config = HTMLPurifier_Config::createDefault();
foreach ($inputs as $i => $input) {
$result = $this->gen->generateFromToken($input);
if (!isset($config[$i])) $config[$i] = $default_config;
$result = $this->gen->generateFromToken($input, $config[$i]);
$this->assertEqual($result, $expect[$i]);
paintIf($result, $result != $expect[$i]);
}
@ -63,6 +66,7 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
$inputs = array();
$expect = array();
$config = array();
$inputs[0] = array();
$expect[0] = '';
@ -81,8 +85,10 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
$inputs[4] = array('title' => 'Theta is ' . $theta_char);
$expect[4] = 'title="Theta is ' . $theta_char . '"';
$default_config = HTMLPurifier_Config::createDefault();
foreach ($inputs as $i => $input) {
$result = $this->gen->generateAttributes($input);
if (!isset($config[$i])) $config[$i] = $default_config;
$result = $this->gen->generateAttributes($input, $config[$i]);
$this->assertEqual($result, $expect[$i]);
paintIf($result, $result != $expect[$i]);
}
@ -93,6 +99,7 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
$inputs = array();
$expect = array();
$config = array();
$inputs[0] = array(
new HTMLPurifier_Token_Start('b'),
@ -104,8 +111,10 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
$inputs[1] = array();
$expect[1] = '';
$default_config = HTMLPurifier_Config::createDefault();
foreach ($inputs as $i => $input) {
$result = $this->gen->generateFromTokens($input);
if (!isset($config[$i])) $config[$i] = $default_config;
$result = $this->gen->generateFromTokens($input, $config[$i]);
$this->assertEqual($expect[$i], $result);
paintIf($result, $result != $expect[$i]);
}

View File

@ -40,11 +40,16 @@ class HTMLPurifier_LexerTest extends UnitTestCase
$this->Lexer->substituteNonSpecialEntities('"') );
}
// Placeholder test case: the body is empty, so nothing is asserted yet.
// TODO: add assertions once extractBody() behavior is settled.
function test_extractBody() {
}
function test_tokenizeHTML() {
$input = array();
$expect = array();
$sax_expect = array();
$config = array();
$input[0] = '';
$expect[0] = array();
@ -221,14 +226,17 @@ class HTMLPurifier_LexerTest extends UnitTestCase
$input[17] = $char_hearts;
$expect[17] = array( new HTMLPurifier_Token_Text($char_hearts) );
$default_config = HTMLPurifier_Config::createDefault();
foreach($input as $i => $discard) {
$result = $this->DirectLex->tokenizeHTML($input[$i]);
if (!isset($config[$i])) $config[$i] = $default_config;
$result = $this->DirectLex->tokenizeHTML($input[$i], $config[$i]);
$this->assertEqual($expect[$i], $result, 'DirectLexTest '.$i.': %s');
paintIf($result, $expect[$i] != $result);
if ($this->_has_pear) {
// assert unless I say otherwise
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i]);
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i], $config[$i]);
if (!isset($sax_expect[$i])) {
// by default, assert with normal result
$this->assertEqual($expect[$i], $sax_result, 'PEARSax3Test '.$i.': %s');
@ -244,7 +252,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
}
if ($this->_has_dom) {
$dom_result = $this->DOMLex->tokenizeHTML($input[$i]);
$dom_result = $this->DOMLex->tokenizeHTML($input[$i], $config[$i]);
// same structure as SAX
if (!isset($dom_expect[$i])) {
$this->assertEqual($expect[$i], $dom_result, 'DOMLexTest '.$i.': %s');

View File

@ -31,7 +31,7 @@ class HTMLPurifier_StrategyHarness extends UnitTestCase
$config[$i] = HTMLPurifier_Config::createDefault();
}
$result_tokens = $strategy->execute($tokens, $config[$i]);
$result = $this->gen->generateFromTokens($result_tokens);
$result = $this->gen->generateFromTokens($result_tokens, $config[$i]);
$this->assertEqual($expect[$i], $result, "Test $i: %s");
paintIf($result, $result != $expect[$i]);
}