From 0eadf98ee20a58f2dbfa2338583f14a639e049c8 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 10 Feb 2008 22:38:53 +0000 Subject: [PATCH] Split out tokens to prevent autoload barfing. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1543 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/Token.php | 124 ------------------------- library/HTMLPurifier/Token/Comment.php | 19 ++++ library/HTMLPurifier/Token/Empty.php | 6 ++ library/HTMLPurifier/Token/End.php | 10 ++ library/HTMLPurifier/Token/Start.php | 6 ++ library/HTMLPurifier/Token/Tag.php | 53 +++++++++++ library/HTMLPurifier/Token/Text.php | 30 ++++++ 7 files changed, 124 insertions(+), 124 deletions(-) create mode 100644 library/HTMLPurifier/Token/Comment.php create mode 100644 library/HTMLPurifier/Token/Empty.php create mode 100644 library/HTMLPurifier/Token/End.php create mode 100644 library/HTMLPurifier/Token/Start.php create mode 100644 library/HTMLPurifier/Token/Tag.php create mode 100644 library/HTMLPurifier/Token/Text.php diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php index 9d7b0c00..788b06cb 100644 --- a/library/HTMLPurifier/Token.php +++ b/library/HTMLPurifier/Token.php @@ -1,11 +1,5 @@ !empty($obj->is_tag) - * without having to use a function call is_a(). - */ - public $is_tag = true; - - /** - * The lower-case name of the tag, like 'a', 'b' or 'blockquote'. - * - * @note Strictly speaking, XML tags are case sensitive, so we shouldn't - * be lower-casing them, but these tokens cater to HTML tags, which are - * insensitive. - */ - public $name; - - /** - * Associative array of the tag's attributes. - */ - public $attr = array(); - - /** - * Non-overloaded constructor, which lower-cases passed tag name. - * - * @param $name String name. - * @param $attr Associative array of attributes. - */ - public function __construct($name, $attr = array(), $line = null) { - $this->name = ctype_lower($name) ? $name : strtolower($name); - foreach ($attr as $key => $value) { - // normalization only necessary when key is not lowercase - if (!ctype_lower($key)) { - $new_key = strtolower($key); - if (!isset($attr[$new_key])) { - $attr[$new_key] = $attr[$key]; - } - if ($new_key !== $key) { - unset($attr[$key]); - } - } - } - $this->attr = $attr; - $this->line = $line; - } -} - -/** - * Concrete start token class. - */ -class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag {} - -/** - * Concrete empty token class. - */ -class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag {} - -/** - * Concrete end token class. - * - * @warning This class accepts attributes even though end tags cannot. This - * is for optimization reasons, as under normal circumstances, the Lexers - * do not pass attributes. - */ -class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag {} - -/** - * Concrete text token class. - * - * Text tokens comprise of regular parsed character data (PCDATA) and raw - * character data (from the CDATA sections). Internally, their - * data is parsed with all entities expanded. Surprisingly, the text token - * does have a "tag name" called #PCDATA, which is how the DTD represents it - * in permissible child nodes. - */ -class HTMLPurifier_Token_Text extends HTMLPurifier_Token -{ - - public $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. */ - public $data; /**< Parsed character data of text. */ - public $is_whitespace; /**< Bool indicating if node is whitespace. */ - - /** - * Constructor, accepts data and determines if it is whitespace. - * - * @param $data String parsed character data. - */ - public function __construct($data, $line = null) { - $this->data = $data; - $this->is_whitespace = ctype_space($data); - $this->line = $line; - } - -} - -/** - * Concrete comment token class. Generally will be ignored. - */ -class HTMLPurifier_Token_Comment extends HTMLPurifier_Token -{ - public $data; /**< Character data within comment. */ - /** - * Transparent constructor. - * - * @param $data String comment data. - */ - public function __construct($data, $line = null) { - $this->data = $data; - $this->line = $line; - } -} - diff --git a/library/HTMLPurifier/Token/Comment.php b/library/HTMLPurifier/Token/Comment.php new file mode 100644 index 00000000..1571a40d --- /dev/null +++ b/library/HTMLPurifier/Token/Comment.php @@ -0,0 +1,19 @@ +data = $data; + $this->line = $line; + } +} + diff --git a/library/HTMLPurifier/Token/Empty.php b/library/HTMLPurifier/Token/Empty.php new file mode 100644 index 00000000..772015da --- /dev/null +++ b/library/HTMLPurifier/Token/Empty.php @@ -0,0 +1,6 @@ +!empty($obj->is_tag) + * without having to use a function call is_a(). + */ + public $is_tag = true; + + /** + * The lower-case name of the tag, like 'a', 'b' or 'blockquote'. + * + * @note Strictly speaking, XML tags are case sensitive, so we shouldn't + * be lower-casing them, but these tokens cater to HTML tags, which are + * insensitive. + */ + public $name; + + /** + * Associative array of the tag's attributes. + */ + public $attr = array(); + + /** + * Non-overloaded constructor, which lower-cases passed tag name. + * + * @param $name String name. + * @param $attr Associative array of attributes. + */ + public function __construct($name, $attr = array(), $line = null) { + $this->name = ctype_lower($name) ? $name : strtolower($name); + foreach ($attr as $key => $value) { + // normalization only necessary when key is not lowercase + if (!ctype_lower($key)) { + $new_key = strtolower($key); + if (!isset($attr[$new_key])) { + $attr[$new_key] = $attr[$key]; + } + if ($new_key !== $key) { + unset($attr[$key]); + } + } + } + $this->attr = $attr; + $this->line = $line; + } +} diff --git a/library/HTMLPurifier/Token/Text.php b/library/HTMLPurifier/Token/Text.php new file mode 100644 index 00000000..3942f8a0 --- /dev/null +++ b/library/HTMLPurifier/Token/Text.php @@ -0,0 +1,30 @@ +data = $data; + $this->is_whitespace = ctype_space($data); + $this->line = $line; + } + +}