From dc242158749de79e4f155d03874400ed6be60178 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 23 Jul 2006 13:20:15 +0000 Subject: [PATCH] Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/Token.php | 99 +++++++++++++++++++++++++++++----- 1 file changed, 86 insertions(+), 13 deletions(-) diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php index 6275ea5a..eace6510 100644 --- a/library/HTMLPurifier/Token.php +++ b/library/HTMLPurifier/Token.php @@ -1,57 +1,130 @@ is_a(). @public */ +} +/** + * Abstract class of a tag token (start, end or empty), and its behavior. + */ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract { + /** + * Static bool marker that indicates the class is a tag. + * + * This allows us to check objects with !empty($obj->is_tag) + * without having to use a function call is_a(). + * + * @public + */ var $is_tag = true; + + /** + * The lower-case name of the tag, like 'a', 'b' or 'blockquote'. + * + * @note Strictly speaking, XML tags are case sensitive, so we shouldn't + * be lower-casing them, but these tokens cater to HTML tags, which are + * insensitive. + * + * @public + */ var $name; + + /** + * Associative array of the tag's attributes. + */ var $attributes = array(); + + /** + * Non-overloaded constructor, which lower-cases passed tag name. + * + * @param $name String name. + * @param $attributes Associative array of attributes. + */ function HTMLPurifier_Token_Tag($name, $attributes = array()) { $this->name = ctype_lower($name) ? $name : strtolower($name); $this->attributes = $attributes; } } -// start CONCRETE ones - +/** + * Concrete start token class. + */ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag { var $type = 'start'; } +/** + * Concrete empty token class. 
+ */ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag { var $type = 'empty'; } -// accepts attributes even though it really can't, for optimization reasons +/** + * Concrete end token class. + * + * @warning This class accepts attributes even though end tags cannot. This + * is for optimization reasons, as under normal circumstances, the Lexers + * do not pass attributes. + */ class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag { var $type = 'end'; } +/** + * Concrete text token class. + * + * Text tokens consist of regular parsed character data (PCDATA) and raw + * character data (from the CDATA sections). Internally, their + * data is parsed with all entities expanded. Surprisingly, the text token + * does have a "tag name" called #PCDATA, which is how the DTD represents it + * in permissible child nodes. + */ class HTMLPurifier_Token_Text extends HTMLPurifier_Token { - var $name = '#PCDATA'; + + var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */ var $type = 'text'; - var $data; - var $is_whitespace = false; + var $data; /**< Parsed character data of text. @public */ + var $is_whitespace; /**< Bool indicating if node is whitespace. @public */ + + /** + * Constructor, accepts data and determines if it is whitespace. + * + * @param $data String parsed character data. + */ function HTMLPurifier_Token_Text($data) { $this->data = $data; - if (ctype_space($data)) $this->is_whitespace = true; - } - function append($text) { - return new HTMLPurifier_Token_Text($this->data . $text->data); + $this->is_whitespace = ctype_space($data); } + } +/** + * Concrete comment token class. Generally will be ignored. + */ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token { - var $data; + var $data; /**< Character data within comment. @public */ var $type = 'comment'; + /** + * Transparent constructor. + * + * @param $data String comment data. + */ function HTMLPurifier_Token_Comment($data) { $this->data = $data; }