htmlpurifier/library/HTMLPurifier/Token.php

<?php

/**
 * Defines a set of immutable value object tokens for HTML representation.
 * 
 * @file
 */

/**
 * Abstract base token class that all others inherit from.
 */
class HTMLPurifier_Token
{
    /**
     * Type of node, kept as a plain property so that callers can branch on
     * it without an <tt>is_a()</tt> call. This base class declares it
     * without a value.
     *
     * @public
     */
    var $type;
}

/**
 * Abstract class of a tag token (start, end or empty), and its behavior.
 */
class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
{
    /**
     * Static bool marker that indicates the class is a tag.
     * 
     * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
     * without having to use a function call <tt>is_a()</tt>.
     * 
     * @public
     */
    var $is_tag = true;
    
    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     * 
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     * 
     * @public
     */
    var $name;
    
    /**
     * Associative array of the tag's attributes.
     * 
     * @public
     */
    var $attributes = array();
    
    /**
     * Legacy (PHP 4 style) constructor, kept so that PHP 4 and any code
     * calling this method by name keep working. It simply forwards to
     * __construct().
     * 
     * @param $name         String name.
     * @param $attributes   Associative array of attributes.
     */
    function HTMLPurifier_Token_Tag($name, $attributes = array()) {
        $this->__construct($name, $attributes);
    }
    
    /**
     * Non-overloaded constructor, which lower-cases passed tag name.
     * 
     * Declared as __construct() because PHP 8 no longer treats a method
     * named after its class as a constructor; with only the class-named
     * method, the name and attributes would silently never be stored.
     * 
     * @param $name         String name.
     * @param $attributes   Associative array of attributes.
     */
    function __construct($name, $attributes = array()) {
        // Skip strtolower() when ctype_lower() reports the name as
        // already consisting solely of lower-case letters.
        $this->name = ctype_lower($name) ? $name : strtolower($name);
        $this->attributes = $attributes;
    }
}

/**
 * Concrete start token class.
 */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
    
    var $type = 'start'; /**< Type marker identifying a start tag. @public */
    
}

/**
 * Concrete empty token class.
 */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
    
    var $type = 'empty'; /**< Type marker identifying an empty tag. @public */
    
}

/**
 * Concrete end token class.
 * 
 * @warning This class accepts attributes even though end tags cannot. This
 * is for optimization reasons, as under normal circumstances, the Lexers
 * do not pass attributes.
 */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
    
    var $type = 'end'; /**< Type marker identifying an end tag. @public */
    
}

/**
 * Concrete text token class.
 * 
 * Text tokens comprise of regular parsed character data (PCDATA) and raw
 * character data (from the CDATA sections). Internally, their
 * data is parsed with all entities expanded. Surprisingly, the text token
 * does have a "tag name" called #PCDATA, which is how the DTD represents it
 * in permissible child nodes.
 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{
    
    var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
    var $type = 'text';
    var $data; /**< Parsed character data of text. @public */
    var $is_whitespace; /**< Bool indicating if node is whitespace. @public */
    
    /**
     * Legacy (PHP 4 style) constructor, kept so that PHP 4 and any code
     * calling this method by name keep working. It simply forwards to
     * __construct().
     * 
     * @param $data String parsed character data.
     */
    function HTMLPurifier_Token_Text($data) {
        $this->__construct($data);
    }
    
    /**
     * Constructor, accepts data and determines if it is whitespace.
     * 
     * Declared as __construct() because PHP 8 no longer treats a method
     * named after its class as a constructor; with only the class-named
     * method, the data and whitespace flag would silently never be set.
     * 
     * @param $data String parsed character data.
     */
    function __construct($data) {
        $this->data = $data;
        // The whitespace flag is whatever ctype_space() reports for the data.
        $this->is_whitespace = ctype_space($data);
    }
    
}

/**
 * Concrete comment token class. Generally will be ignored.
 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    var $data; /**< Character data within comment. @public */
    var $type = 'comment';
    
    /**
     * Legacy (PHP 4 style) constructor, kept so that PHP 4 and any code
     * calling this method by name keep working. It simply forwards to
     * __construct().
     * 
     * @param $data String comment data.
     */
    function HTMLPurifier_Token_Comment($data) {
        $this->__construct($data);
    }
    
    /**
     * Transparent constructor: stores the comment data unchanged.
     * 
     * Declared as __construct() because PHP 8 no longer treats a method
     * named after its class as a constructor; with only the class-named
     * method, the data would silently never be stored.
     * 
     * @param $data String comment data.
     */
    function __construct($data) {
        $this->data = $data;
    }
}

?>
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`<?php`

Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Defines a set of immutable value object tokens for HTML representation.`
			`*`
			`* @file`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Abstract base token class that all others inherit from.`
			`*/`
			`class HTMLPurifier_Token {`
			`var $type; /**< Type of node to bypass <tt>is_a()</tt>. @public */`
			`}`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Abstract class of a tag token (start, end or empty), and its behavior.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract`
			`{`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Static bool marker that indicates the class is a tag.`
			`*`
			`* This allows us to check objects with <tt>!empty($obj->is_tag)</tt>`
			`* without having to use a function call <tt>is_a()</tt>.`
			`*`
			`* @public`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`var $is_tag = true;`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00
			`/**`
			`* The lower-case name of the tag, like 'a', 'b' or 'blockquote'.`
			`*`
			`* @note Strictly speaking, XML tags are case sensitive, so we shouldn't`
			`* be lower-casing them, but these tokens cater to HTML tags, which are`
			`* insensitive.`
			`*`
			`* @public`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`var $name;`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00
			`/**`
			`* Associative array of the tag's attributes.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`var $attributes = array();`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00
			`/**`
			`* Non-overloaded constructor, which lower-cases passed tag name.`
			`*`
			`* @param $name String name.`
			`* @param $attributes Associative array of attributes.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`function HTMLPurifier_Token_Tag($name, $attributes = array()) {`
			`$this->name = ctype_lower($name) ? $name : strtolower($name);`
			`$this->attributes = $attributes;`
			`}`
			`}`

Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Concrete start token class.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag`
			`{`
			`var $type = 'start';`
			`}`

Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Concrete empty token class.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag`
			`{`
			`var $type = 'empty';`
			`}`

Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Concrete end token class.`
			`*`
			`* @warning This class accepts attributes even though end tags cannot. This`
			`* is for optimization reasons, as under normal circumstances, the Lexers`
			`* do not pass attributes.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag`
			`{`
			`var $type = 'end';`
			`}`

Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Concrete text token class.`
			`*`
			`* Text tokens comprise of regular parsed character data (PCDATA) and raw`
			`* character data (from the CDATA sections). Internally, their`
			`* data is parsed with all entities expanded. Surprisingly, the text token`
			`* does have a "tag name" called #PCDATA, which is how the DTD represents it`
			`* in permissible child nodes.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`class HTMLPurifier_Token_Text extends HTMLPurifier_Token`
			`{`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00
			`var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`var $type = 'text';`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`var $data; /**< Parsed character data of text. @public */`
			`var $is_whitespace; /**< Bool indicating if node is whitespace. @public */`

			`/**`
			`* Constructor, accepts data and determines if it is whitespace.`
			`*`
			`* @param $data String parsed character data.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`function HTMLPurifier_Token_Text($data) {`
			`$this->data = $data;`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`$this->is_whitespace = ctype_space($data);`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`}`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`}`

Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Concrete comment token class. Generally will be ignored.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`class HTMLPurifier_Token_Comment extends HTMLPurifier_Token`
			`{`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`var $data; /**< Character data within comment. @public */`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`var $type = 'comment';`
Token.php: Document the code. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@99 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 13:20:15 +00:00			`/**`
			`* Transparent constructor.`
			`*`
			`* @param $data String comment data.`
			`*/`
svn:eol-style = native git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@97 48356398-32a2-884e-a903-53898d9a118a 2006-07-23 00:11:03 +00:00			`function HTMLPurifier_Token_Comment($data) {`
			`$this->data = $data;`
			`}`
			`}`

Rename MarkupFragment.php to Token.php, change internal class names and rewire the classes. We also started adding more dependence on the Lexer and Generator in unrelated tests. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@63 48356398-32a2-884e-a903-53898d9a118a 2006-07-21 11:27:54 +00:00			`?>`