0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-23 08:51:53 +00:00
htmlpurifier/library/HTMLPurifier/Token.php

133 lines
3.3 KiB
PHP
Raw Normal View History

<?php
/**
* Defines a set of immutable value object tokens for HTML representation.
*
* @file
*/
/**
 * Root of the token hierarchy; every other token class derives from it.
 */
class HTMLPurifier_Token
{
    /**
     * Type of node, exposed as a property so callers can bypass
     * <tt>is_a()</tt> checks. No value is assigned at this level.
     *
     * @public
     */
    var $type;
}
/**
 * Abstract class of a tag token (start, end or empty), and its behavior.
 *
 * The class is abstract by convention only; nothing in the language
 * declaration prevents it from being instantiated.
 */
class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
{
    /**
     * Static bool marker that indicates the class is a tag.
     *
     * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
     * without having to use a function call <tt>is_a()</tt>.
     *
     * @public
     */
    var $is_tag = true;

    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     *       be lower-casing them, but these tokens cater to HTML tags, which
     *       are insensitive.
     *
     * @public
     */
    var $name;

    /**
     * Associative array of the tag's attributes.
     */
    var $attributes = array();

    /**
     * Constructor, which lower-cases the passed tag name.
     *
     * Declared as __construct() because runtimes that no longer recognise
     * class-named constructors (PHP 8 and later) would otherwise skip
     * initialisation entirely, leaving $name unset and $attributes empty.
     *
     * @param $name String name.
     * @param $attributes Associative array of attributes.
     */
    function __construct($name, $attributes = array()) {
        // Skip the strtolower() call when the name is already all lower-case.
        $this->name = ctype_lower($name) ? $name : strtolower($name);
        $this->attributes = $attributes;
    }

    /**
     * Legacy class-named constructor, kept for backward compatibility.
     *
     * Explicit calls to this method (for example from a subclass) keep
     * working, and on runtimes that treat a class-named method as the
     * constructor it simply forwards to __construct().
     *
     * @param $name String name.
     * @param $attributes Associative array of attributes.
     */
    function HTMLPurifier_Token_Tag($name, $attributes = array()) {
        $this->__construct($name, $attributes);
    }
}
/**
 * Tag token for an opening (start) tag.
 */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag {
    /** Type marker identifying this token as a start tag. */
    var $type = 'start';
}
/**
 * Tag token for an empty tag.
 */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag {
    /** Type marker identifying this token as an empty tag. */
    var $type = 'empty';
}
/**
 * Tag token for a closing (end) tag.
 *
 * @warning Although end tags cannot carry attributes, this class still
 *          accepts them. That is an optimization: under normal
 *          circumstances the Lexers do not pass attributes, so no
 *          special handling is added here.
 */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag {
    /** Type marker identifying this token as an end tag. */
    var $type = 'end';
}
/**
 * Concrete text token class.
 *
 * Text tokens comprise of regular parsed character data (PCDATA) and raw
 * character data (from the CDATA sections). Internally, their
 * data is parsed with all entities expanded. Surprisingly, the text token
 * does have a "tag name" called #PCDATA, which is how the DTD represents it
 * in permissible child nodes.
 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{
    var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
    var $type = 'text';
    var $data; /**< Parsed character data of text. @public */
    var $is_whitespace; /**< Bool indicating if node is whitespace. @public */

    /**
     * Constructor, accepts data and determines if it is whitespace.
     *
     * Declared as __construct() because runtimes that no longer recognise
     * class-named constructors (PHP 8 and later) would otherwise never run
     * it, leaving $data and $is_whitespace unset.
     *
     * @param $data String parsed character data.
     */
    function __construct($data) {
        $this->data = $data;
        // The flag is whatever ctype_space() reports for the data; per the
        // PHP manual an empty string yields false.
        $this->is_whitespace = ctype_space($data);
    }

    /**
     * Legacy class-named constructor, kept for backward compatibility.
     *
     * Forwards to __construct() so explicit calls, and runtimes that treat
     * a class-named method as the constructor, behave the same way.
     *
     * @param $data String parsed character data.
     */
    function HTMLPurifier_Token_Text($data) {
        $this->__construct($data);
    }
}
/**
 * Concrete comment token class. Generally will be ignored.
 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    var $data; /**< Character data within comment. @public */
    var $type = 'comment';

    /**
     * Transparent constructor: stores the comment data unchanged.
     *
     * Declared as __construct() because runtimes that no longer recognise
     * class-named constructors (PHP 8 and later) would otherwise never run
     * it, leaving $data unset.
     *
     * @param $data String comment data.
     */
    function __construct($data) {
        $this->data = $data;
    }

    /**
     * Legacy class-named constructor, kept for backward compatibility.
     *
     * Forwards to __construct() so explicit calls, and runtimes that treat
     * a class-named method as the constructor, behave the same way.
     *
     * @param $data String comment data.
     */
    function HTMLPurifier_Token_Comment($data) {
        $this->__construct($data);
    }
}
?>