mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-01-10 16:01:53 +00:00
Split out tokens to prevent autoload barfing.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1543 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
parent
6eb193a316
commit
0eadf98ee2
@ -1,11 +1,5 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
/**
|
|
||||||
* Defines a set of immutable value object tokens for HTML representation.
|
|
||||||
*
|
|
||||||
* @file
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract base token class that all others inherit from.
|
* Abstract base token class that all others inherit from.
|
||||||
*/
|
*/
|
||||||
@ -42,121 +36,3 @@ class HTMLPurifier_Token {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Abstract class of a tag token (start, end or empty), and its behavior.
 */
class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
{
    /**
     * Static bool marker that indicates the class is a tag.
     *
     * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
     * without having to use a function call <tt>is_a()</tt>.
     */
    public $is_tag = true;

    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     */
    public $name;

    /**
     * Associative array of the tag's attributes, keyed by lower-cased name.
     */
    public $attr = array();

    /**
     * Non-overloaded constructor, which lower-cases the passed tag name and
     * the keys of the passed attribute array.
     *
     * When several keys differ only by case, a key that is already
     * lower-case keeps its (non-null) value; otherwise the first spelling
     * encountered during iteration supplies the value. The other spellings
     * are discarded.
     *
     * @param $name String name.
     * @param $attr Associative array of attributes.
     * @param $line Stored unmodified in $this->line; presumably the source
     *              line number of the token (confirm against
     *              HTMLPurifier_Token).
     */
    public function __construct($name, $attr = array(), $line = null) {
        $this->name = ctype_lower($name) ? $name : strtolower($name);
        foreach ($attr as $key => $value) {
            // PHP stores numeric-string keys as integers. Handing an integer
            // to ctype_lower() makes it treat small values as ASCII codes
            // (and is deprecated), and comparing the integer against the
            // string returned by strtolower() with !== is always true, which
            // used to delete such entries. Work on a string copy instead.
            $key_string = (string) $key;
            if (ctype_lower($key_string)) {
                // normalization only necessary when key is not lowercase
                continue;
            }
            $new_key = strtolower($key_string);
            if ($new_key === $key_string) {
                // nothing to fold (digits, '-', ':', ...): leave entry alone
                continue;
            }
            if (!isset($attr[$new_key])) {
                $attr[$new_key] = $value;
            }
            unset($attr[$key]);
        }
        $this->attr = $attr;
        $this->line = $line;
    }
}
|
|
||||||
|
|
||||||
/**
 * Concrete token class for a start tag.
 *
 * Adds nothing of its own; all state and behavior come from
 * HTMLPurifier_Token_Tag.
 */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
}
|
|
||||||
|
|
||||||
/**
 * Concrete token class for an empty tag.
 *
 * Adds nothing of its own; all state and behavior come from
 * HTMLPurifier_Token_Tag.
 */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
}
|
|
||||||
|
|
||||||
/**
 * Concrete token class for an end tag.
 *
 * @warning This class accepts attributes even though end tags cannot. This
 *          is for optimization reasons, as under normal circumstances, the
 *          Lexers do not pass attributes.
 */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
}
|
|
||||||
|
|
||||||
/**
 * Concrete text token class.
 *
 * Text tokens comprise of regular parsed character data (PCDATA) and raw
 * character data (from the CDATA sections). Internally, their
 * data is parsed with all entities expanded. Surprisingly, the text token
 * does have a "tag name" called #PCDATA, which is how the DTD represents it
 * in permissible child nodes.
 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{
    /**
     * PCDATA tag name compatible with DTD.
     */
    public $name = '#PCDATA';

    /**
     * Parsed character data of text.
     */
    public $data;

    /**
     * Bool indicating if node is whitespace, as reported by ctype_space().
     */
    public $is_whitespace;

    /**
     * Constructor, accepts data and determines if it is whitespace.
     *
     * @param $data String parsed character data.
     * @param $line Stored unmodified in $this->line; presumably the source
     *              line number of the token (confirm against
     *              HTMLPurifier_Token).
     */
    public function __construct($data, $line = null) {
        $this->line = $line;
        $this->is_whitespace = ctype_space($data);
        $this->data = $data;
    }
}
|
|
||||||
|
|
||||||
/**
 * Concrete comment token class. Generally will be ignored.
 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    /**
     * Character data within comment.
     */
    public $data;

    /**
     * Transparent constructor: both arguments are stored unmodified.
     *
     * @param $data String comment data.
     * @param $line Stored in $this->line; presumably the source line number
     *              of the token (confirm against HTMLPurifier_Token).
     */
    public function __construct($data, $line = null) {
        $this->line = $line;
        $this->data = $data;
    }
}
|
|
||||||
|
|
||||||
|
19
library/HTMLPurifier/Token/Comment.php
Normal file
19
library/HTMLPurifier/Token/Comment.php
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Concrete comment token class. Generally will be ignored.
 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    /**
     * Character data within comment.
     */
    public $data;

    /**
     * Transparent constructor: both arguments are stored unmodified.
     *
     * @param $data String comment data.
     * @param $line Stored in $this->line; presumably the source line number
     *              of the token (confirm against HTMLPurifier_Token).
     */
    public function __construct($data, $line = null) {
        $this->line = $line;
        $this->data = $data;
    }
}
|
||||||
|
|
6
library/HTMLPurifier/Token/Empty.php
Normal file
6
library/HTMLPurifier/Token/Empty.php
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Concrete token class for an empty tag.
 *
 * Adds nothing of its own; all state and behavior come from
 * HTMLPurifier_Token_Tag.
 */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
}
|
10
library/HTMLPurifier/Token/End.php
Normal file
10
library/HTMLPurifier/Token/End.php
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Concrete token class for an end tag.
 *
 * @warning This class accepts attributes even though end tags cannot. This
 *          is for optimization reasons, as under normal circumstances, the
 *          Lexers do not pass attributes.
 */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
}
|
6
library/HTMLPurifier/Token/Start.php
Normal file
6
library/HTMLPurifier/Token/Start.php
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Concrete token class for a start tag.
 *
 * Adds nothing of its own; all state and behavior come from
 * HTMLPurifier_Token_Tag.
 */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
}
|
53
library/HTMLPurifier/Token/Tag.php
Normal file
53
library/HTMLPurifier/Token/Tag.php
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Abstract class of a tag token (start, end or empty), and its behavior.
 */
class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
{
    /**
     * Static bool marker that indicates the class is a tag.
     *
     * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
     * without having to use a function call <tt>is_a()</tt>.
     */
    public $is_tag = true;

    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     */
    public $name;

    /**
     * Associative array of the tag's attributes, keyed by lower-cased name.
     */
    public $attr = array();

    /**
     * Non-overloaded constructor, which lower-cases the passed tag name and
     * the keys of the passed attribute array.
     *
     * When several keys differ only by case, a key that is already
     * lower-case keeps its (non-null) value; otherwise the first spelling
     * encountered during iteration supplies the value. The other spellings
     * are discarded.
     *
     * @param $name String name.
     * @param $attr Associative array of attributes.
     * @param $line Stored unmodified in $this->line; presumably the source
     *              line number of the token (confirm against
     *              HTMLPurifier_Token).
     */
    public function __construct($name, $attr = array(), $line = null) {
        $this->name = ctype_lower($name) ? $name : strtolower($name);
        foreach ($attr as $key => $value) {
            // PHP stores numeric-string keys as integers. Handing an integer
            // to ctype_lower() makes it treat small values as ASCII codes
            // (and is deprecated), and comparing the integer against the
            // string returned by strtolower() with !== is always true, which
            // used to delete such entries. Work on a string copy instead.
            $key_string = (string) $key;
            if (ctype_lower($key_string)) {
                // normalization only necessary when key is not lowercase
                continue;
            }
            $new_key = strtolower($key_string);
            if ($new_key === $key_string) {
                // nothing to fold (digits, '-', ':', ...): leave entry alone
                continue;
            }
            if (!isset($attr[$new_key])) {
                $attr[$new_key] = $value;
            }
            unset($attr[$key]);
        }
        $this->attr = $attr;
        $this->line = $line;
    }
}
|
30
library/HTMLPurifier/Token/Text.php
Normal file
30
library/HTMLPurifier/Token/Text.php
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Concrete text token class.
 *
 * Text tokens comprise of regular parsed character data (PCDATA) and raw
 * character data (from the CDATA sections). Internally, their
 * data is parsed with all entities expanded. Surprisingly, the text token
 * does have a "tag name" called #PCDATA, which is how the DTD represents it
 * in permissible child nodes.
 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{
    /**
     * PCDATA tag name compatible with DTD.
     */
    public $name = '#PCDATA';

    /**
     * Parsed character data of text.
     */
    public $data;

    /**
     * Bool indicating if node is whitespace, as reported by ctype_space().
     */
    public $is_whitespace;

    /**
     * Constructor, accepts data and determines if it is whitespace.
     *
     * @param $data String parsed character data.
     * @param $line Stored unmodified in $this->line; presumably the source
     *              line number of the token (confirm against
     *              HTMLPurifier_Token).
     */
    public function __construct($data, $line = null) {
        $this->line = $line;
        $this->is_whitespace = ctype_space($data);
        $this->data = $data;
    }
}
|
Loading…
Reference in New Issue
Block a user