0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-23 00:41:52 +00:00
htmlpurifier/library/HTMLPurifier/Lexer.php

90 lines
2.9 KiB
PHP
Raw Normal View History

<?php
require_once 'HTMLPurifier/Token.php';
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*
* The lexer parses a string of SGML-style markup and converts them into
* corresponding tokens. It doesn't check for well-formedness, although its
* internal mechanism may make this automatic (such as the case of
* HTMLPurifier_Lexer_DOMLex). There are several implementations to choose
* from.
*
* The lexer is HTML-oriented: it might work with XML, but it's not
* recommended, as we adhere to a subset of the specification for optimization
* reasons.
*
* This class cannot be directly instantiated, but you may use create() to
* retrieve a default copy of the lexer.
*
* @note
* We use tokens rather than create a DOM representation because DOM would:
*
* @note
* -# Require more processing power to create,
* -# Require recursion to iterate,
* -# Must be compatible with PHP 5's DOM (otherwise duplication),
* -# Has the entire document structure (html and body not needed), and
* -# Has unknown readability improvement.
*
* @note
* What the last item means is that the functions for manipulating tokens are
* already fairly compact, and when well-commented, more abstraction may not
* be needed.
*
* @see HTMLPurifier_Token
*/
class HTMLPurifier_Lexer
{
/**
* Lexes an HTML string into tokens.
*
* @param $string String HTML.
* @return HTMLPurifier_Token array representation of HTML.
*/
function tokenizeHTML($string) {
trigger_error('Call to abstract class', E_USER_ERROR);
}
/**
* Retrieves or sets the default Lexer as a Prototype Factory.
*
* Depending on what PHP version you are running, the abstract base
* Lexer class will determine which concrete Lexer is best for you:
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
* for PHP 5 and beyond.
*
* Passing the optional prototype lexer parameter will override the
* default with your own implementation. A copy/reference of the prototype
* lexer will now be returned when you request a new lexer.
*
* @note
* Though it is possible to call this factory method from subclasses,
* such usage is not recommended.
*
* @param $prototype Optional prototype lexer.
* @return Concrete lexer.
*/
function create($prototype = null) {
// we don't really care if it's a reference or a copy
static $lexer = null;
if ($prototype) {
$lexer = $prototype;
}
if (empty($lexer)) {
if (version_compare(PHP_VERSION, '5', '>=')) {
require_once 'HTMLPurifier/Lexer/DOMLex.php';
$lexer = new HTMLPurifier_Lexer_DOMLex();
} else {
require_once 'HTMLPurifier/Lexer/DirectLex.php';
$lexer = new HTMLPurifier_Lexer_DirectLex();
}
}
return $lexer;
}
}
?>