0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 16:31:53 +00:00

Begin adding Doxygen documentation.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@98 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2006-07-23 03:43:53 +00:00
parent 14f481bcf6
commit 728848c4c7
3 changed files with 339 additions and 10 deletions

236
Doxyfile Normal file
View File

@ -0,0 +1,236 @@
# Doxyfile 1.4.7
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = HTMLPurifier
PROJECT_NUMBER = trunk
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
USE_WINDOWS_ENCODING = NO
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF = "The $name class" \
"The $name widget" \
"The $name file" \
is \
provides \
specifies \
contains \
represents \
a \
an \
the
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = YES
STRIP_FROM_PATH = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier"
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = YES
MULTILINE_CPP_IS_BRIEF = NO
DETAILS_AT_TOP = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 4
ALIASES =
OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_JAVA = NO
BUILTIN_STL_SUPPORT = NO
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_PRIVATE = YES
EXTRACT_STATIC = YES
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_DIRECTORIES = NO
FILE_VERSION_FILTER =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier"
FILE_PATTERNS = *.php
RECURSIVE = YES
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = */tests/* \
*/benchmarks/*
EXAMPLE_PATH =
EXAMPLE_PATTERNS = *
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = YES
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
REFERENCES_LINK_SOURCE = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = NO
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = YES
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = YES
USE_PDFLATEX = YES
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
DOT_PATH =
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 1000
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = NO

View File

@ -1,22 +1,62 @@
<?php
/*!
* @mainpage
*
* HTMLPurifier is a purification class that will take an arbitrary snippet of
* HTML and rigorously test, validate and filter it into a version that
* is safe for output onto webpages. It achieves this by:
*
* -# Lexing (parsing into tokens) the document,
* -# Removing all elements not in the whitelist,
* -# Making the tokens well-formed,
* -# Fixing the nesting of the nodes,
* -# Validating attributes of the nodes, and
* -# Generating HTML from the purified tokens.
*
* See /docs/spec.txt for more details.
*/
require_once 'HTMLPurifier/Lexer.php';
require_once 'HTMLPurifier/Definition.php';
require_once 'HTMLPurifier/Generator.php';
/**
* Main library execution class.
*
* Facade that performs calls to the HTMLPurifier_Lexer,
* HTMLPurifier_Definition and HTMLPurifier_Generator subsystems in order to
* purify HTML.
*/
class HTMLPurifier
{
var $lexer;
var $definition;
var $generator;
var $lexer; /*!< @brief Instance of HTMLPurifier_Lexer concrete
implementation. */
var $definition; /*!< @brief Instance of HTMLPurifier_Definition. */
var $generator; /*!< @brief Instance of HTMLPurifier_Generator. */
/**
* Initializes the purifier.
*
* The constructor instantiates all necessary sub-objects to do the job,
* because creating some of them (esp. HTMLPurifier_Definition) can be
* expensive.
*
* @todo Accept Policy object to define configuration.
*/
function HTMLPurifier() {
$this->lexer = new HTMLPurifier_Lexer();
$this->lexer = new HTMLPurifier_Lexer::create();
$this->definition = new HTMLPurifier_Definition();
$this->generator = new HTMLPurifier_Generator();
}
/**
* Purifies HTML.
*
* @param $html String of HTML to purify
* @return Purified HTML
*/
function purify($html) {
$tokens = $this->lexer->tokenizeHTML($html);
$tokens = $this->definition->purifyTokens($tokens);

View File

@ -1,21 +1,74 @@
<?php
/**
* Forgivingly lexes HTML (not XML, since it doesn't adhere to spec exactly)
*/
require_once 'HTMLPurifier/Token.php';
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*
* The lexer parses a string of SGML-style markup and converts them into
* corresponding tokens. It doesn't check for well-formedness, although it's
* internal mechanism may make this automatic (such as the case of
* HTMLPurifier_Lexer_DOMLex). There are several implementations to choose
* from.
*
* The lexer is HTML-oriented: it might work with XML, but it's not
* recommended, as we adhere to a subset of the specification for optimization
* reasons.
*
* This class cannot be directly instantiated, but you may use create() to
* retrieve a default copy of the lexer.
*
* @note
* We use tokens rather than create a DOM representation because DOM would:
*
* @note
* -# Require more processing power to create,
* -# Require recursion to iterate,
* -# Must be compatible with PHP 5's DOM (otherwise duplication),
* -# Has the entire document structure (html and body not needed), and
* -# Has unknown readability improvement.
*
* @note
* What the last item means is that the functions for manipulating tokens are
* already fairly compact, and when well-commented, more abstraction may not
* be needed.
*
* @see HTMLPurifier_Token
*/
class HTMLPurifier_Lexer
{
/**
* Lexes an HTML string into tokens.
*
* @param $string String HTML.
* @return HTMLPurifier_Token array representation of HTML.
*/
function tokenizeHTML($string) {
trigger_error('Call to abstract class', E_USER_ERROR);
}
// we don't really care if it's a reference or a copy
/**
* Retrieves or sets the default Lexer as a Prototype Factory.
*
* Depending on what PHP version you are running, the abstract base
* Lexer class will determine which concrete Lexer is best for you:
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
* for PHP 5 and beyond.
*
* Passing the optional prototype lexer parameter will override the
* default with your own implementation. A copy/reference of the prototype
* lexer will now be returned when you request a new lexer.
*
* @note
* Though it is possible to call this factory method from subclasses,
* such usage is not recommended.
*
* @param $prototype Optional prototype lexer.
* @return Concrete lexer.
*/
function create($prototype = null) {
// we don't really care if it's a reference or a copy
static $lexer = null;
if ($prototype) {
$lexer = $prototype;