diff --git a/Doxyfile b/Doxyfile
index 044217be..5bd7625b 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -1,19 +1,93 @@
-# Doxyfile 1.4.7
+# Doxyfile 1.5.3
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
-PROJECT_NAME = HTML Purifier
-PROJECT_NUMBER = 2.1.2
-OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
+
+# This tag specifies the encoding used for all characters in the config file that
+# follow. The default is UTF-8 which is also the encoding used for all text before
+# the first occurrence of this tag. Doxygen uses libiconv (or the iconv built into
+# libc) for the transcoding. See http://www.gnu.org/software/libiconv for the list of
+# possible encodings.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = HTMLPurifier
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER = 2.1.3
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = "docs/doxygen "
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian,
+# Italian, Japanese, Japanese-en (Japanese with English messages), Korean,
+# Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian,
+# Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian.
+
OUTPUT_LANGUAGE = English
-USE_WINDOWS_ENCODING = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
REPEAT_BRIEF = YES
-ABBREVIATE_BRIEF = "The $name class" \
- "The $name widget" \
- "The $name file" \
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF = "The $name class " \
+ "The $name widget " \
+ "The $name file " \
is \
provides \
specifies \
@@ -22,71 +96,440 @@ ABBREVIATE_BRIEF = "The $name class" \
a \
an \
the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
FULL_PATH_NAMES = YES
-STRIP_FROM_PATH = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier"
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH = "C:/Users/Edward/Webs/htmlpurifier " \
+ "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier "
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful is your file systems
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
JAVADOC_AUTOBRIEF = YES
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
DETAILS_AT_TOP = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
TAB_SIZE = 4
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for Java.
+# For instance, namespaces will be presented as packages, qualified scopes
+# will look different, etc.
+
OPTIMIZE_OUTPUT_JAVA = NO
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to
+# include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
BUILTIN_STL_SUPPORT = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT = NO
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
DISTRIBUTE_GROUP_DOC = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
SUBGROUPING = YES
+
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
EXTRACT_PRIVATE = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
EXTRACT_STATIC = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be extracted
+# and appear in the documentation as a namespace called 'anonymous_namespace{file}',
+# where file will be replaced with the base name of the file that contains the anonymous
+# namespace. By default anonymous namespace are hidden.
+
+EXTRACT_ANON_NSPACES = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
SHOW_INCLUDE_FILES = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
SORT_BRIEF_DOCS = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
SORT_BY_SCOPE_NAME = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
SHOW_DIRECTORIES = NO
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from the
+# version control system). Doxygen will invoke the program by executing (via
+# popen()) the command , where is the value of
+# the FILE_VERSION_FILTER tag, and is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
FILE_VERSION_FILTER =
+
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
WARN_IF_DOC_ERROR = YES
+
+# This WARN_NO_PARAMDOC option can be abled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
WARN_NO_PARAMDOC = NO
-WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT = "$file:$line: $text "
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
WARN_LOGFILE =
+
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
-INPUT = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier"
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = ". "
+
+# This tag can be used to specify the character encoding of the source files that
+# doxygen parses. Internally doxygen uses the UTF-8 encoding, which is also the default
+# input encoding. Doxygen uses libiconv (or the iconv built into libc) for the transcoding.
+# See http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py
+
FILE_PATTERNS = *.php
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
EXCLUDE_PATTERNS = */tests/* \
*/benchmarks/* \
*/docs/* \
@@ -94,149 +537,778 @@ EXCLUDE_PATTERNS = */tests/* \
*/configdoc/* \
*/test-settings.php \
*/maintenance/* \
- */smoketests/*
+ */smoketests/* \
+ */library/standalone/* \
+ */.svn*
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the output.
+# The symbol name can be a fully qualified name, a word, or if the wildcard * is used,
+# a substring. Examples: ANamespace, AClass, AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
EXAMPLE_PATTERNS = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command , where
+# is the value of the INPUT_FILTER tag, and is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output. If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
FILTER_SOURCE_FILES = NO
+
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO. If you have enabled CALL_GRAPH or CALLER_GRAPH
+# then you must also enable this option. If you don't then doxygen will produce
+# a warning and turn it on anyway
+
SOURCE_BROWSER = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default)
+# then for each documented function all documented
+# functions referencing it will be listed.
+
REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default)
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
REFERENCES_RELATION = YES
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code. Otherwise they will link to the documentstion.
+
REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
VERBATIM_HEADERS = YES
+
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
IGNORE_PREFIX =
+
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
HTML_STYLESHEET =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
GENERATE_HTMLHELP = NO
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
ENUM_VALUES_PER_LINE = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature.
+
GENERATE_TREEVIEW = YES
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
TREEVIEW_WIDTH = 250
+
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
PDF_HYPERLINKS = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
LATEX_HIDE_INDICES = NO
+
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
RTF_EXTENSIONS_FILE =
+
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
MAN_LINKS = NO
+
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
XML_PROGRAMLISTING = YES
+
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
GENERATE_AUTOGEN_DEF = NO
+
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
PERLMOD_MAKEVAR_PREFIX =
+
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
SKIP_FUNCTION_MACROS = YES
+
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
PERL_PATH = /usr/bin/perl
+
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see http://www.mcternan.me.uk/mscgen/) to
+# produce the chart and insert it in the documentation. The MSCGEN_PATH tag allows you to
+# specify the directory where the mscgen tool resides. If left empty the tool is assumed to
+# be found in the default search path.
+
+MSCGEN_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
HAVE_DOT = NO
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# the CLASS_DIAGRAMS tag to NO.
+
CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will
+# generate a call dependency graph for every global function or class method.
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+
CALL_GRAPH = NO
+
+# If the CALLER_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will
+# generate a caller dependency graph for every global function or class method.
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command.
+
CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will graphical hierarchy of all classes instead of a textual one.
+
GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
DOTFILE_DIRS =
-MAX_DOT_GRAPH_WIDTH = 1024
-MAX_DOT_GRAPH_HEIGHT = 1024
+
+# The MAX_DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that doxygen if the number
+# of direct children of the root node in a graph is already larger than
+# MAX_DOT_GRAPH_NOTES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
MAX_DOT_GRAPH_DEPTH = 1000
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, which results in a white background.
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+
DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
DOT_CLEANUP = YES
+
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
SEARCHENGINE = NO
diff --git a/INSTALL b/INSTALL
index 1d68c0c9..317c89bb 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,64 +1,56 @@
+
Install
How to install HTML Purifier
-HTML Purifier is designed to run out of the box, so actually using the library
-is extremely easy. (Although, if you were looking for a step-by-step
-installation GUI, you've come to the wrong place!) The impatient can scroll
-down to the bottom of this INSTALL document to see the code, but you really
-should make sure a few things are properly done.
-
-
+HTML Purifier is designed to run out of the box, so actually using the
+library is extremely easy. (Although... if you were looking for a
+step-by-step installation GUI, you've downloaded the wrong software!)
+
+While the impatient can get going immediately with some of the sample
+code at the bottom of this library, it's well worth performing some
+basic sanity checks to get the most out of this library.
+---------------------------------------------------------------------------
1. Compatibility
-HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.2 and up. It has no
-core dependencies with other libraries.
+HTML Purifier works in both PHP 4 and PHP 5, and is actively tested from
+PHP 4.3.7 and up (see tests/multitest.php for specific versions). It has
+no core dependencies with other libraries. PHP 4 support will be
+deprecated on December 31, 2007, at which time only essential security
+fixes will be issued for the PHP 4 version until August 8, 2008.
-Optional extensions are iconv (usually installed) and tidy (also common).
-If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
-not having either of these extensions.
+These optional extensions can enhance the capabilities of HTML Purifier:
+
+ * iconv : Converts text to and from non-UTF-8 encodings
+ * tidy : Used for pretty-printing HTML
+---------------------------------------------------------------------------
+2. Reconnaissance
-2. Including the library
+A big plus of HTML Purifier is its inerrant support of standards, so
+your web-pages should be standards-compliant. (They should also use
+semantic markup, but that's another issue altogether, one HTML Purifier
+cannot fix without reading your mind.)
-Simply use:
-
- require_once '/path/to/library/HTMLPurifier.auto.php';
-
-...and you're good to go. Since HTML Purifier's codebase is fairly
-large, I recommend only including HTML Purifier when you need it.
-
-If you don't like your include_path to be fiddled around with, simply set
-HTML Purifier's library/ directory to the include path yourself and then:
-
- require_once 'HTMLPurifier.php';
-
-Only the contents in the library/ folder are necessary, so you can remove
-everything else when using HTML Purifier in a production environment.
-
-
-
-3. Preparing the proper output environment
-
-HTML Purifier is all about web-standards, so accordingly your webpages should
-be standards compliant. HTML Purifier can deal with these doctypes:
+HTML Purifier can process these doctypes:
* XHTML 1.0 Transitional (default)
* XHTML 1.0 Strict
* HTML 4.01 Transitional
* HTML 4.01 Strict
-* XHTML 1.1 (sans Ruby)
+* XHTML 1.1
...and these character encodings:
* UTF-8 (default)
-* Any encoding iconv supports (support is crippled for i18n though)
+* Any encoding iconv supports (with crippled internationalization support)
-The defaults are there for a reason: they are best-practice choices that
-should not be changed lightly. For those of you in the dark, you can determine
-the doctype from this code in your HTML documents:
+These defaults reflect what my choices where be if I were authoring an
+HTML document, however, what you choose depends on the nature of your
+codebase. If you don't know what doctype you are using, you can determine
+the doctype from this identifier at the top of your source code:
@@ -67,18 +59,34 @@ the doctype from this code in your HTML documents:
-For legacy codebases these declarations may be missing. If that is the case,
-STOP, and read docs/enduser-utf8.html
+If the character encoding declaration is missing, STOP NOW, and
+read 'docs/enduser-utf8.html' (web accessible at
+http://htmlpurifier.org/docs/enduser-utf8.html). In fact, even if it is
+present, read this document anyway, as most websites specify character
+encoding incorrectly.
+---------------------------------------------------------------------------
+3. Including the library
+
+The procedure is quite simple:
+
+ require_once '/path/to/library/HTMLPurifier.auto.php';
+
+I recommend only including HTML Purifier when you need it, because that
+call represents the inclusion of a lot of PHP files which constitute
+the bulk of HTML Purifier's memory usage.
+
+If you don't like your include_path to be fiddled around with, simply set
+HTML Purifier's library/ directory to the include path yourself and then:
+
+ require_once 'HTMLPurifier.php';
+
+Only the contents in the library/ folder are necessary, so you can remove
+everything else when using HTML Purifier in a production environment.
-
-You may currently be vulnerable to XSS and other security threats, and HTML
-Purifier won't be able to fix that.
-
-
-
+---------------------------------------------------------------------------
4. Configuration
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
@@ -95,7 +103,6 @@ object and read on:
$config = HTMLPurifier_Config::createDefault();
-
4.1. Setting a different character encoding
You really shouldn't use any other encoding except UTF-8, especially if you
@@ -122,10 +129,6 @@ but please be cognizant of the issues the "solution" creates (for this
reason, I do not include the solution in this document).
-
-
-
-
4.2. Setting a different doctype
For those of you using HTML 4.01 Transitional, you can disable
@@ -135,7 +138,6 @@ XHTML output like this:
Other supported doctypes include:
-
* HTML 4.01 Strict
* HTML 4.01 Transitional
* XHTML 1.0 Strict
@@ -143,7 +145,6 @@ Other supported doctypes include:
* XHTML 1.1
-
4.3. Other settings
There are more configuration directives which can be read about
@@ -153,55 +154,24 @@ your code. Some of the more interesting ones are configurable at the
demo and are well worth looking into
for your own system.
+For example, you can fine tune allowed elements and attributes, convert
+relative URLs to absolute ones, and even autoparagraph input text! These
+are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
+%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
+translates to:
+
+ $config->set('Namespace', 'Directive', $value);
+
+E.g.
+
+ $config->set('HTML', 'Allowed', 'p,b,a[href],i');
+ $config->set('URI', 'Base', 'http://www.example.com');
+ $config->set('URI', 'MakeAbsolute', true);
+ $config->set('AutoFormat', 'AutoParagraph', true);
-5. Using the code
-
-The interface is mind-numbingly simple:
-
- $purifier = new HTMLPurifier();
- $clean_html = $purifier->purify( $dirty_html );
-
-...or, if you're using the configuration object:
-
- $purifier = new HTMLPurifier($config);
- $clean_html = $purifier->purify( $dirty_html );
-
-That's it! For more examples, check out docs/examples/ (they aren't very
-different though). Also, docs/enduser-slow.html gives advice on what to
-do if HTML Purifier is slowing down your application.
-
-
-
-6. Quick install
-
-First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
-writable by the webserver (see Section 7: Caching below for details).
-If your website is in UTF-8 and XHTML Transitional, use this code:
-
-purify($dirty_html);
-?>
-
-If your website is in a different encoding or doctype, use this code:
-
-set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
- $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
- $purifier = new HTMLPurifier($config);
-
- $clean_html = $purifier->purify($dirty_html);
-?>
-
-
-
-7. Caching
+---------------------------------------------------------------------------
+5. Caching
HTML Purifier generates some cache files (generally one or two) to speed up
its execution. For maximum performance, make sure that
@@ -236,3 +206,50 @@ hit):
Or move the cache directory somewhere else (no trailing slash):
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
+
+
+---------------------------------------------------------------------------
+6. Using the code
+
+The interface is mind-numbingly simple:
+
+ $purifier = new HTMLPurifier();
+ $clean_html = $purifier->purify( $dirty_html );
+
+...or, if you're using the configuration object:
+
+ $purifier = new HTMLPurifier($config);
+ $clean_html = $purifier->purify( $dirty_html );
+
+That's it! For more examples, check out docs/examples/ (they aren't very
+different though). Also, docs/enduser-slow.html gives advice on what to
+do if HTML Purifier is slowing down your application.
+
+
+---------------------------------------------------------------------------
+7. Quick install
+
+First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
+writable by the webserver (see Section 5: Caching above for details).
+If your website is in UTF-8 and XHTML Transitional, use this code:
+
+purify($dirty_html);
+?>
+
+If your website is in a different encoding or doctype, use this code:
+
+set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
+ $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
+ $purifier = new HTMLPurifier($config);
+
+ $clean_html = $purifier->purify($dirty_html);
+?>
+
diff --git a/NEWS b/NEWS
index 212edc94..2f97331f 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,55 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
+2.1.3, released 2007-11-05
+! tests/multitest.php allows you to test multiple versions by running
+ tests/index.php through multiple interpreters using `phpv` shell
+ script (you must provide this script!)
+- Fixed poor include ordering for Email URI AttrDefs, causes fatal errors
+ on some systems.
+- Injector algorithm further refined: off-by-one error regarding skip
+ counts for dormant injectors fixed
+- Corrective blockquote definition now enabled for HTML 4.01 Strict
+- Fatal error when tag (or any other element with required attributes)
+ has 'id' attribute fixed, thanks NykO18 for reporting
+- Fix warning emitted when a non-supported URI scheme is passed to the
+ MakeAbsolute URIFilter, thanks NykO18 (again)
+- Further refine AutoParagraph injector. Behavior inside of elements
+ allowing paragraph tags clarified: only inline content delimeted by
+ double newlines (not block elements) are paragraphed.
+- Buggy treatment of end tags of elements that have required attributes
+ fixed (does not manifest on default tag-set)
+- Spurious internal content reorganization error suppressed
+- HTMLDefinition->addElement now returns a reference to the created
+ element object, as implied by the documentation
+- Phorum mod's HTML Purifier help message expanded (unreleased elsewhere)
+- Fix a theoretical class of infinite loops from DirectLex reported
+ by Nate Abele
+- Work around unnecessary DOMElement type-cast in PH5P that caused errors
+ in PHP 5.1
+- Work around PHP 4 SimpleTest lack-of-error complaining for one-time-only
+ HTMLDefinition errors, this may indicate problems with error-collecting
+ facilities in PHP 5
+- Make ErrorCollectorEMock work in both PHP 4 and PHP 5
+- Make PH5P work with PHP 5.0 by removing unnecessary array parameter typedef
+. %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment
+ to better communicate its purpose
+. Error unit tests can now specify the expectation of no errors. Future
+ iterations of the harness will be extremely strict about what errors
+ are allowed
+. Extend Injector hooks to allow for more powerful injector routines
+. HTMLDefinition->addBlankElement created, as according to the HTMLModule
+ method
+. Doxygen configuration file updated, with minor improvements
+. Test runner now checks for similarly named files in conf/ directory too.
+. Minor cosmetic change to flush-definition-cache.php: trailing newline is
+ outputted
+. Maintenance script for generating PH5P patch added, original PH5P source
+ file also added under version control
+. Full unit test runner script title made more descriptive with PHP version
+. Updated INSTALL file to state that 4.3.7 is the earliest version we
+ are actively testing
+
2.1.2, released 2007-09-03
! Implemented Object module for trusted users
! Implemented experimental HTML5 parsing mode using PH5P. To use, add
diff --git a/VERSION b/VERSION
index 8f9174b4..abae0d9a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.1.2
\ No newline at end of file
+2.1.3
\ No newline at end of file
diff --git a/WHATSNEW b/WHATSNEW
index e9a40184..d3511d5d 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -1,8 +1,6 @@
-Version 2.1.2 is a mix of experimental features and stability updates.
-Among new features: an Object module for trusted users, support for the
-CSS property 'border-spacing', and HTML 5 style parsing using PH5P.
-Bug fixes ihave resolved a few obscure issues including border-collapse:seperate,
-a DirectLex parsing error, broken HTML in printDefinition.php, and problems
-with the experimental standalone distribution. Also, there were large
-amounts of behind-the-scenes refactoring and the removal of URIScheme
-inclusion reflection.
+Stability release 2.1.3 fixes a slew of minor bugs found in HTML Purifier,
+and also includes some internal code enhancements and refactorings.
+Notably, tests/multitest.php automates testing in multiple versions,
+fatal AttrDef_URI_Email error fixed, blockquote contents are more lenient
+in HTML 4.01 Strict and fatal errors involving ID tags in img tags were
+fixed.
diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php
index 43fe616b..d5878d30 100644
--- a/library/HTMLPurifier.php
+++ b/library/HTMLPurifier.php
@@ -22,8 +22,8 @@
*/
/*
- HTML Purifier 2.1.2 - Standards Compliant HTML Filtering
- Copyright (C) 2006 Edward Z. Yang
+ HTML Purifier 2.1.3 - Standards Compliant HTML Filtering
+ Copyright (C) 2006-2007 Edward Z. Yang
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -43,9 +43,8 @@
// constants are slow, but we'll make one exception
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
-// almost every class has an undocumented dependency to these, so make sure
-// they get included
-require_once 'HTMLPurifier/ConfigSchema.php'; // important
+// every class has an undocumented dependency to these, must be included!
+require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Context.php';
@@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'CollectErrors', false, 'bool', '
Whether or not to collect errors found while filtering the document. This
-is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
-This directive has been available since 2.0.0.
+is a useful way to give feedback to your users. Warning:
+Currently this feature is very patchy and experimental, with lots of
+possible error messages not yet implemented. It will not cause any problems,
+but it may not help your users either. This directive has been available
+since 2.0.0.
');
/**
- * Main library execution class.
+ * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
*
- * Facade that performs calls to the HTMLPurifier_Lexer,
- * HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
- * purify HTML.
+ * @note There are several points in which configuration can be specified
+ * for HTML Purifier. The precedence of these (from lowest to
+ * highest) is as follows:
+ * -# Instance: new HTMLPurifier($config)
+ * -# Invocation: purify($html, $config)
+ * These configurations are entirely independent of each other and
+ * are *not* merged.
*
* @todo We need an easier way to inject strategies, it'll probably end
* up getting done through config though.
@@ -77,15 +83,16 @@ This directive has been available since 2.0.0.
class HTMLPurifier
{
- var $version = '2.1.2';
+ var $version = '2.1.3';
var $config;
- var $filters;
+ var $filters = array();
var $strategy, $generator;
/**
- * Final HTMLPurifier_Context of last run purification. Might be an array.
+ * Resultant HTMLPurifier_Context of last run purification. Is an array
+ * of contexts if the last called method was purifyArray().
* @public
*/
var $context;
@@ -150,6 +157,11 @@ class HTMLPurifier
$context->register('ErrorCollector', $error_collector);
}
+ // setup id_accumulator context, necessary due to the fact that
+ // AttrValidator can be called from many places
+ $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
+ $context->register('IDAccumulator', $id_accumulator);
+
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
@@ -198,6 +210,8 @@ class HTMLPurifier
/**
* Singleton for enforcing just one HTML Purifier in your system
+ * @param $prototype Optional prototype HTMLPurifier instance to
+ * overload singleton with.
*/
static function &getInstance($prototype = null) {
static $htmlpurifier;
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php
index 365748c0..0e9a5f47 100644
--- a/library/HTMLPurifier/AttrDef/URI.php
+++ b/library/HTMLPurifier/AttrDef/URI.php
@@ -102,7 +102,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$result = $uri->validate($config, $context);
if (!$result) break;
- // chained validation
+ // chained filtering
$uri_def =& $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context);
if (!$result) break;
diff --git a/library/HTMLPurifier/AttrDef/URI/Email.php b/library/HTMLPurifier/AttrDef/URI/Email.php
index aaec099a..ababd9ea 100644
--- a/library/HTMLPurifier/AttrDef/URI/Email.php
+++ b/library/HTMLPurifier/AttrDef/URI/Email.php
@@ -1,7 +1,6 @@
getHTMLDefinition();
$e =& $context->get('ErrorCollector', true);
+ // initialize IDAccumulator if necessary
+ $ok =& $context->get('IDAccumulator', true);
+ if (!$ok) {
+ $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
+ $context->register('IDAccumulator', $id_accumulator);
+ }
+
// initialize CurrentToken if necessary
$current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token);
diff --git a/library/HTMLPurifier/ChildDef/Optional.php b/library/HTMLPurifier/ChildDef/Optional.php
index 779a7f06..e9f14edf 100644
--- a/library/HTMLPurifier/ChildDef/Optional.php
+++ b/library/HTMLPurifier/ChildDef/Optional.php
@@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
var $type = 'optional';
function validateChildren($tokens_of_children, $config, &$context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
- if ($result === false) return array();
+ if ($result === false) {
+ if (empty($tokens_of_children)) return true;
+ else return array();
+ }
return $result;
}
}
diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php
index 7e330e47..0d75b609 100644
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@@ -42,7 +42,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
- var $version = '2.1.2';
+ var $version = '2.1.3';
/**
* Two-level associative array of configuration directives
diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index fe6bd141..e13e0c62 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -236,13 +236,26 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
/**
* Adds a custom element to your HTML definition
* @note See HTMLPurifier_HTMLModule::addElement for detailed
- * parameter descriptions.
+ * parameter and return value descriptions.
*/
- function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
+ function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
$module =& $this->getAnonymousModule();
// assume that if the user is calling this, the element
// is safe. This may not be a good idea
- $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
+ $element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
+ return $element;
+ }
+
+ /**
+ * Adds a blank element to your HTML definition, for overriding
+ * existing behavior
+ * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
+ * parameter and return value descriptions.
+ */
+ function &addBlankElement($element_name) {
+ $module =& $this->getAnonymousModule();
+ $element =& $module->addBlankElement($element_name);
+ return $element;
}
/**
diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php
index 386cf365..dcf306a0 100644
--- a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php
+++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php
@@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php';
require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
+require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
+
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
HTMLPurifier_HTMLModule_Tidy
{
@@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends
{
var $name = 'Tidy_Strict';
var $defaultLevel = 'light';
+
+ function makeFixes() {
+ $r = parent::makeFixes();
+ $r['blockquote#content_model_type'] = 'strictblockquote';
+ return $r;
+ }
+
+ var $defines_child_def = true;
+ function getChildDef($def) {
+ if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def);
+ return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+ }
}
diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php
deleted file mode 100644
index b701491e..00000000
--- a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php
+++ /dev/null
@@ -1,26 +0,0 @@
-content_model_type != 'strictblockquote') return false;
- return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
- }
-
-}
-
diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php
index 74a233ff..3fc86160 100644
--- a/library/HTMLPurifier/HTMLModuleManager.php
+++ b/library/HTMLPurifier/HTMLModuleManager.php
@@ -35,7 +35,6 @@ require_once 'HTMLPurifier/HTMLModule/Object.php';
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
-require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
HTMLPurifier_ConfigSchema::define(
@@ -209,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register(
'XHTML 1.0 Strict', true,
array_merge($common, $xml, $non_xml),
- array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'),
+ array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
array(),
'-//W3C//DTD XHTML 1.0 Strict//EN',
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
@@ -218,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register(
'XHTML 1.1', true,
array_merge($common, $xml, array('Ruby')),
- array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1
+ array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
array(),
'-//W3C//DTD XHTML 1.1//EN',
'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
diff --git a/library/HTMLPurifier/IDAccumulator.php b/library/HTMLPurifier/IDAccumulator.php
index 525c9aa0..c9f58835 100644
--- a/library/HTMLPurifier/IDAccumulator.php
+++ b/library/HTMLPurifier/IDAccumulator.php
@@ -1,11 +1,15 @@
load($config->get('Attr', 'IDBlacklist'));
+ return $id_accumulator;
+ }
+
/**
* Add an ID to the lookup table.
* @param $id ID to be added.
diff --git a/library/HTMLPurifier/Injector.php b/library/HTMLPurifier/Injector.php
index 59017163..3b847097 100644
--- a/library/HTMLPurifier/Injector.php
+++ b/library/HTMLPurifier/Injector.php
@@ -4,6 +4,9 @@
* Injects tokens into the document while parsing for well-formedness.
* This enables "formatter-like" functionality such as auto-paragraphing,
* smiley-ification and linkification to take place.
+ *
+ * @todo Allow injectors to request a re-run on their output. This
+ * would help if an operation is recursive.
*/
class HTMLPurifier_Injector
{
@@ -107,5 +110,12 @@ class HTMLPurifier_Injector
*/
function handleElement(&$token) {}
+ /**
+ * Notifier that is called when an end token is processed
+ * @note This differs from handlers in that the token is read-only
+ */
+ function notifyEnd($token) {}
+
+
}
diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php
index 6e0a6a3e..56a6a268 100644
--- a/library/HTMLPurifier/Injector/AutoParagraph.php
+++ b/library/HTMLPurifier/Injector/AutoParagraph.php
@@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'AutoParagraph', false, 'bool', '
This directive turns on auto-paragraphing, where double newlines are
- converted in to paragraphs whenever possible. Auto-paragraphing
- applies when:
+ converted in to paragraphs whenever possible. Auto-paragraphing:
- - There are inline elements or text in the root node
- - There are inline elements or text with double newlines or
- block elements in nodes that allow paragraph tags
- - There are double newlines in paragraph tags
+ - Always applies to inline elements or text in the root node,
+ - Applies to inline elements or text with double newlines in nodes
+ that allow paragraph tags,
+ - Applies to double newlines in paragraph tags
p
tags must be allowed for this directive to take effect.
We do not use br
tags for paragraphing, as that is
semantically incorrect.
+
+ To prevent auto-paragraphing as a content-producer, refrain from using
+ double-newlines except to specify a new paragraph or in contexts where
+ it has special meaning (whitespace usually has no meaning except in
+ tags like pre
, so this should not be difficult.) To prevent
+ the paragraphing of inline text adjacent to block elements, wrap them
+ in div
tags (the behavior is slightly different outside of
+ the root node.)
+
This directive has been available since 2.0.1.
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
$ok = false;
// test if up-coming tokens are either block or have
// a double newline in them
+ $nesting = 0;
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
if ($this->inputTokens[$i]->type == 'start'){
if (!$this->_isInline($this->inputTokens[$i])) {
- $ok = true;
+ // we haven't found a double-newline, and
+ // we've hit a block element, so don't paragraph
+ $ok = false;
+ break;
}
- break;
+ $nesting++;
+ }
+ if ($this->inputTokens[$i]->type == 'end') {
+ if ($nesting <= 0) break;
+ $nesting--;
}
- if ($this->inputTokens[$i]->type == 'end') break;
if ($this->inputTokens[$i]->type == 'text') {
+ // found it!
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
+ break;
}
- if (!$this->inputTokens[$i]->is_whitespace) break;
}
}
if ($ok) {
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index f52579ab..3819ad22 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) {
}
HTMLPurifier_ConfigSchema::define(
- 'Core', 'AcceptFullDocuments', true, 'bool',
- 'This parameter determines whether or not the filter should accept full '.
- 'HTML documents, not just HTML fragments. When on, it will '.
- 'drop all sections except the content between body.'
-);
+ 'Core', 'ConvertDocumentToFragment', true, 'bool', '
+This parameter determines whether or not the filter should convert
+input that is a full document with html and body tags to a fragment
+of just the contents of a body tag. This parameter is simply something
+HTML Purifier can do during an edge-case: for most inputs, this
+processing is not necessary.
+');
+HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
HTMLPurifier_ConfigSchema::define(
'Core', 'LexerImpl', null, 'mixed/null', '
@@ -316,7 +319,7 @@ class HTMLPurifier_Lexer
function normalize($html, $config, &$context) {
// extract body from document if applicable
- if ($config->get('Core', 'AcceptFullDocuments')) {
+ if ($config->get('Core', 'ConvertDocumentToFragment')) {
$html = $this->extractBody($html);
}
diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php
index 6f8c8ff6..cdcf2aa1 100644
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$segment = substr($html, $cursor, $strlen_segment);
+ if ($segment === false) {
+ // somehow, we attempted to access beyond the end of
+ // the string, defense-in-depth, reported by Nate Abele
+ break;
+ }
+
// Check if it's a comment
if (
- substr($segment, 0, 3) == '!--'
+ substr($segment, 0, 3) === '!--'
) {
// re-determine segment length, looking for -->
$position_comment_end = strpos($html, '-->', $cursor);
@@ -237,7 +243,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// trailing slash. Remember, we could have a tag like
, so
// any later token processing scripts must convert improperly
// classified EmptyTags from StartTags.
- $is_self_closing= (strrpos($segment,'/') === $strlen_segment-1);
+ $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
if ($is_self_closing) {
$strlen_segment--;
$segment = substr($segment, 0, $strlen_segment);
diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php
index 5720c33a..b6762379 100644
--- a/library/HTMLPurifier/Lexer/PH5P.php
+++ b/library/HTMLPurifier/Lexer/PH5P.php
@@ -26,8 +26,6 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
}
-// begin PHP5P source code here
-
/*
Copyright 2007 Jeroen van der Meer
@@ -3722,7 +3720,7 @@ class HTML5TreeConstructer {
}
}
- private function generateImpliedEndTags(array $exclude = array()) {
+ private function generateImpliedEndTags($exclude = array()) {
/* When the steps below require the UA to generate implied end tags,
then, if the current node is a dd element, a dt element, an li element,
a p element, a td element, a th element, or a tr element, the UA must
@@ -3736,7 +3734,8 @@ class HTML5TreeConstructer {
}
}
- private function getElementCategory($name) {
+ private function getElementCategory($node) {
+ $name = $node->tagName;
if(in_array($name, $this->special))
return self::SPECIAL;
@@ -3884,3 +3883,4 @@ class HTML5TreeConstructer {
return $this->dom;
}
}
+?>
diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php
index 51a14a78..25e9f8ac 100644
--- a/library/HTMLPurifier/Strategy/FixNesting.php
+++ b/library/HTMLPurifier/Strategy/FixNesting.php
@@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
//################################################################//
// Process result by interpreting $result
- if ($result === true) {
+ if ($result === true || $child_tokens === $result) {
// leave the node as is
// register start token as a parental node start
diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php
index b3e8aa74..4b6f498f 100644
--- a/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -36,28 +36,23 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$definition = $config->getHTMLDefinition();
- // CurrentNesting
- $this->currentNesting = array();
- $context->register('CurrentNesting', $this->currentNesting);
-
- // InputIndex
- $this->inputIndex = false;
- $context->register('InputIndex', $this->inputIndex);
-
- // InputTokens
- $context->register('InputTokens', $tokens);
- $this->inputTokens =& $tokens;
-
- // OutputTokens
+ // local variables
$result = array();
- $this->outputTokens =& $result;
-
- // %Core.EscapeInvalidTags
- $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$generator = new HTMLPurifier_Generator();
-
+ $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$e =& $context->get('ErrorCollector', true);
+ // member variables
+ $this->currentNesting = array();
+ $this->inputIndex = false;
+ $this->inputTokens =& $tokens;
+ $this->outputTokens =& $result;
+
+ // context variables
+ $context->register('CurrentNesting', $this->currentNesting);
+ $context->register('InputIndex', $this->inputIndex);
+ $context->register('InputTokens', $tokens);
+
// -- begin INJECTOR --
$this->injectors = array();
@@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
}
+ // warning: most foreach loops follow the convention $i => $x.
+ // be sure, for PHP4 compatibility, to only perform write operations
+ // directly referencing the object using $i: $x is only safe for reads
+
// -- end INJECTOR --
$token = false;
@@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// if all goes well, this token will be passed through unharmed
$token = $tokens[$this->inputIndex];
+ //printTokens($tokens, $this->inputIndex);
+
foreach ($this->injectors as $i => $x) {
if ($x->skip > 0) $this->injectors[$i]->skip--;
}
@@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($token->type === 'text') {
// injector handler code; duplicated for performance reasons
foreach ($this->injectors as $i => $x) {
- if (!$x->skip) $x->handleText($token);
+ if (!$x->skip) $this->injectors[$i]->handleText($token);
if (is_array($token)) {
$this->currentInjector = $i;
break;
@@ -172,7 +173,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// injector handler code; duplicated for performance reasons
if ($ok) {
foreach ($this->injectors as $i => $x) {
- if (!$x->skip) $x->handleElement($token);
+ if (!$x->skip) $this->injectors[$i]->handleElement($token);
if (is_array($token)) {
$this->currentInjector = $i;
break;
@@ -202,6 +203,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$current_parent = array_pop($this->currentNesting);
if ($current_parent->name == $token->name) {
$result[] = $token;
+ foreach ($this->injectors as $i => $x) {
+ $this->injectors[$i]->notifyEnd($token);
+ }
continue;
}
@@ -238,16 +242,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// okay, we found it, close all the skipped tags
// note that skipped tags contains the element we need closed
- $size = count($skipped_tags);
- for ($i = $size - 1; $i > 0; $i--) {
- if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
+ for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
+ if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
}
- $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
+ $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
+ foreach ($this->injectors as $j => $x) { // $j, not $i!!!
+ $this->injectors[$j]->notifyEnd($new_token);
+ }
}
- $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
-
}
$context->destroy('CurrentNesting');
@@ -255,17 +259,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$context->destroy('InputIndex');
$context->destroy('CurrentToken');
- // we're at the end now, fix all still unclosed tags
- // not using processToken() because at this point we don't
- // care about current nesting
+ // we're at the end now, fix all still unclosed tags (this is
+ // duplicated from the end of the loop with some slight modifications)
+ // not using $skipped_tags since it would invariably be all of them
if (!empty($this->currentNesting)) {
- $size = count($this->currentNesting);
- for ($i = $size - 1; $i >= 0; $i--) {
+ for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
}
- $result[] =
- new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
+ $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
+ foreach ($this->injectors as $j => $x) { // $j, not $i!!!
+ $this->injectors[$j]->notifyEnd($new_token);
+ }
}
}
@@ -286,8 +291,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// adjust the injector skips based on the array substitution
if ($this->injectors) {
- $offset = count($token) + 1;
+ $offset = count($token);
for ($i = 0; $i <= $this->currentInjector; $i++) {
+ // because of the skip back, we need to add one more
+ // for uninitialized injectors. I'm not exactly
+ // sure why this is the case, but I think it has to
+ // do with the fact that we're decrementing skips
+ // before re-checking text
+ if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
$this->injectors[$i]->skip += $offset;
}
}
diff --git a/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
index 2c280b23..5d26e4f5 100644
--- a/library/HTMLPurifier/Strategy/RemoveForeignElements.php
+++ b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// mostly everything's good, but
// we need to make sure required attributes are in order
if (
+ ($token->type === 'start' || $token->type === 'empty') &&
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
@@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token->armor['ValidateAttributes'] = true;
}
- // CAN BE GENERICIZED
if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
$textify_comments = $token->name;
} elseif ($token->name === $textify_comments && $token->type == 'end') {
diff --git a/library/HTMLPurifier/Strategy/ValidateAttributes.php b/library/HTMLPurifier/Strategy/ValidateAttributes.php
index 869f3fab..6debcc33 100644
--- a/library/HTMLPurifier/Strategy/ValidateAttributes.php
+++ b/library/HTMLPurifier/Strategy/ValidateAttributes.php
@@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php';
require_once 'HTMLPurifier/AttrValidator.php';
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'IDBlacklist', array(), 'list',
- 'Array of IDs not allowed in the document.');
-
/**
* Validate all attributes in the tokens.
*/
@@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
function execute($tokens, $config, &$context) {
- // setup id_accumulator context
- $id_accumulator = new HTMLPurifier_IDAccumulator();
- $id_accumulator->load($config->get('Attr', 'IDBlacklist'));
- $context->register('IDAccumulator', $id_accumulator);
-
// setup validator
$validator = new HTMLPurifier_AttrValidator();
@@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
$tokens[$key] = $token; // for PHP 4
}
-
- $context->destroy('IDAccumulator');
$context->destroy('CurrentToken');
return $tokens;
diff --git a/library/HTMLPurifier/URIFilter.php b/library/HTMLPurifier/URIFilter.php
index e0066f3b..ca000ea5 100644
--- a/library/HTMLPurifier/URIFilter.php
+++ b/library/HTMLPurifier/URIFilter.php
@@ -1,10 +1,22 @@
host)) return true;
$scheme_obj = $uri->getSchemeObj($config, $context);
+ if (!$scheme_obj) {
+ // scheme not recognized
+ return false;
+ }
if (!$scheme_obj->hierarchical) {
// non-hierarchal URI with explicit scheme, don't change
return true;
diff --git a/maintenance/PH5P.patch b/maintenance/PH5P.patch
index 37e4dbf1..9365cffe 100644
--- a/maintenance/PH5P.patch
+++ b/maintenance/PH5P.patch
@@ -1,5 +1,5 @@
---- old.php 2007-08-19 14:42:33.640625000 -0400
-+++ new.php 2007-08-19 14:41:51.609375000 -0400
+--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-04 23:41:49.074543700 -0500
++++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2007-11-05 00:23:52.839543700 -0500
@@ -211,7 +211,10 @@
// If nothing is returned, emit a U+0026 AMPERSAND character token.
// Otherwise, emit the character token that was returned.
@@ -43,3 +43,22 @@
$entity = $id;
break;
}
+@@ -3659,7 +3668,7 @@
+ }
+ }
+
+- private function generateImpliedEndTags(array $exclude = array()) {
++ private function generateImpliedEndTags($exclude = array()) {
+ /* When the steps below require the UA to generate implied end tags,
+ then, if the current node is a dd element, a dt element, an li element,
+ a p element, a td element, a th element, or a tr element, the UA must
+@@ -3673,7 +3682,8 @@
+ }
+ }
+
+- private function getElementCategory($name) {
++ private function getElementCategory($node) {
++ $name = $node->tagName;
+ if(in_array($name, $this->special))
+ return self::SPECIAL;
+
diff --git a/maintenance/PH5P.php b/maintenance/PH5P.php
new file mode 100644
index 00000000..96d0d13f
--- /dev/null
+++ b/maintenance/PH5P.php
@@ -0,0 +1,3824 @@
+data = $data;
+ $this->char = -1;
+ $this->EOF = strlen($data);
+ $this->tree = new HTML5TreeConstructer;
+ $this->content_model = self::PCDATA;
+
+ $this->state = 'data';
+
+ while($this->state !== null) {
+ $this->{$this->state.'State'}();
+ }
+ }
+
+ public function save() {
+ return $this->tree->save();
+ }
+
+ private function char() {
+ return ($this->char < $this->EOF)
+ ? $this->data[$this->char]
+ : false;
+ }
+
+ private function character($s, $l = 0) {
+ if($s + $l < $this->EOF) {
+ if($l === 0) {
+ return $this->data[$s];
+ } else {
+ return substr($this->data, $s, $l);
+ }
+ }
+ }
+
+ private function characters($char_class, $start) {
+ return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start));
+ }
+
+ private function dataState() {
+ // Consume the next input character
+ $this->char++;
+ $char = $this->char();
+
+ if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
+ /* U+0026 AMPERSAND (&)
+ When the content model flag is set to one of the PCDATA or RCDATA
+ states: switch to the entity data state. Otherwise: treat it as per
+ the "anything else" entry below. */
+ $this->state = 'entityData';
+
+ } elseif($char === '-') {
+ /* If the content model flag is set to either the RCDATA state or
+ the CDATA state, and the escape flag is false, and there are at
+ least three characters before this one in the input stream, and the
+ last four characters in the input stream, including this one, are
+ U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
+ and U+002D HYPHEN-MINUS (""),
+ set the escape flag to false. */
+ if(($this->content_model === self::RCDATA ||
+ $this->content_model === self::CDATA) && $this->escape === true &&
+ $this->character($this->char, 3) === '-->') {
+ $this->escape = false;
+ }
+
+ /* In any case, emit the input character as a character token.
+ Stay in the data state. */
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => $char
+ ));
+
+ } elseif($this->char === $this->EOF) {
+ /* EOF
+ Emit an end-of-file token. */
+ $this->EOF();
+
+ } elseif($this->content_model === self::PLAINTEXT) {
+ /* When the content model flag is set to the PLAINTEXT state
+ THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
+ the text and emit it as a character token. */
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => substr($this->data, $this->char)
+ ));
+
+ $this->EOF();
+
+ } else {
+ /* Anything else
+ THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that
+ otherwise would also be treated as a character token and emit it
+ as a single character token. Stay in the data state. */
+ $len = strcspn($this->data, '<&', $this->char);
+ $char = substr($this->data, $this->char, $len);
+ $this->char += $len - 1;
+
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => $char
+ ));
+
+ $this->state = 'data';
+ }
+ }
+
+ private function entityDataState() {
+ // Attempt to consume an entity.
+ $entity = $this->entity();
+
+ // If nothing is returned, emit a U+0026 AMPERSAND character token.
+ // Otherwise, emit the character token that was returned.
+ $char = (!$entity) ? '&' : $entity;
+ $this->emitToken($char);
+
+ // Finally, switch to the data state.
+ $this->state = 'data';
+ }
+
+ private function tagOpenState() {
+ switch($this->content_model) {
+ case self::RCDATA:
+ case self::CDATA:
+ /* If the next input character is a U+002F SOLIDUS (/) character,
+ consume it and switch to the close tag open state. If the next
+ input character is not a U+002F SOLIDUS (/) character, emit a
+ U+003C LESS-THAN SIGN character token and switch to the data
+ state to process the next input character. */
+ if($this->character($this->char + 1) === '/') {
+ $this->char++;
+ $this->state = 'closeTagOpen';
+
+ } else {
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => '<'
+ ));
+
+ $this->state = 'data';
+ }
+ break;
+
+ case self::PCDATA:
+ // If the content model flag is set to the PCDATA state
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->char();
+
+ if($char === '!') {
+ /* U+0021 EXCLAMATION MARK (!)
+ Switch to the markup declaration open state. */
+ $this->state = 'markupDeclarationOpen';
+
+ } elseif($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Switch to the close tag open state. */
+ $this->state = 'closeTagOpen';
+
+ } elseif(preg_match('/^[A-Za-z]$/', $char)) {
+ /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
+ Create a new start tag token, set its tag name to the lowercase
+ version of the input character (add 0x0020 to the character's code
+ point), then switch to the tag name state. (Don't emit the token
+ yet; further details will be filled in before it is emitted.) */
+ $this->token = array(
+ 'name' => strtolower($char),
+ 'type' => self::STARTTAG,
+ 'attr' => array()
+ );
+
+ $this->state = 'tagName';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit a U+003C LESS-THAN SIGN character token and a
+ U+003E GREATER-THAN SIGN character token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => '<>'
+ ));
+
+ $this->state = 'data';
+
+ } elseif($char === '?') {
+ /* U+003F QUESTION MARK (?)
+ Parse error. Switch to the bogus comment state. */
+ $this->state = 'bogusComment';
+
+ } else {
+ /* Anything else
+ Parse error. Emit a U+003C LESS-THAN SIGN character token and
+ reconsume the current input character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => '<'
+ ));
+
+ $this->char--;
+ $this->state = 'data';
+ }
+ break;
+ }
+ }
+
+ private function closeTagOpenState() {
+ $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
+ $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
+
+ if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
+ (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/',
+ $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) {
+ /* If the content model flag is set to the RCDATA or CDATA states then
+ examine the next few characters. If they do not match the tag name of
+ the last start tag token emitted (case insensitively), or if they do but
+ they are not immediately followed by one of the following characters:
+ * U+0009 CHARACTER TABULATION
+ * U+000A LINE FEED (LF)
+ * U+000B LINE TABULATION
+ * U+000C FORM FEED (FF)
+ * U+0020 SPACE
+ * U+003E GREATER-THAN SIGN (>)
+ * U+002F SOLIDUS (/)
+ * EOF
+ ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
+ token, a U+002F SOLIDUS character token, and switch to the data state
+ to process the next input character. */
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => ''
+ ));
+
+ $this->state = 'data';
+
+ } else {
+ /* Otherwise, if the content model flag is set to the PCDATA state,
+ or if the next few characters do match that tag name, consume the
+ next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ if(preg_match('/^[A-Za-z]$/', $char)) {
+ /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
+ Create a new end tag token, set its tag name to the lowercase version
+ of the input character (add 0x0020 to the character's code point), then
+ switch to the tag name state. (Don't emit the token yet; further details
+ will be filled in before it is emitted.) */
+ $this->token = array(
+ 'name' => strtolower($char),
+ 'type' => self::ENDTAG
+ );
+
+ $this->state = 'tagName';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Switch to the data state. */
+ $this->state = 'data';
+
+ } elseif($this->char === $this->EOF) {
+ /* EOF
+ Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
+ SOLIDUS character token. Reconsume the EOF character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::CHARACTR,
+ 'data' => ''
+ ));
+
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ /* Parse error. Switch to the bogus comment state. */
+ $this->state = 'bogusComment';
+ }
+ }
+ }
+
+ private function tagNameState() {
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->character($this->char);
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000B LINE TABULATION
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } elseif($this->char === $this->EOF) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken($this->token);
+
+ $this->char--;
+ $this->state = 'data';
+
+ } elseif($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Parse error unless this is a permitted slash. Switch to the before
+ attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current tag token's tag name.
+ Stay in the tag name state. */
+ $this->token['name'] .= strtolower($char);
+ $this->state = 'tagName';
+ }
+ }
+
+ private function beforeAttributeNameState() {
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->character($this->char);
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000B LINE TABULATION
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } elseif($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Parse error unless this is a permitted slash. Stay in the before
+ attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } elseif($this->char === $this->EOF) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken($this->token);
+
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ /* Anything else
+ Start a new attribute in the current tag token. Set that attribute's
+ name to the current input character, and its value to the empty string.
+ Switch to the attribute name state. */
+ $this->token['attr'][] = array(
+ 'name' => strtolower($char),
+ 'value' => null
+ );
+
+ $this->state = 'attributeName';
+ }
+ }
+
+ private function attributeNameState() {
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->character($this->char);
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000B LINE TABULATION
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before attribute name state. */
+ $this->state = 'afterAttributeName';
+
+ } elseif($char === '=') {
+ /* U+003D EQUALS SIGN (=)
+ Switch to the before attribute value state. */
+ $this->state = 'beforeAttributeValue';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
+ /* U+002F SOLIDUS (/)
+ Parse error unless this is a permitted slash. Switch to the before
+ attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } elseif($this->char === $this->EOF) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken($this->token);
+
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current attribute's name.
+ Stay in the attribute name state. */
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['name'] .= strtolower($char);
+
+ $this->state = 'attributeName';
+ }
+ }
+
+ private function afterAttributeNameState() {
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->character($this->char);
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000B LINE TABULATION
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after attribute name state. */
+ $this->state = 'afterAttributeName';
+
+ } elseif($char === '=') {
+ /* U+003D EQUALS SIGN (=)
+ Switch to the before attribute value state. */
+ $this->state = 'beforeAttributeValue';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
+ /* U+002F SOLIDUS (/)
+ Parse error unless this is a permitted slash. Switch to the
+ before attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } elseif($this->char === $this->EOF) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken($this->token);
+
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ /* Anything else
+ Start a new attribute in the current tag token. Set that attribute's
+ name to the current input character, and its value to the empty string.
+ Switch to the attribute name state. */
+ $this->token['attr'][] = array(
+ 'name' => strtolower($char),
+ 'value' => null
+ );
+
+ $this->state = 'attributeName';
+ }
+ }
+
+ private function beforeAttributeValueState() {
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->character($this->char);
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000B LINE TABULATION
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before attribute value state. */
+ $this->state = 'beforeAttributeValue';
+
+ } elseif($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Switch to the attribute value (double-quoted) state. */
+ $this->state = 'attributeValueDoubleQuoted';
+
+ } elseif($char === '&') {
+ /* U+0026 AMPERSAND (&)
+ Switch to the attribute value (unquoted) state and reconsume
+ this input character. */
+ $this->char--;
+ $this->state = 'attributeValueUnquoted';
+
+ } elseif($char === '\'') {
+ /* U+0027 APOSTROPHE (')
+ Switch to the attribute value (single-quoted) state. */
+ $this->state = 'attributeValueSingleQuoted';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current attribute's value.
+ Switch to the attribute value (unquoted) state. */
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char;
+
+ $this->state = 'attributeValueUnquoted';
+ }
+ }
+
+ private function attributeValueDoubleQuotedState() {
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->character($this->char);
+
+ if($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Switch to the before attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } elseif($char === '&') {
+ /* U+0026 AMPERSAND (&)
+ Switch to the entity in attribute value state. */
+ $this->entityInAttributeValueState('double');
+
+ } elseif($this->char === $this->EOF) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the character
+ in the data state. */
+ $this->emitToken($this->token);
+
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current attribute's value.
+ Stay in the attribute value (double-quoted) state. */
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char;
+
+ $this->state = 'attributeValueDoubleQuoted';
+ }
+ }
+
+ private function attributeValueSingleQuotedState() {
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->character($this->char);
+
+ if($char === '\'') {
+ /* U+0022 QUOTATION MARK (')
+ Switch to the before attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } elseif($char === '&') {
+ /* U+0026 AMPERSAND (&)
+ Switch to the entity in attribute value state. */
+ $this->entityInAttributeValueState('single');
+
+ } elseif($this->char === $this->EOF) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the character
+ in the data state. */
+ $this->emitToken($this->token);
+
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current attribute's value.
+ Stay in the attribute value (single-quoted) state. */
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char;
+
+ $this->state = 'attributeValueSingleQuoted';
+ }
+ }
+
+ private function attributeValueUnquotedState() {
+ // Consume the next input character:
+ $this->char++;
+ $char = $this->character($this->char);
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000B LINE TABULATION
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before attribute name state. */
+ $this->state = 'beforeAttributeName';
+
+ } elseif($char === '&') {
+ /* U+0026 AMPERSAND (&)
+ Switch to the entity in attribute value state. */
+ $this->entityInAttributeValueState('non');
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current attribute's value.
+ Stay in the attribute value (unquoted) state. */
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char;
+
+ $this->state = 'attributeValueUnquoted';
+ }
+ }
+
+ private function entityInAttributeValueState() {
+ // Attempt to consume an entity.
+ $entity = $this->entity();
+
+ // If nothing is returned, append a U+0026 AMPERSAND character to the
+ // current attribute's value. Otherwise, emit the character token that
+ // was returned.
+ $char = (!$entity)
+ ? '&'
+ : $entity;
+
+ $this->emitToken($char);
+ }
+
+ private function bogusCommentState() {
+ /* Consume every character up to the first U+003E GREATER-THAN SIGN
+ character (>) or the end of the file (EOF), whichever comes first. Emit
+ a comment token whose data is the concatenation of all the characters
+ starting from and including the character that caused the state machine
+ to switch into the bogus comment state, up to and including the last
+ consumed character before the U+003E character, if any, or up to the
+ end of the file otherwise. (If the comment was started by the end of
+ the file (EOF), the token is empty.) */
+ $data = $this->characters('^>', $this->char);
+ $this->emitToken(array(
+ 'data' => $data,
+ 'type' => self::COMMENT
+ ));
+
+ $this->char += strlen($data);
+
+ /* Switch to the data state. */
+ $this->state = 'data';
+
+ /* If the end of the file was reached, reconsume the EOF character. */
+ if($this->char === $this->EOF) {
+ $this->char = $this->EOF - 1;
+ }
+ }
+
+ private function markupDeclarationOpenState() {
+ /* If the next two characters are both U+002D HYPHEN-MINUS (-)
+ characters, consume those two characters, create a comment token whose
+ data is the empty string, and switch to the comment state. */
+ if($this->character($this->char + 1, 2) === '--') {
+ $this->char += 2;
+ $this->state = 'comment';
+ $this->token = array(
+ 'data' => null,
+ 'type' => self::COMMENT
+ );
+
+ /* Otherwise if the next seven chacacters are a case-insensitive match
+ for the word "DOCTYPE", then consume those characters and switch to the
+ DOCTYPE state. */
+ } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
+ $this->char += 7;
+ $this->state = 'doctype';
+
+ /* Otherwise, is is a parse error. Switch to the bogus comment state.
+ The next character that is consumed, if any, is the first character
+ that will be in the comment. */
+ } else {
+ $this->char++;
+ $this->state = 'bogusComment';
+ }
+ }
+
+ private function commentState() {
+ /* Consume the next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ /* U+002D HYPHEN-MINUS (-) */
+ if($char === '-') {
+ /* Switch to the comment dash state */
+ $this->state = 'commentDash';
+
+ /* EOF */
+ } elseif($this->char === $this->EOF) {
+ /* Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state. */
+ $this->emitToken($this->token);
+ $this->char--;
+ $this->state = 'data';
+
+ /* Anything else */
+ } else {
+ /* Append the input character to the comment token's data. Stay in
+ the comment state. */
+ $this->token['data'] .= $char;
+ }
+ }
+
+ private function commentDashState() {
+ /* Consume the next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ /* U+002D HYPHEN-MINUS (-) */
+ if($char === '-') {
+ /* Switch to the comment end state */
+ $this->state = 'commentEnd';
+
+ /* EOF */
+ } elseif($this->char === $this->EOF) {
+ /* Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state. */
+ $this->emitToken($this->token);
+ $this->char--;
+ $this->state = 'data';
+
+ /* Anything else */
+ } else {
+ /* Append a U+002D HYPHEN-MINUS (-) character and the input
+ character to the comment token's data. Switch to the comment state. */
+ $this->token['data'] .= '-'.$char;
+ $this->state = 'comment';
+ }
+ }
+
+ private function commentEndState() {
+ /* Consume the next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ if($char === '>') {
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } elseif($char === '-') {
+ $this->token['data'] .= '-';
+
+ } elseif($this->char === $this->EOF) {
+ $this->emitToken($this->token);
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ $this->token['data'] .= '--'.$char;
+ $this->state = 'comment';
+ }
+ }
+
+ private function doctypeState() {
+ /* Consume the next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ $this->state = 'beforeDoctypeName';
+
+ } else {
+ $this->char--;
+ $this->state = 'beforeDoctypeName';
+ }
+ }
+
+ private function beforeDoctypeNameState() {
+ /* Consume the next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ // Stay in the before DOCTYPE name state.
+
+ } elseif(preg_match('/^[a-z]$/', $char)) {
+ $this->token = array(
+ 'name' => strtoupper($char),
+ 'type' => self::DOCTYPE,
+ 'error' => true
+ );
+
+ $this->state = 'doctypeName';
+
+ } elseif($char === '>') {
+ $this->emitToken(array(
+ 'name' => null,
+ 'type' => self::DOCTYPE,
+ 'error' => true
+ ));
+
+ $this->state = 'data';
+
+ } elseif($this->char === $this->EOF) {
+ $this->emitToken(array(
+ 'name' => null,
+ 'type' => self::DOCTYPE,
+ 'error' => true
+ ));
+
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ $this->token = array(
+ 'name' => $char,
+ 'type' => self::DOCTYPE,
+ 'error' => true
+ );
+
+ $this->state = 'doctypeName';
+ }
+ }
+
+ private function doctypeNameState() {
+ /* Consume the next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ $this->state = 'AfterDoctypeName';
+
+ } elseif($char === '>') {
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } elseif(preg_match('/^[a-z]$/', $char)) {
+ $this->token['name'] .= strtoupper($char);
+
+ } elseif($this->char === $this->EOF) {
+ $this->emitToken($this->token);
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ $this->token['name'] .= $char;
+ }
+
+ $this->token['error'] = ($this->token['name'] === 'HTML')
+ ? false
+ : true;
+ }
+
+ private function afterDoctypeNameState() {
+ /* Consume the next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ // Stay in the DOCTYPE name state.
+
+ } elseif($char === '>') {
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } elseif($this->char === $this->EOF) {
+ $this->emitToken($this->token);
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ $this->token['error'] = true;
+ $this->state = 'bogusDoctype';
+ }
+ }
+
+ private function bogusDoctypeState() {
+ /* Consume the next input character: */
+ $this->char++;
+ $char = $this->char();
+
+ if($char === '>') {
+ $this->emitToken($this->token);
+ $this->state = 'data';
+
+ } elseif($this->char === $this->EOF) {
+ $this->emitToken($this->token);
+ $this->char--;
+ $this->state = 'data';
+
+ } else {
+ // Stay in the bogus DOCTYPE state.
+ }
+ }
+
+ private function entity() {
+ $start = $this->char;
+
+ // This section defines how to consume an entity. This definition is
+ // used when parsing entities in text and in attributes.
+
+ // The behaviour depends on the identity of the next character (the
+ // one immediately after the U+0026 AMPERSAND character):
+
+ switch($this->character($this->char + 1)) {
+ // U+0023 NUMBER SIGN (#)
+ case '#':
+
+ // The behaviour further depends on the character after the
+ // U+0023 NUMBER SIGN:
+ switch($this->character($this->char + 1)) {
+ // U+0078 LATIN SMALL LETTER X
+ // U+0058 LATIN CAPITAL LETTER X
+ case 'x':
+ case 'X':
+ // Follow the steps below, but using the range of
+ // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
+ // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
+ // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
+ // A, through to U+0046 LATIN CAPITAL LETTER F (in other
+ // words, 0-9, A-F, a-f).
+ $char = 1;
+ $char_class = '0-9A-Fa-f';
+ break;
+
+ // Anything else
+ default:
+ // Follow the steps below, but using the range of
+ // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
+ // NINE (i.e. just 0-9).
+ $char = 0;
+ $char_class = '0-9';
+ break;
+ }
+
+ // Consume as many characters as match the range of characters
+ // given above.
+ $this->char++;
+ $e_name = $this->characters($char_class, $this->char + $char + 1);
+ $entity = $this->character($start, $this->char);
+ $cond = strlen($e_name) > 0;
+
+ // The rest of the parsing happens bellow.
+ break;
+
+ // Anything else
+ default:
+ // Consume the maximum number of characters possible, with the
+ // consumed characters case-sensitively matching one of the
+ // identifiers in the first column of the entities table.
+ $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
+ $len = strlen($e_name);
+
+ for($c = 1; $c <= $len; $c++) {
+ $id = substr($e_name, 0, $c);
+ $this->char++;
+
+ if(in_array($id, $this->entities)) {
+ $entity = $id;
+ break;
+ }
+ }
+
+ $cond = isset($entity);
+ // The rest of the parsing happens bellow.
+ break;
+ }
+
+ if(!$cond) {
+ // If no match can be made, then this is a parse error. No
+ // characters are consumed, and nothing is returned.
+ $this->char = $start;
+ return false;
+ }
+
+ // Return a character token for the character corresponding to the
+ // entity name (as given by the second column of the entities table).
+ return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
+ }
+
+ private function emitToken($token) {
+ $emit = $this->tree->emitToken($token);
+
+ if(is_int($emit)) {
+ $this->content_model = $emit;
+
+ } elseif($token['type'] === self::ENDTAG) {
+ $this->content_model = self::PCDATA;
+ }
+ }
+
+ private function EOF() {
+ $this->state = null;
+ $this->tree->emitToken(array(
+ 'type' => self::EOF
+ ));
+ }
+}
+
+class HTML5TreeConstructer {
+ public $stack = array();
+
+ private $phase;
+ private $mode;
+ private $dom;
+ private $foster_parent = null;
+ private $a_formatting = array();
+
+ private $head_pointer = null;
+ private $form_pointer = null;
+
+ private $scoping = array('button','caption','html','marquee','object','table','td','th');
+ private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u');
+ private $special = array('address','area','base','basefont','bgsound',
+ 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
+ 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
+ 'h6','head','hr','iframe','image','img','input','isindex','li','link',
+ 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
+ 'option','p','param','plaintext','pre','script','select','spacer','style',
+ 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
+
+ // The different phases.
+ const INIT_PHASE = 0;
+ const ROOT_PHASE = 1;
+ const MAIN_PHASE = 2;
+ const END_PHASE = 3;
+
+ // The different insertion modes for the main phase.
+ const BEFOR_HEAD = 0;
+ const IN_HEAD = 1;
+ const AFTER_HEAD = 2;
+ const IN_BODY = 3;
+ const IN_TABLE = 4;
+ const IN_CAPTION = 5;
+ const IN_CGROUP = 6;
+ const IN_TBODY = 7;
+ const IN_ROW = 8;
+ const IN_CELL = 9;
+ const IN_SELECT = 10;
+ const AFTER_BODY = 11;
+ const IN_FRAME = 12;
+ const AFTR_FRAME = 13;
+
+ // The different types of elements.
+ const SPECIAL = 0;
+ const SCOPING = 1;
+ const FORMATTING = 2;
+ const PHRASING = 3;
+
+ const MARKER = 0;
+
+ public function __construct() {
+ $this->phase = self::INIT_PHASE;
+ $this->mode = self::BEFOR_HEAD;
+ $this->dom = new DOMDocument;
+
+ $this->dom->encoding = 'UTF-8';
+ $this->dom->preserveWhiteSpace = true;
+ $this->dom->substituteEntities = true;
+ $this->dom->strictErrorChecking = false;
+ }
+
+ // Process tag tokens
+ public function emitToken($token) {
+ switch($this->phase) {
+ case self::INIT_PHASE: return $this->initPhase($token); break;
+ case self::ROOT_PHASE: return $this->rootElementPhase($token); break;
+ case self::MAIN_PHASE: return $this->mainPhase($token); break;
+ case self::END_PHASE : return $this->trailingEndPhase($token); break;
+ }
+ }
+
+ private function initPhase($token) {
+ /* Initially, the tree construction stage must handle each token
+ emitted from the tokenisation stage as follows: */
+
+ /* A DOCTYPE token that is marked as being in error
+ A comment token
+ A start tag token
+ An end tag token
+ A character token that is not one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE
+ An end-of-file token */
+ if((isset($token['error']) && $token['error']) ||
+ $token['type'] === HTML5::COMMENT ||
+ $token['type'] === HTML5::STARTTAG ||
+ $token['type'] === HTML5::ENDTAG ||
+ $token['type'] === HTML5::EOF ||
+ ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
+ !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) {
+ /* This specification does not define how to handle this case. In
+ particular, user agents may ignore the entirety of this specification
+ altogether for such documents, and instead invoke special parse modes
+ with a greater emphasis on backwards compatibility. */
+
+ $this->phase = self::ROOT_PHASE;
+ return $this->rootElementPhase($token);
+
+ /* A DOCTYPE token marked as being correct */
+ } elseif(isset($token['error']) && !$token['error']) {
+ /* Append a DocumentType node to the Document node, with the name
+ attribute set to the name given in the DOCTYPE token (which will be
+ "HTML"), and the other attributes specific to DocumentType objects
+ set to null, empty lists, or the empty string as appropriate. */
+ $doctype = new DOMDocumentType(null, null, 'HTML');
+
+ /* Then, switch to the root element phase of the tree construction
+ stage. */
+ $this->phase = self::ROOT_PHASE;
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/',
+ $token['data'])) {
+ /* Append that character to the Document node. */
+ $text = $this->dom->createTextNode($token['data']);
+ $this->dom->appendChild($text);
+ }
+ }
+
+ private function rootElementPhase($token) {
+ /* After the initial phase, as each token is emitted from the tokenisation
+ stage, it must be processed as described in this section. */
+
+ /* A DOCTYPE token */
+ if($token['type'] === HTML5::DOCTYPE) {
+ // Parse error. Ignore the token.
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ $this->dom->appendChild($comment);
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ } elseif($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Append that character to the Document node. */
+ $text = $this->dom->createTextNode($token['data']);
+ $this->dom->appendChild($text);
+
+ /* A character token that is not one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
+ (FF), or U+0020 SPACE
+ A start tag token
+ An end tag token
+ An end-of-file token */
+ } elseif(($token['type'] === HTML5::CHARACTR &&
+ !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
+ $token['type'] === HTML5::STARTTAG ||
+ $token['type'] === HTML5::ENDTAG ||
+ $token['type'] === HTML5::EOF) {
+ /* Create an HTMLElement node with the tag name html, in the HTML
+ namespace. Append it to the Document object. Switch to the main
+ phase and reprocess the current token. */
+ $html = $this->dom->createElement('html');
+ $this->dom->appendChild($html);
+ $this->stack[] = $html;
+
+ $this->phase = self::MAIN_PHASE;
+ return $this->mainPhase($token);
+ }
+ }
+
+ private function mainPhase($token) {
+ /* Tokens in the main phase must be handled as follows: */
+
+ /* A DOCTYPE token */
+ if($token['type'] === HTML5::DOCTYPE) {
+ // Parse error. Ignore the token.
+
+ /* A start tag token with the tag name "html" */
+ } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
+ /* If this start tag token was not the first start tag token, then
+ it is a parse error. */
+
+ /* For each attribute on the token, check to see if the attribute
+ is already present on the top element of the stack of open elements.
+ If it is not, add the attribute and its corresponding value to that
+ element. */
+ foreach($token['attr'] as $attr) {
+ if(!$this->stack[0]->hasAttribute($attr['name'])) {
+ $this->stack[0]->setAttribute($attr['name'], $attr['value']);
+ }
+ }
+
+ /* An end-of-file token */
+ } elseif($token['type'] === HTML5::EOF) {
+ /* Generate implied end tags. */
+ $this->generateImpliedEndTags();
+
+ /* Anything else. */
+ } else {
+ /* Depends on the insertion mode: */
+ switch($this->mode) {
+ case self::BEFOR_HEAD: return $this->beforeHead($token); break;
+ case self::IN_HEAD: return $this->inHead($token); break;
+ case self::AFTER_HEAD: return $this->afterHead($token); break;
+ case self::IN_BODY: return $this->inBody($token); break;
+ case self::IN_TABLE: return $this->inTable($token); break;
+ case self::IN_CAPTION: return $this->inCaption($token); break;
+ case self::IN_CGROUP: return $this->inColumnGroup($token); break;
+ case self::IN_TBODY: return $this->inTableBody($token); break;
+ case self::IN_ROW: return $this->inRow($token); break;
+ case self::IN_CELL: return $this->inCell($token); break;
+ case self::IN_SELECT: return $this->inSelect($token); break;
+ case self::AFTER_BODY: return $this->afterBody($token); break;
+ case self::IN_FRAME: return $this->inFrameset($token); break;
+ case self::AFTR_FRAME: return $this->afterFrameset($token); break;
+ case self::END_PHASE: return $this->trailingEndPhase($token); break;
+ }
+ }
+ }
+
+ private function beforeHead($token) {
+ /* Handle the token as follows: */
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the current node with the data attribute
+ set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ /* A start tag token with the tag name "head" */
+ } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
+ /* Create an element for the token, append the new element to the
+ current node and push it onto the stack of open elements. */
+ $element = $this->insertElement($token);
+
+ /* Set the head element pointer to this new element node. */
+ $this->head_pointer = $element;
+
+ /* Change the insertion mode to "in head". */
+ $this->mode = self::IN_HEAD;
+
+ /* A start tag token whose tag name is one of: "base", "link", "meta",
+ "script", "style", "title". Or an end tag with the tag name "html".
+ Or a character token that is not one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE. Or any other start tag token */
+ } elseif($token['type'] === HTML5::STARTTAG ||
+ ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
+ ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/',
+ $token['data']))) {
+ /* Act as if a start tag token with the tag name "head" and no
+ attributes had been seen, then reprocess the current token. */
+ $this->beforeHead(array(
+ 'name' => 'head',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ return $this->inHead($token);
+
+ /* Any other end tag */
+ } elseif($token['type'] === HTML5::ENDTAG) {
+ /* Parse error. Ignore the token. */
+ }
+ }
+
+ private function inHead($token) {
+ /* Handle the token as follows: */
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE.
+
+ THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
+ or script element, append the character to the current node regardless
+ of its content. */
+ if(($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
+ $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
+ array('title', 'style', 'script')))) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the current node with the data attribute
+ set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ } elseif($token['type'] === HTML5::ENDTAG &&
+ in_array($token['name'], array('title', 'style', 'script'))) {
+ array_pop($this->stack);
+ return HTML5::PCDATA;
+
+ /* A start tag with the tag name "title" */
+ } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
+ /* Create an element for the token and append the new element to the
+ node pointed to by the head element pointer, or, if that is null
+ (innerHTML case), to the current node. */
+ if($this->head_pointer !== null) {
+ $element = $this->insertElement($token, false);
+ $this->head_pointer->appendChild($element);
+
+ } else {
+ $element = $this->insertElement($token);
+ }
+
+ /* Switch the tokeniser's content model flag to the RCDATA state. */
+ return HTML5::RCDATA;
+
+ /* A start tag with the tag name "style" */
+ } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
+ /* Create an element for the token and append the new element to the
+ node pointed to by the head element pointer, or, if that is null
+ (innerHTML case), to the current node. */
+ if($this->head_pointer !== null) {
+ $element = $this->insertElement($token, false);
+ $this->head_pointer->appendChild($element);
+
+ } else {
+ $this->insertElement($token);
+ }
+
+ /* Switch the tokeniser's content model flag to the CDATA state. */
+ return HTML5::CDATA;
+
+ /* A start tag with the tag name "script" */
+ } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
+ /* Create an element for the token. */
+ $element = $this->insertElement($token, false);
+ $this->head_pointer->appendChild($element);
+
+ /* Switch the tokeniser's content model flag to the CDATA state. */
+ return HTML5::CDATA;
+
+ /* A start tag with the tag name "base", "link", or "meta" */
+ } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+ array('base', 'link', 'meta'))) {
+ /* Create an element for the token and append the new element to the
+ node pointed to by the head element pointer, or, if that is null
+ (innerHTML case), to the current node. */
+ if($this->head_pointer !== null) {
+ $element = $this->insertElement($token, false);
+ $this->head_pointer->appendChild($element);
+ array_pop($this->stack);
+
+ } else {
+ $this->insertElement($token);
+ }
+
+ /* An end tag with the tag name "head" */
+ } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
+ /* If the current node is a head element, pop the current node off
+ the stack of open elements. */
+ if($this->head_pointer->isSameNode(end($this->stack))) {
+ array_pop($this->stack);
+
+ /* Otherwise, this is a parse error. */
+ } else {
+ // k
+ }
+
+ /* Change the insertion mode to "after head". */
+ $this->mode = self::AFTER_HEAD;
+
+ /* A start tag with the tag name "head" or an end tag except "html". */
+ } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
+ ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) {
+ // Parse error. Ignore the token.
+
+ /* Anything else */
+ } else {
+ /* If the current node is a head element, act as if an end tag
+ token with the tag name "head" had been seen. */
+ if($this->head_pointer->isSameNode(end($this->stack))) {
+ $this->inHead(array(
+ 'name' => 'head',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ /* Otherwise, change the insertion mode to "after head". */
+ } else {
+ $this->mode = self::AFTER_HEAD;
+ }
+
+ /* Then, reprocess the current token. */
+ return $this->afterHead($token);
+ }
+ }
+
+ private function afterHead($token) {
+ /* Handle the token as follows: */
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the current node with the data attribute
+ set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ /* A start tag token with the tag name "body" */
+ } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
+ /* Insert a body element for the token. */
+ $this->insertElement($token);
+
+ /* Change the insertion mode to "in body". */
+ $this->mode = self::IN_BODY;
+
+ /* A start tag token with the tag name "frameset" */
+ } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
+ /* Insert a frameset element for the token. */
+ $this->insertElement($token);
+
+ /* Change the insertion mode to "in frameset". */
+ $this->mode = self::IN_FRAME;
+
+ /* A start tag token whose tag name is one of: "base", "link", "meta",
+ "script", "style", "title" */
+ } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+ array('base', 'link', 'meta', 'script', 'style', 'title'))) {
+ /* Parse error. Switch the insertion mode back to "in head" and
+ reprocess the token. */
+ $this->mode = self::IN_HEAD;
+ return $this->inHead($token);
+
+ /* Anything else */
+ } else {
+ /* Act as if a start tag token with the tag name "body" and no
+ attributes had been seen, and then reprocess the current token. */
+ $this->afterHead(array(
+ 'name' => 'body',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ return $this->inBody($token);
+ }
+ }
+
+ private function inBody($token) {
+ /* Handle the token as follows: */
+
+ switch($token['type']) {
+ /* A character token */
+ case HTML5::CHARACTR:
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Append the token's character to the current node. */
+ $this->insertText($token['data']);
+ break;
+
+ /* A comment token */
+ case HTML5::COMMENT:
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+ break;
+
+ case HTML5::STARTTAG:
+ switch($token['name']) {
+ /* A start tag token whose tag name is one of: "script",
+ "style" */
+ case 'script': case 'style':
+ /* Process the token as if the insertion mode had been "in
+ head". */
+ return $this->inHead($token);
+ break;
+
+ /* A start tag token whose tag name is one of: "base", "link",
+ "meta", "title" */
+ case 'base': case 'link': case 'meta': case 'title':
+ /* Parse error. Process the token as if the insertion mode
+ had been "in head". */
+ return $this->inHead($token);
+ break;
+
+ /* A start tag token with the tag name "body" */
+ case 'body':
+ /* Parse error. If the second element on the stack of open
+ elements is not a body element, or, if the stack of open
+ elements has only one node on it, then ignore the token.
+ (innerHTML case) */
+ if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
+ // Ignore
+
+ /* Otherwise, for each attribute on the token, check to see
+ if the attribute is already present on the body element (the
+ second element) on the stack of open elements. If it is not,
+ add the attribute and its corresponding value to that
+ element. */
+ } else {
+ foreach($token['attr'] as $attr) {
+ if(!$this->stack[1]->hasAttribute($attr['name'])) {
+ $this->stack[1]->setAttribute($attr['name'], $attr['value']);
+ }
+ }
+ }
+ break;
+
+ /* A start tag whose tag name is one of: "address",
+ "blockquote", "center", "dir", "div", "dl", "fieldset",
+ "listing", "menu", "ol", "p", "ul" */
+ case 'address': case 'blockquote': case 'center': case 'dir':
+ case 'div': case 'dl': case 'fieldset': case 'listing':
+ case 'menu': case 'ol': case 'p': case 'ul':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+ break;
+
+ /* A start tag whose tag name is "form" */
+ case 'form':
+ /* If the form element pointer is not null, ignore the
+ token with a parse error. */
+ if($this->form_pointer !== null) {
+ // Ignore.
+
+ /* Otherwise: */
+ } else {
+ /* If the stack of open elements has a p element in
+ scope, then act as if an end tag with the tag name p
+ had been seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token, and set the
+ form element pointer to point to the element created. */
+ $element = $this->insertElement($token);
+ $this->form_pointer = $element;
+ }
+ break;
+
+ /* A start tag whose tag name is "li", "dd" or "dt" */
+ case 'li': case 'dd': case 'dt':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ $stack_length = count($this->stack) - 1;
+
+ for($n = $stack_length; 0 <= $n; $n--) {
+ /* 1. Initialise node to be the current node (the
+ bottommost node of the stack). */
+ $stop = false;
+ $node = $this->stack[$n];
+ $cat = $this->getElementCategory($node->tagName);
+
+ /* 2. If node is an li, dd or dt element, then pop all
+ the nodes from the current node up to node, including
+ node, then stop this algorithm. */
+ if($token['name'] === $node->tagName || ($token['name'] !== 'li'
+ && ($node->tagName === 'dd' || $node->tagName === 'dt'))) {
+ for($x = $stack_length; $x >= $n ; $x--) {
+ array_pop($this->stack);
+ }
+
+ break;
+ }
+
+ /* 3. If node is not in the formatting category, and is
+ not in the phrasing category, and is not an address or
+ div element, then stop this algorithm. */
+ if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
+ $node->tagName !== 'address' && $node->tagName !== 'div') {
+ break;
+ }
+ }
+
+ /* Finally, insert an HTML element with the same tag
+ name as the token's. */
+ $this->insertElement($token);
+ break;
+
+ /* A start tag token whose tag name is "plaintext" */
+ case 'plaintext':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ return HTML5::PLAINTEXT;
+ break;
+
+ /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
+ "h5", "h6" */
+ case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* If the stack of open elements has in scope an element whose
+ tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+ this is a parse error; pop elements from the stack until an
+ element with one of those tag names has been popped from the
+ stack. */
+ while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
+ array_pop($this->stack);
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+ break;
+
+ /* A start tag whose tag name is "a" */
+ case 'a':
+ /* If the list of active formatting elements contains
+ an element whose tag name is "a" between the end of the
+ list and the last marker on the list (or the start of
+ the list if there is no marker on the list), then this
+ is a parse error; act as if an end tag with the tag name
+ "a" had been seen, then remove that element from the list
+ of active formatting elements and the stack of open
+ elements if the end tag didn't already remove it (it
+ might not have if the element is not in table scope). */
+ $leng = count($this->a_formatting);
+
+ for($n = $leng - 1; $n >= 0; $n--) {
+ if($this->a_formatting[$n] === self::MARKER) {
+ break;
+
+ } elseif($this->a_formatting[$n]->nodeName === 'a') {
+ $this->emitToken(array(
+ 'name' => 'a',
+ 'type' => HTML5::ENDTAG
+ ));
+ break;
+ }
+ }
+
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $el = $this->insertElement($token);
+
+ /* Add that element to the list of active formatting
+ elements. */
+ $this->a_formatting[] = $el;
+ break;
+
+ /* A start tag whose tag name is one of: "b", "big", "em", "font",
+ "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+ case 'b': case 'big': case 'em': case 'font': case 'i':
+ case 'nobr': case 's': case 'small': case 'strike':
+ case 'strong': case 'tt': case 'u':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $el = $this->insertElement($token);
+
+ /* Add that element to the list of active formatting
+ elements. */
+ $this->a_formatting[] = $el;
+ break;
+
+ /* A start tag token whose tag name is "button" */
+ case 'button':
+ /* If the stack of open elements has a button element in scope,
+ then this is a parse error; act as if an end tag with the tag
+ name "button" had been seen, then reprocess the token. (We don't
+ do that. Unnecessary.) */
+ if($this->elementInScope('button')) {
+ $this->inBody(array(
+ 'name' => 'button',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Insert a marker at the end of the list of active
+ formatting elements. */
+ $this->a_formatting[] = self::MARKER;
+ break;
+
+ /* A start tag token whose tag name is one of: "marquee", "object" */
+ case 'marquee': case 'object':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Insert a marker at the end of the list of active
+ formatting elements. */
+ $this->a_formatting[] = self::MARKER;
+ break;
+
+ /* A start tag token whose tag name is "xmp" */
+ case 'xmp':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Switch the content model flag to the CDATA state. */
+ return HTML5::CDATA;
+ break;
+
+ /* A start tag whose tag name is "table" */
+ case 'table':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Change the insertion mode to "in table". */
+ $this->mode = self::IN_TABLE;
+ break;
+
+ /* A start tag whose tag name is one of: "area", "basefont",
+ "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
+ case 'area': case 'basefont': case 'bgsound': case 'br':
+ case 'embed': case 'img': case 'param': case 'spacer':
+ case 'wbr':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Immediately pop the current node off the stack of open elements. */
+ array_pop($this->stack);
+ break;
+
+ /* A start tag whose tag name is "hr" */
+ case 'hr':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Immediately pop the current node off the stack of open elements. */
+ array_pop($this->stack);
+ break;
+
+ /* A start tag whose tag name is "image" */
+ case 'image':
+ /* Parse error. Change the token's tag name to "img" and
+ reprocess it. (Don't ask.) */
+ $token['name'] = 'img';
+ return $this->inBody($token);
+ break;
+
+ /* A start tag whose tag name is "input" */
+ case 'input':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an input element for the token. */
+ $element = $this->insertElement($token, false);
+
+ /* If the form element pointer is not null, then associate the
+ input element with the form element pointed to by the form
+ element pointer. */
+ $this->form_pointer !== null
+ ? $this->form_pointer->appendChild($element)
+ : end($this->stack)->appendChild($element);
+
+ /* Pop that input element off the stack of open elements. */
+ array_pop($this->stack);
+ break;
+
+ /* A start tag whose tag name is "isindex" */
+ case 'isindex':
+ /* Parse error. */
+ // w/e
+
+ /* If the form element pointer is not null,
+ then ignore the token. */
+ if($this->form_pointer === null) {
+ /* Act as if a start tag token with the tag name "form" had
+ been seen. */
+ $this->inBody(array(
+ 'name' => 'body',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ /* Act as if a start tag token with the tag name "hr" had
+ been seen. */
+ $this->inBody(array(
+ 'name' => 'hr',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ /* Act as if a start tag token with the tag name "p" had
+ been seen. */
+ $this->inBody(array(
+ 'name' => 'p',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ /* Act as if a start tag token with the tag name "label"
+ had been seen. */
+ $this->inBody(array(
+ 'name' => 'label',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ /* Act as if a stream of character tokens had been seen. */
+ $this->insertText('This is a searchable index. '.
+ 'Insert your search keywords here: ');
+
+ /* Act as if a start tag token with the tag name "input"
+ had been seen, with all the attributes from the "isindex"
+ token, except with the "name" attribute set to the value
+ "isindex" (ignoring any explicit "name" attribute). */
+ $attr = $token['attr'];
+ $attr[] = array('name' => 'name', 'value' => 'isindex');
+
+ $this->inBody(array(
+ 'name' => 'input',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => $attr
+ ));
+
+ /* Act as if a stream of character tokens had been seen
+ (see below for what they should say). */
+ $this->insertText('This is a searchable index. '.
+ 'Insert your search keywords here: ');
+
+ /* Act as if an end tag token with the tag name "label"
+ had been seen. */
+ $this->inBody(array(
+ 'name' => 'label',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ /* Act as if an end tag token with the tag name "p" had
+ been seen. */
+ $this->inBody(array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ /* Act as if a start tag token with the tag name "hr" had
+ been seen. */
+ $this->inBody(array(
+ 'name' => 'hr',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ /* Act as if an end tag token with the tag name "form" had
+ been seen. */
+ $this->inBody(array(
+ 'name' => 'form',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+ break;
+
+ /* A start tag whose tag name is "textarea" */
+ case 'textarea':
+ $this->insertElement($token);
+
+ /* Switch the tokeniser's content model flag to the
+ RCDATA state. */
+ return HTML5::RCDATA;
+ break;
+
+ /* A start tag whose tag name is one of: "iframe", "noembed",
+ "noframes" */
+ case 'iframe': case 'noembed': case 'noframes':
+ $this->insertElement($token);
+
+ /* Switch the tokeniser's content model flag to the CDATA state. */
+ return HTML5::CDATA;
+ break;
+
+ /* A start tag whose tag name is "select" */
+ case 'select':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Change the insertion mode to "in select". */
+ $this->mode = self::IN_SELECT;
+ break;
+
+ /* A start or end tag whose tag name is one of: "caption", "col",
+ "colgroup", "frame", "frameset", "head", "option", "optgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr". */
+ case 'caption': case 'col': case 'colgroup': case 'frame':
+ case 'frameset': case 'head': case 'option': case 'optgroup':
+ case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead':
+ case 'tr':
+ // Parse error. Ignore the token.
+ break;
+
+ /* A start or end tag whose tag name is one of: "event-source",
+ "section", "nav", "article", "aside", "header", "footer",
+ "datagrid", "command" */
+ case 'event-source': case 'section': case 'nav': case 'article':
+ case 'aside': case 'header': case 'footer': case 'datagrid':
+ case 'command':
+ // Work in progress!
+ break;
+
+ /* A start tag token not covered by the previous entries */
+ default:
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ $this->insertElement($token);
+ break;
+ }
+ break;
+
+ case HTML5::ENDTAG:
+ switch($token['name']) {
+ /* An end tag with the tag name "body" */
+ case 'body':
+ /* If the second element in the stack of open elements is
+ not a body element, this is a parse error. Ignore the token.
+ (innerHTML case) */
+ if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
+ // Ignore.
+
+ /* If the current node is not the body element, then this
+ is a parse error. */
+ } elseif(end($this->stack)->nodeName !== 'body') {
+ // Parse error.
+ }
+
+ /* Change the insertion mode to "after body". */
+ $this->mode = self::AFTER_BODY;
+ break;
+
+ /* An end tag with the tag name "html" */
+ case 'html':
+ /* Act as if an end tag with tag name "body" had been seen,
+ then, if that token wasn't ignored, reprocess the current
+ token. */
+ $this->inBody(array(
+ 'name' => 'body',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ return $this->afterBody($token);
+ break;
+
+ /* An end tag whose tag name is one of: "address", "blockquote",
+ "center", "dir", "div", "dl", "fieldset", "listing", "menu",
+ "ol", "pre", "ul" */
+ case 'address': case 'blockquote': case 'center': case 'dir':
+ case 'div': case 'dl': case 'fieldset': case 'listing':
+ case 'menu': case 'ol': case 'pre': case 'ul':
+ /* If the stack of open elements has an element in scope
+ with the same tag name as that of the token, then generate
+ implied end tags. */
+ if($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not an element with
+ the same tag name as that of the token, then this
+ is a parse error. */
+ // w/e
+
+ /* If the stack of open elements has an element in
+ scope with the same tag name as that of the token,
+ then pop elements from this stack until an element
+ with that tag name has been popped from the stack. */
+ for($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if($this->stack[$n]->nodeName === $token['name']) {
+ $n = -1;
+ }
+
+ array_pop($this->stack);
+ }
+ }
+ break;
+
+ /* An end tag whose tag name is "form" */
+ case 'form':
+ /* If the stack of open elements has an element in scope
+ with the same tag name as that of the token, then generate
+ implied end tags. */
+ if($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags();
+
+ }
+
+ if(end($this->stack)->nodeName !== $token['name']) {
+ /* Now, if the current node is not an element with the
+ same tag name as that of the token, then this is a parse
+ error. */
+ // w/e
+
+ } else {
+ /* Otherwise, if the current node is an element with
+ the same tag name as that of the token pop that element
+ from the stack. */
+ array_pop($this->stack);
+ }
+
+ /* In any case, set the form element pointer to null. */
+ $this->form_pointer = null;
+ break;
+
+ /* An end tag whose tag name is "p" */
+ case 'p':
+ /* If the stack of open elements has a p element in scope,
+ then generate implied end tags, except for p elements. */
+ if($this->elementInScope('p')) {
+ $this->generateImpliedEndTags(array('p'));
+
+ /* If the current node is not a p element, then this is
+ a parse error. */
+ // k
+
+ /* If the stack of open elements has a p element in
+ scope, then pop elements from this stack until the stack
+ no longer has a p element in scope. */
+ for($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if($this->elementInScope('p')) {
+ array_pop($this->stack);
+
+ } else {
+ break;
+ }
+ }
+ }
+ break;
+
+ /* An end tag whose tag name is "dd", "dt", or "li" */
+ case 'dd': case 'dt': case 'li':
+ /* If the stack of open elements has an element in scope
+ whose tag name matches the tag name of the token, then
+ generate implied end tags, except for elements with the
+ same tag name as the token. */
+ if($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags(array($token['name']));
+
+ /* If the current node is not an element with the same
+ tag name as the token, then this is a parse error. */
+ // w/e
+
+ /* If the stack of open elements has an element in scope
+ whose tag name matches the tag name of the token, then
+ pop elements from this stack until an element with that
+ tag name has been popped from the stack. */
+ for($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if($this->stack[$n]->nodeName === $token['name']) {
+ $n = -1;
+ }
+
+ array_pop($this->stack);
+ }
+ }
+ break;
+
+ /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
+ "h5", "h6" */
+ case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
+ $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
+
+ /* If the stack of open elements has in scope an element whose
+ tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+ generate implied end tags. */
+ if($this->elementInScope($elements)) {
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not an element with the same
+ tag name as that of the token, then this is a parse error. */
+ // w/e
+
+ /* If the stack of open elements has in scope an element
+ whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
+ "h6", then pop elements from the stack until an element
+ with one of those tag names has been popped from the stack. */
+ while($this->elementInScope($elements)) {
+ array_pop($this->stack);
+ }
+ }
+ break;
+
+ /* An end tag whose tag name is one of: "a", "b", "big", "em",
+ "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+ case 'a': case 'b': case 'big': case 'em': case 'font':
+ case 'i': case 'nobr': case 's': case 'small': case 'strike':
+ case 'strong': case 'tt': case 'u':
+ /* 1. Let the formatting element be the last element in
+ the list of active formatting elements that:
+ * is between the end of the list and the last scope
+ marker in the list, if any, or the start of the list
+ otherwise, and
+ * has the same tag name as the token.
+ */
+ while(true) {
+ for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
+ if($this->a_formatting[$a] === self::MARKER) {
+ break;
+
+ } elseif($this->a_formatting[$a]->tagName === $token['name']) {
+ $formatting_element = $this->a_formatting[$a];
+ $in_stack = in_array($formatting_element, $this->stack, true);
+ $fe_af_pos = $a;
+ break;
+ }
+ }
+
+ /* If there is no such node, or, if that node is
+ also in the stack of open elements but the element
+ is not in scope, then this is a parse error. Abort
+ these steps. The token is ignored. */
+ if(!isset($formatting_element) || ($in_stack &&
+ !$this->elementInScope($token['name']))) {
+ break;
+
+ /* Otherwise, if there is such a node, but that node
+ is not in the stack of open elements, then this is a
+ parse error; remove the element from the list, and
+ abort these steps. */
+ } elseif(isset($formatting_element) && !$in_stack) {
+ unset($this->a_formatting[$fe_af_pos]);
+ $this->a_formatting = array_merge($this->a_formatting);
+ break;
+ }
+
+ /* 2. Let the furthest block be the topmost node in the
+ stack of open elements that is lower in the stack
+ than the formatting element, and is not an element in
+ the phrasing or formatting categories. There might
+ not be one. */
+ $fe_s_pos = array_search($formatting_element, $this->stack, true);
+ $length = count($this->stack);
+
+ for($s = $fe_s_pos + 1; $s < $length; $s++) {
+ $category = $this->getElementCategory($this->stack[$s]->nodeName);
+
+ if($category !== self::PHRASING && $category !== self::FORMATTING) {
+ $furthest_block = $this->stack[$s];
+ }
+ }
+
+ /* 3. If there is no furthest block, then the UA must
+ skip the subsequent steps and instead just pop all
+ the nodes from the bottom of the stack of open
+ elements, from the current node up to the formatting
+ element, and remove the formatting element from the
+ list of active formatting elements. */
+ if(!isset($furthest_block)) {
+ for($n = $length - 1; $n >= $fe_s_pos; $n--) {
+ array_pop($this->stack);
+ }
+
+ unset($this->a_formatting[$fe_af_pos]);
+ $this->a_formatting = array_merge($this->a_formatting);
+ break;
+ }
+
+ /* 4. Let the common ancestor be the element
+ immediately above the formatting element in the stack
+ of open elements. */
+ $common_ancestor = $this->stack[$fe_s_pos - 1];
+
+ /* 5. If the furthest block has a parent node, then
+ remove the furthest block from its parent node. */
+ if($furthest_block->parentNode !== null) {
+ $furthest_block->parentNode->removeChild($furthest_block);
+ }
+
+ /* 6. Let a bookmark note the position of the
+ formatting element in the list of active formatting
+ elements relative to the elements on either side
+ of it in the list. */
+ $bookmark = $fe_af_pos;
+
+ /* 7. Let node and last node be the furthest block.
+ Follow these steps: */
+ $node = $furthest_block;
+ $last_node = $furthest_block;
+
+ while(true) {
+ for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
+ /* 7.1 Let node be the element immediately
+ prior to node in the stack of open elements. */
+ $node = $this->stack[$n];
+
+ /* 7.2 If node is not in the list of active
+ formatting elements, then remove node from
+ the stack of open elements and then go back
+ to step 1. */
+ if(!in_array($node, $this->a_formatting, true)) {
+ unset($this->stack[$n]);
+ $this->stack = array_merge($this->stack);
+
+ } else {
+ break;
+ }
+ }
+
+ /* 7.3 Otherwise, if node is the formatting
+ element, then go to the next step in the overall
+ algorithm. */
+ if($node === $formatting_element) {
+ break;
+
+ /* 7.4 Otherwise, if last node is the furthest
+ block, then move the aforementioned bookmark to
+ be immediately after the node in the list of
+ active formatting elements. */
+ } elseif($last_node === $furthest_block) {
+ $bookmark = array_search($node, $this->a_formatting, true) + 1;
+ }
+
+ /* 7.5 If node has any children, perform a
+ shallow clone of node, replace the entry for
+ node in the list of active formatting elements
+ with an entry for the clone, replace the entry
+ for node in the stack of open elements with an
+ entry for the clone, and let node be the clone. */
+ if($node->hasChildNodes()) {
+ $clone = $node->cloneNode();
+ $s_pos = array_search($node, $this->stack, true);
+ $a_pos = array_search($node, $this->a_formatting, true);
+
+ $this->stack[$s_pos] = $clone;
+ $this->a_formatting[$a_pos] = $clone;
+ $node = $clone;
+ }
+
+ /* 7.6 Insert last node into node, first removing
+ it from its previous parent node if any. */
+ if($last_node->parentNode !== null) {
+ $last_node->parentNode->removeChild($last_node);
+ }
+
+ $node->appendChild($last_node);
+
+ /* 7.7 Let last node be node. */
+ $last_node = $node;
+ }
+
+ /* 8. Insert whatever last node ended up being in
+ the previous step into the common ancestor node,
+ first removing it from its previous parent node if
+ any. */
+ if($last_node->parentNode !== null) {
+ $last_node->parentNode->removeChild($last_node);
+ }
+
+ $common_ancestor->appendChild($last_node);
+
+ /* 9. Perform a shallow clone of the formatting
+ element. */
+ $clone = $formatting_element->cloneNode();
+
+ /* 10. Take all of the child nodes of the furthest
+ block and append them to the clone created in the
+ last step. */
+ while($furthest_block->hasChildNodes()) {
+ $child = $furthest_block->firstChild;
+ $furthest_block->removeChild($child);
+ $clone->appendChild($child);
+ }
+
+ /* 11. Append that clone to the furthest block. */
+ $furthest_block->appendChild($clone);
+
+ /* 12. Remove the formatting element from the list
+ of active formatting elements, and insert the clone
+ into the list of active formatting elements at the
+ position of the aforementioned bookmark. */
+ $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
+ unset($this->a_formatting[$fe_af_pos]);
+ $this->a_formatting = array_merge($this->a_formatting);
+
+ $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
+ $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
+ $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
+
+ /* 13. Remove the formatting element from the stack
+ of open elements, and insert the clone into the stack
+ of open elements immediately after (i.e. in a more
+ deeply nested position than) the position of the
+ furthest block in that stack. */
+ $fe_s_pos = array_search($formatting_element, $this->stack, true);
+ $fb_s_pos = array_search($furthest_block, $this->stack, true);
+ unset($this->stack[$fe_s_pos]);
+
+ $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
+ $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
+ $this->stack = array_merge($s_part1, array($clone), $s_part2);
+
+ /* 14. Jump back to step 1 in this series of steps. */
+ unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
+ }
+ break;
+
+ /* An end tag token whose tag name is one of: "button",
+ "marquee", "object" */
+ case 'button': case 'marquee': case 'object':
+ /* If the stack of open elements has an element in scope whose
+ tag name matches the tag name of the token, then generate implied
+ tags. */
+ if($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not an element with the same
+ tag name as the token, then this is a parse error. */
+ // k
+
+ /* Now, if the stack of open elements has an element in scope
+ whose tag name matches the tag name of the token, then pop
+ elements from the stack until that element has been popped from
+ the stack, and clear the list of active formatting elements up
+ to the last marker. */
+ for($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if($this->stack[$n]->nodeName === $token['name']) {
+ $n = -1;
+ }
+
+ array_pop($this->stack);
+ }
+
+ $marker = end(array_keys($this->a_formatting, self::MARKER, true));
+
+ for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
+ array_pop($this->a_formatting);
+ }
+ }
+ break;
+
+ /* Or an end tag whose tag name is one of: "area", "basefont",
+ "bgsound", "br", "embed", "hr", "iframe", "image", "img",
+ "input", "isindex", "noembed", "noframes", "param", "select",
+ "spacer", "table", "textarea", "wbr" */
+ case 'area': case 'basefont': case 'bgsound': case 'br':
+ case 'embed': case 'hr': case 'iframe': case 'image':
+ case 'img': case 'input': case 'isindex': case 'noembed':
+ case 'noframes': case 'param': case 'select': case 'spacer':
+ case 'table': case 'textarea': case 'wbr':
+ // Parse error. Ignore the token.
+ break;
+
+ /* An end tag token not covered by the previous entries */
+ default:
+ for($n = count($this->stack) - 1; $n >= 0; $n--) {
+ /* Initialise node to be the current node (the bottommost
+ node of the stack). */
+ $node = end($this->stack);
+
+ /* If node has the same tag name as the end tag token,
+ then: */
+ if($token['name'] === $node->nodeName) {
+ /* Generate implied end tags. */
+ $this->generateImpliedEndTags();
+
+ /* If the tag name of the end tag token does not
+ match the tag name of the current node, this is a
+ parse error. */
+ // k
+
+ /* Pop all the nodes from the current node up to
+ node, including node, then stop this algorithm. */
+ for($x = count($this->stack) - $n; $x >= $n; $x--) {
+ array_pop($this->stack);
+ }
+
+ } else {
+ $category = $this->getElementCategory($node);
+
+ if($category !== self::SPECIAL && $category !== self::SCOPING) {
+ /* Otherwise, if node is in neither the formatting
+ category nor the phrasing category, then this is a
+ parse error. Stop this algorithm. The end tag token
+ is ignored. */
+ return false;
+ }
+ }
+ }
+ break;
+ }
+ break;
+ }
+ }
+
+ private function inTable($token) {
+ $clear = array('html', 'table');
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Append the character to the current node. */
+ $text = $this->dom->createTextNode($token['data']);
+ end($this->stack)->appendChild($text);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ end($this->stack)->appendChild($comment);
+
+ /* A start tag whose tag name is "caption" */
+ } elseif($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'caption') {
+ /* Clear the stack back to a table context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert a marker at the end of the list of active
+ formatting elements. */
+ $this->a_formatting[] = self::MARKER;
+
+ /* Insert an HTML element for the token, then switch the
+ insertion mode to "in caption". */
+ $this->insertElement($token);
+ $this->mode = self::IN_CAPTION;
+
+ /* A start tag whose tag name is "colgroup" */
+ } elseif($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'colgroup') {
+ /* Clear the stack back to a table context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert an HTML element for the token, then switch the
+ insertion mode to "in column group". */
+ $this->insertElement($token);
+ $this->mode = self::IN_CGROUP;
+
+ /* A start tag whose tag name is "col" */
+ } elseif($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'col') {
+ $this->inTable(array(
+ 'name' => 'colgroup',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ $this->inColumnGroup($token);
+
+ /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+ array('tbody', 'tfoot', 'thead'))) {
+ /* Clear the stack back to a table context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert an HTML element for the token, then switch the insertion
+ mode to "in table body". */
+ $this->insertElement($token);
+ $this->mode = self::IN_TBODY;
+
+ /* A start tag whose tag name is one of: "td", "th", "tr" */
+ } elseif($token['type'] === HTML5::STARTTAG &&
+ in_array($token['name'], array('td', 'th', 'tr'))) {
+ /* Act as if a start tag token with the tag name "tbody" had been
+ seen, then reprocess the current token. */
+ $this->inTable(array(
+ 'name' => 'tbody',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ return $this->inTableBody($token);
+
+ /* A start tag whose tag name is "table" */
+ } elseif($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'table') {
+ /* Parse error. Act as if an end tag token with the tag name "table"
+ had been seen, then, if that token wasn't ignored, reprocess the
+ current token. */
+ $this->inTable(array(
+ 'name' => 'table',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ return $this->mainPhase($token);
+
+ /* An end tag whose tag name is "table" */
+ } elseif($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'table') {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. (innerHTML case) */
+ if(!$this->elementInScope($token['name'], true)) {
+ return false;
+
+ /* Otherwise: */
+ } else {
+ /* Generate implied end tags. */
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not a table element, then this
+ is a parse error. */
+ // w/e
+
+ /* Pop elements from this stack until a table element has been
+ popped from the stack. */
+ while(true) {
+ $current = end($this->stack)->nodeName;
+ array_pop($this->stack);
+
+ if($current === 'table') {
+ break;
+ }
+ }
+
+ /* Reset the insertion mode appropriately. */
+ $this->resetInsertionMode();
+ }
+
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
+ 'tfoot', 'th', 'thead', 'tr'))) {
+ // Parse error. Ignore the token.
+
+ /* Anything else */
+ } else {
+ /* Parse error. Process the token as if the insertion mode was "in
+ body", with the following exception: */
+
+ /* If the current node is a table, tbody, tfoot, thead, or tr
+ element, then, whenever a node would be inserted into the current
+ node, it must instead be inserted into the foster parent element. */
+ if(in_array(end($this->stack)->nodeName,
+ array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+ /* The foster parent element is the parent element of the last
+ table element in the stack of open elements, if there is a
+ table element and it has such a parent element. If there is no
+ table element in the stack of open elements (innerHTML case),
+ then the foster parent element is the first element in the
+ stack of open elements (the html element). Otherwise, if there
+ is a table element in the stack of open elements, but the last
+ table element in the stack of open elements has no parent, or
+ its parent node is not an element, then the foster parent
+ element is the element before the last table element in the
+ stack of open elements. */
+ for($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if($this->stack[$n]->nodeName === 'table') {
+ $table = $this->stack[$n];
+ break;
+ }
+ }
+
+ if(isset($table) && $table->parentNode !== null) {
+ $this->foster_parent = $table->parentNode;
+
+ } elseif(!isset($table)) {
+ $this->foster_parent = $this->stack[0];
+
+ } elseif(isset($table) && ($table->parentNode === null ||
+ $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
+ $this->foster_parent = $this->stack[$n - 1];
+ }
+ }
+
+ $this->inBody($token);
+ }
+ }
+
+ private function inCaption($token) {
+ /* An end tag whose tag name is "caption" */
+ if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. (innerHTML case) */
+ if(!$this->elementInScope($token['name'], true)) {
+ // Ignore
+
+ /* Otherwise: */
+ } else {
+ /* Generate implied end tags. */
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not a caption element, then this
+ is a parse error. */
+ // w/e
+
+ /* Pop elements from this stack until a caption element has
+ been popped from the stack. */
+ while(true) {
+ $node = end($this->stack)->nodeName;
+ array_pop($this->stack);
+
+ if($node === 'caption') {
+ break;
+ }
+ }
+
+ /* Clear the list of active formatting elements up to the last
+ marker. */
+ $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+ /* Switch the insertion mode to "in table". */
+ $this->mode = self::IN_TABLE;
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
+ name is "table" */
+ } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+ 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'table')) {
+ /* Parse error. Act as if an end tag with the tag name "caption"
+ had been seen, then, if that token wasn't ignored, reprocess the
+ current token. */
+ $this->inCaption(array(
+ 'name' => 'caption',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ return $this->inTable($token);
+
+ /* An end tag whose tag name is one of: "body", "col", "colgroup",
+ "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+ array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
+ 'thead', 'tr'))) {
+ // Parse error. Ignore the token.
+
+ /* Anything else */
+ } else {
+ /* Process the token as if the insertion mode was "in body". */
+ $this->inBody($token);
+ }
+ }
+
+ private function inColumnGroup($token) {
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Append the character to the current node. */
+ $text = $this->dom->createTextNode($token['data']);
+ end($this->stack)->appendChild($text);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ end($this->stack)->appendChild($comment);
+
+ /* A start tag whose tag name is "col" */
+ } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
+ /* Insert a col element for the token. Immediately pop the current
+ node off the stack of open elements. */
+ $this->insertElement($token);
+ array_pop($this->stack);
+
+ /* An end tag whose tag name is "colgroup" */
+ } elseif($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'colgroup') {
+ /* If the current node is the root html element, then this is a
+ parse error, ignore the token. (innerHTML case) */
+ if(end($this->stack)->nodeName === 'html') {
+ // Ignore
+
+ /* Otherwise, pop the current node (which will be a colgroup
+ element) from the stack of open elements. Switch the insertion
+ mode to "in table". */
+ } else {
+ array_pop($this->stack);
+ $this->mode = self::IN_TABLE;
+ }
+
+ /* An end tag whose tag name is "col" */
+ } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
+ /* Parse error. Ignore the token. */
+
+ /* Anything else */
+ } else {
+ /* Act as if an end tag with the tag name "colgroup" had been seen,
+ and then, if that token wasn't ignored, reprocess the current token. */
+ $this->inColumnGroup(array(
+ 'name' => 'colgroup',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ return $this->inTable($token);
+ }
+ }
+
+ private function inTableBody($token) {
+ $clear = array('tbody', 'tfoot', 'thead', 'html');
+
+ /* A start tag whose tag name is "tr" */
+ if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
+ /* Clear the stack back to a table body context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert a tr element for the token, then switch the insertion
+ mode to "in row". */
+ $this->insertElement($token);
+ $this->mode = self::IN_ROW;
+
+ /* A start tag whose tag name is one of: "th", "td" */
+ } elseif($token['type'] === HTML5::STARTTAG &&
+ ($token['name'] === 'th' || $token['name'] === 'td')) {
+ /* Parse error. Act as if a start tag with the tag name "tr" had
+ been seen, then reprocess the current token. */
+ $this->inTableBody(array(
+ 'name' => 'tr',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ ));
+
+ return $this->inRow($token);
+
+ /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif($token['type'] === HTML5::ENDTAG &&
+ in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. */
+ if(!$this->elementInScope($token['name'], true)) {
+ // Ignore
+
+ /* Otherwise: */
+ } else {
+ /* Clear the stack back to a table body context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Pop the current node from the stack of open elements. Switch
+ the insertion mode to "in table". */
+ array_pop($this->stack);
+ $this->mode = self::IN_TABLE;
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
+ } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) ||
+ ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) {
+ /* If the stack of open elements does not have a tbody, thead, or
+ tfoot element in table scope, this is a parse error. Ignore the
+ token. (innerHTML case) */
+ if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
+ // Ignore.
+
+ /* Otherwise: */
+ } else {
+ /* Clear the stack back to a table body context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Act as if an end tag with the same tag name as the current
+ node ("tbody", "tfoot", or "thead") had been seen, then
+ reprocess the current token. */
+ $this->inTableBody(array(
+ 'name' => end($this->stack)->nodeName,
+ 'type' => HTML5::ENDTAG
+ ));
+
+ return $this->mainPhase($token);
+ }
+
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th", "tr" */
+ } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
+ /* Parse error. Ignore the token. */
+
+ /* Anything else */
+ } else {
+ /* Process the token as if the insertion mode was "in table". */
+ $this->inTable($token);
+ }
+ }
+
+ private function inRow($token) {
+ $clear = array('tr', 'html');
+
+ /* A start tag whose tag name is one of: "th", "td" */
+ if($token['type'] === HTML5::STARTTAG &&
+ ($token['name'] === 'th' || $token['name'] === 'td')) {
+ /* Clear the stack back to a table row context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert an HTML element for the token, then switch the insertion
+ mode to "in cell". */
+ $this->insertElement($token);
+ $this->mode = self::IN_CELL;
+
+ /* Insert a marker at the end of the list of active formatting
+ elements. */
+ $this->a_formatting[] = self::MARKER;
+
+ /* An end tag whose tag name is "tr" */
+ } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. (innerHTML case) */
+ if(!$this->elementInScope($token['name'], true)) {
+ // Ignore.
+
+ /* Otherwise: */
+ } else {
+ /* Clear the stack back to a table row context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Pop the current node (which will be a tr element) from the
+ stack of open elements. Switch the insertion mode to "in table
+ body". */
+ array_pop($this->stack);
+ $this->mode = self::IN_TBODY;
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
+ } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) {
+ /* Act as if an end tag with the tag name "tr" had been seen, then,
+ if that token wasn't ignored, reprocess the current token. */
+ $this->inRow(array(
+ 'name' => 'tr',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ return $this->inCell($token);
+
+ /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif($token['type'] === HTML5::ENDTAG &&
+ in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. */
+ if(!$this->elementInScope($token['name'], true)) {
+ // Ignore.
+
+ /* Otherwise: */
+ } else {
+ /* Otherwise, act as if an end tag with the tag name "tr" had
+ been seen, then reprocess the current token. */
+ $this->inRow(array(
+ 'name' => 'tr',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ return $this->inCell($token);
+ }
+
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th" */
+ } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
+ /* Parse error. Ignore the token. */
+
+ /* Anything else */
+ } else {
+ /* Process the token as if the insertion mode was "in table". */
+ $this->inTable($token);
+ }
+ }
+
+ private function inCell($token) {
+ /* An end tag whose tag name is one of: "td", "th" */
+ if($token['type'] === HTML5::ENDTAG &&
+ ($token['name'] === 'td' || $token['name'] === 'th')) {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as that of the token, then this is a
+ parse error and the token must be ignored. */
+ if(!$this->elementInScope($token['name'], true)) {
+ // Ignore.
+
+ /* Otherwise: */
+ } else {
+ /* Generate implied end tags, except for elements with the same
+ tag name as the token. */
+ $this->generateImpliedEndTags(array($token['name']));
+
+ /* Now, if the current node is not an element with the same tag
+ name as the token, then this is a parse error. */
+ // k
+
+ /* Pop elements from this stack until an element with the same
+ tag name as the token has been popped from the stack. */
+ while(true) {
+ $node = end($this->stack)->nodeName;
+ array_pop($this->stack);
+
+ if($node === $token['name']) {
+ break;
+ }
+ }
+
+ /* Clear the list of active formatting elements up to the last
+ marker. */
+ $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+ /* Switch the insertion mode to "in row". (The current node
+ will be a tr element at this point.) */
+ $this->mode = self::IN_ROW;
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+ 'thead', 'tr'))) {
+ /* If the stack of open elements does not have a td or th element
+ in table scope, then this is a parse error; ignore the token.
+ (innerHTML case) */
+ if(!$this->elementInScope(array('td', 'th'), true)) {
+ // Ignore.
+
+ /* Otherwise, close the cell (see below) and reprocess the current
+ token. */
+ } else {
+ $this->closeCell();
+ return $this->inRow($token);
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+ 'thead', 'tr'))) {
+ /* If the stack of open elements does not have a td or th element
+ in table scope, then this is a parse error; ignore the token.
+ (innerHTML case) */
+ if(!$this->elementInScope(array('td', 'th'), true)) {
+ // Ignore.
+
+ /* Otherwise, close the cell (see below) and reprocess the current
+ token. */
+ } else {
+ $this->closeCell();
+ return $this->inRow($token);
+ }
+
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html" */
+ } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html'))) {
+ /* Parse error. Ignore the token. */
+
+ /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
+ "thead", "tr" */
+ } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+ array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as that of the token (which can only
+ happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
+ then this is a parse error and the token must be ignored. */
+ if(!$this->elementInScope($token['name'], true)) {
+ // Ignore.
+
+ /* Otherwise, close the cell (see below) and reprocess the current
+ token. */
+ } else {
+ $this->closeCell();
+ return $this->inRow($token);
+ }
+
+ /* Anything else */
+ } else {
+ /* Process the token as if the insertion mode was "in body". */
+ $this->inBody($token);
+ }
+ }
+
+ private function inSelect($token) {
+ /* Handle the token as follows: */
+
+ /* A character token */
+ if($token['type'] === HTML5::CHARACTR) {
+ /* Append the token's character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ /* A start tag token whose tag name is "option" */
+ } elseif($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'option') {
+ /* If the current node is an option element, act as if an end tag
+ with the tag name "option" had been seen. */
+ if(end($this->stack)->nodeName === 'option') {
+ $this->inSelect(array(
+ 'name' => 'option',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* A start tag token whose tag name is "optgroup" */
+ } elseif($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'optgroup') {
+ /* If the current node is an option element, act as if an end tag
+ with the tag name "option" had been seen. */
+ if(end($this->stack)->nodeName === 'option') {
+ $this->inSelect(array(
+ 'name' => 'option',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* If the current node is an optgroup element, act as if an end tag
+ with the tag name "optgroup" had been seen. */
+ if(end($this->stack)->nodeName === 'optgroup') {
+ $this->inSelect(array(
+ 'name' => 'optgroup',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* An end tag token whose tag name is "optgroup" */
+ } elseif($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'optgroup') {
+ /* First, if the current node is an option element, and the node
+ immediately before it in the stack of open elements is an optgroup
+ element, then act as if an end tag with the tag name "option" had
+ been seen. */
+ $elements_in_stack = count($this->stack);
+
+ if($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
+ $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
+ $this->inSelect(array(
+ 'name' => 'option',
+ 'type' => HTML5::ENDTAG
+ ));
+ }
+
+ /* If the current node is an optgroup element, then pop that node
+ from the stack of open elements. Otherwise, this is a parse error,
+ ignore the token. */
+ if($this->stack[$elements_in_stack - 1] === 'optgroup') {
+ array_pop($this->stack);
+ }
+
+ /* An end tag token whose tag name is "option" */
+ } elseif($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'option') {
+ /* If the current node is an option element, then pop that node
+ from the stack of open elements. Otherwise, this is a parse error,
+ ignore the token. */
+ if(end($this->stack)->nodeName === 'option') {
+ array_pop($this->stack);
+ }
+
+ /* An end tag whose tag name is "select" */
+ } elseif($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'select') {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. (innerHTML case) */
+ if(!$this->elementInScope($token['name'], true)) {
+ // w/e
+
+ /* Otherwise: */
+ } else {
+ /* Pop elements from the stack of open elements until a select
+ element has been popped from the stack. */
+ while(true) {
+ $current = end($this->stack)->nodeName;
+ array_pop($this->stack);
+
+ if($current === 'select') {
+ break;
+ }
+ }
+
+ /* Reset the insertion mode appropriately. */
+ $this->resetInsertionMode();
+ }
+
+ /* A start tag whose tag name is "select" */
+ } elseif($token['name'] === 'select' &&
+ $token['type'] === HTML5::STARTTAG) {
+ /* Parse error. Act as if the token had been an end tag with the
+ tag name "select" instead. */
+ $this->inSelect(array(
+ 'name' => 'select',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ /* An end tag whose tag name is one of: "caption", "table", "tbody",
+ "tfoot", "thead", "tr", "td", "th" */
+ } elseif(in_array($token['name'], array('caption', 'table', 'tbody',
+ 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) {
+ /* Parse error. */
+ // w/e
+
+ /* If the stack of open elements has an element in table scope with
+ the same tag name as that of the token, then act as if an end tag
+ with the tag name "select" had been seen, and reprocess the token.
+ Otherwise, ignore the token. */
+ if($this->elementInScope($token['name'], true)) {
+ $this->inSelect(array(
+ 'name' => 'select',
+ 'type' => HTML5::ENDTAG
+ ));
+
+ $this->mainPhase($token);
+ }
+
+ /* Anything else */
+ } else {
+ /* Parse error. Ignore the token. */
+ }
+ }
+
+ private function afterBody($token) {
+ /* Handle the token as follows: */
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Process the token as it would be processed if the insertion mode
+ was "in body". */
+ $this->inBody($token);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the first element in the stack of open
+ elements (the html element), with the data attribute set to the
+ data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ $this->stack[0]->appendChild($comment);
+
+ /* An end tag with the tag name "html" */
+ } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
+ /* If the parser was originally created in order to handle the
+ setting of an element's innerHTML attribute, this is a parse error;
+ ignore the token. (The element will be an html element in this
+ case.) (innerHTML case) */
+
+ /* Otherwise, switch to the trailing end phase. */
+ $this->phase = self::END_PHASE;
+
+ /* Anything else */
+ } else {
+ /* Parse error. Set the insertion mode to "in body" and reprocess
+ the token. */
+ $this->mode = self::IN_BODY;
+ return $this->inBody($token);
+ }
+ }
+
+ private function inFrameset($token) {
+ /* Handle the token as follows: */
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
+ if($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ /* A start tag with the tag name "frameset" */
+ } elseif($token['name'] === 'frameset' &&
+ $token['type'] === HTML5::STARTTAG) {
+ $this->insertElement($token);
+
+ /* An end tag with the tag name "frameset" */
+ } elseif($token['name'] === 'frameset' &&
+ $token['type'] === HTML5::ENDTAG) {
+ /* If the current node is the root html element, then this is a
+ parse error; ignore the token. (innerHTML case) */
+ if(end($this->stack)->nodeName === 'html') {
+ // Ignore
+
+ } else {
+ /* Otherwise, pop the current node from the stack of open
+ elements. */
+ array_pop($this->stack);
+
+ /* If the parser was not originally created in order to handle
+ the setting of an element's innerHTML attribute (innerHTML case),
+ and the current node is no longer a frameset element, then change
+ the insertion mode to "after frameset". */
+ $this->mode = self::AFTR_FRAME;
+ }
+
+ /* A start tag with the tag name "frame" */
+ } elseif($token['name'] === 'frame' &&
+ $token['type'] === HTML5::STARTTAG) {
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Immediately pop the current node off the stack of open elements. */
+ array_pop($this->stack);
+
+ /* A start tag with the tag name "noframes" */
+ } elseif($token['name'] === 'noframes' &&
+ $token['type'] === HTML5::STARTTAG) {
+ /* Process the token as if the insertion mode had been "in body". */
+ $this->inBody($token);
+
+ /* Anything else */
+ } else {
+ /* Parse error. Ignore the token. */
+ }
+ }
+
+ private function afterFrameset($token) {
+ /* Handle the token as follows: */
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
+ if($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ /* An end tag with the tag name "html" */
+ } elseif($token['name'] === 'html' &&
+ $token['type'] === HTML5::ENDTAG) {
+ /* Switch to the trailing end phase. */
+ $this->phase = self::END_PHASE;
+
+ /* A start tag with the tag name "noframes" */
+ } elseif($token['name'] === 'noframes' &&
+ $token['type'] === HTML5::STARTTAG) {
+ /* Process the token as if the insertion mode had been "in body". */
+ $this->inBody($token);
+
+ /* Anything else */
+ } else {
+ /* Parse error. Ignore the token. */
+ }
+ }
+
+ private function trailingEndPhase($token) {
+ /* After the main phase, as each token is emitted from the tokenisation
+ stage, it must be processed as described in this section. */
+
+ /* A DOCTYPE token */
+ if($token['type'] === HTML5::DOCTYPE) {
+ // Parse error. Ignore the token.
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5::COMMENT) {
+ /* Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ $this->dom->appendChild($comment);
+
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ } elseif($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* Process the token as it would be processed in the main phase. */
+ $this->mainPhase($token);
+
+ /* A character token that is not one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE. Or a start tag token. Or an end tag token. */
+ } elseif(($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
+ $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) {
+ /* Parse error. Switch back to the main phase and reprocess the
+ token. */
+ $this->phase = self::MAIN_PHASE;
+ return $this->mainPhase($token);
+
+ /* An end-of-file token */
+ } elseif($token['type'] === HTML5::EOF) {
+ /* OMG DONE!! */
+ }
+ }
+
+ private function insertElement($token, $append = true) {
+ $el = $this->dom->createElement($token['name']);
+
+ foreach($token['attr'] as $attr) {
+ if(!$el->hasAttribute($attr['name'])) {
+ $el->setAttribute($attr['name'], $attr['value']);
+ }
+ }
+
+ $this->appendToRealParent($el);
+ $this->stack[] = $el;
+
+ return $el;
+ }
+
+ private function insertText($data) {
+ $text = $this->dom->createTextNode($data);
+ $this->appendToRealParent($text);
+ }
+
+ private function insertComment($data) {
+ $comment = $this->dom->createComment($data);
+ $this->appendToRealParent($comment);
+ }
+
+ private function appendToRealParent($node) {
+ if($this->foster_parent === null) {
+ end($this->stack)->appendChild($node);
+
+ } elseif($this->foster_parent !== null) {
+ /* If the foster parent element is the parent element of the
+ last table element in the stack of open elements, then the new
+ node must be inserted immediately before the last table element
+ in the stack of open elements in the foster parent element;
+ otherwise, the new node must be appended to the foster parent
+ element. */
+ for($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if($this->stack[$n]->nodeName === 'table' &&
+ $this->stack[$n]->parentNode !== null) {
+ $table = $this->stack[$n];
+ break;
+ }
+ }
+
+ if(isset($table) && $this->foster_parent->isSameNode($table->parentNode))
+ $this->foster_parent->insertBefore($node, $table);
+ else
+ $this->foster_parent->appendChild($node);
+
+ $this->foster_parent = null;
+ }
+ }
+
+ private function elementInScope($el, $table = false) {
+ if(is_array($el)) {
+ foreach($el as $element) {
+ if($this->elementInScope($element, $table)) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ $leng = count($this->stack);
+
+ for($n = 0; $n < $leng; $n++) {
+ /* 1. Initialise node to be the current node (the bottommost node of
+ the stack). */
+ $node = $this->stack[$leng - 1 - $n];
+
+ if($node->tagName === $el) {
+ /* 2. If node is the target node, terminate in a match state. */
+ return true;
+
+ } elseif($node->tagName === 'table') {
+ /* 3. Otherwise, if node is a table element, terminate in a failure
+ state. */
+ return false;
+
+ } elseif($table === true && in_array($node->tagName, array('caption', 'td',
+ 'th', 'button', 'marquee', 'object'))) {
+ /* 4. Otherwise, if the algorithm is the "has an element in scope"
+ variant (rather than the "has an element in table scope" variant),
+ and node is one of the following, terminate in a failure state. */
+ return false;
+
+ } elseif($node === $node->ownerDocument->documentElement) {
+ /* 5. Otherwise, if node is an html element (root element), terminate
+ in a failure state. (This can only happen if the node is the topmost
+ node of the stack of open elements, and prevents the next step from
+ being invoked if there are no more elements in the stack.) */
+ return false;
+ }
+
+ /* Otherwise, set node to the previous entry in the stack of open
+ elements and return to step 2. (This will never fail, since the loop
+ will always terminate in the previous step if the top of the stack
+ is reached.) */
+ }
+ }
+
+ private function reconstructActiveFormattingElements() {
+ /* 1. If there are no entries in the list of active formatting elements,
+ then there is nothing to reconstruct; stop this algorithm. */
+ $formatting_elements = count($this->a_formatting);
+
+ if($formatting_elements === 0) {
+ return false;
+ }
+
+ /* 3. Let entry be the last (most recently added) element in the list
+ of active formatting elements. */
+ $entry = end($this->a_formatting);
+
+ /* 2. If the last (most recently added) entry in the list of active
+ formatting elements is a marker, or if it is an element that is in the
+ stack of open elements, then there is nothing to reconstruct; stop this
+ algorithm. */
+ if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+ return false;
+ }
+
+ for($a = $formatting_elements - 1; $a >= 0; true) {
+ /* 4. If there are no entries before entry in the list of active
+ formatting elements, then jump to step 8. */
+ if($a === 0) {
+ $step_seven = false;
+ break;
+ }
+
+ /* 5. Let entry be the entry one earlier than entry in the list of
+ active formatting elements. */
+ $a--;
+ $entry = $this->a_formatting[$a];
+
+ /* 6. If entry is neither a marker nor an element that is also in
+ thetack of open elements, go to step 4. */
+ if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+ break;
+ }
+ }
+
+ while(true) {
+ /* 7. Let entry be the element one later than entry in the list of
+ active formatting elements. */
+ if(isset($step_seven) && $step_seven === true) {
+ $a++;
+ $entry = $this->a_formatting[$a];
+ }
+
+ /* 8. Perform a shallow clone of the element entry to obtain clone. */
+ $clone = $entry->cloneNode();
+
+ /* 9. Append clone to the current node and push it onto the stack
+ of open elements so that it is the new current node. */
+ end($this->stack)->appendChild($clone);
+ $this->stack[] = $clone;
+
+ /* 10. Replace the entry for entry in the list with an entry for
+ clone. */
+ $this->a_formatting[$a] = $clone;
+
+ /* 11. If the entry for clone in the list of active formatting
+ elements is not the last entry in the list, return to step 7. */
+ if(end($this->a_formatting) !== $clone) {
+ $step_seven = true;
+ } else {
+ break;
+ }
+ }
+ }
+
+ private function clearTheActiveFormattingElementsUpToTheLastMarker() {
+ /* When the steps below require the UA to clear the list of active
+ formatting elements up to the last marker, the UA must perform the
+ following steps: */
+
+ while(true) {
+ /* 1. Let entry be the last (most recently added) entry in the list
+ of active formatting elements. */
+ $entry = end($this->a_formatting);
+
+ /* 2. Remove entry from the list of active formatting elements. */
+ array_pop($this->a_formatting);
+
+ /* 3. If entry was a marker, then stop the algorithm at this point.
+ The list has been cleared up to the last marker. */
+ if($entry === self::MARKER) {
+ break;
+ }
+ }
+ }
+
+ private function generateImpliedEndTags(array $exclude = array()) {
+ /* When the steps below require the UA to generate implied end tags,
+ then, if the current node is a dd element, a dt element, an li element,
+ a p element, a td element, a th element, or a tr element, the UA must
+ act as if an end tag with the respective tag name had been seen and
+ then generate implied end tags again. */
+ $node = end($this->stack);
+ $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
+
+ while(in_array(end($this->stack)->nodeName, $elements)) {
+ array_pop($this->stack);
+ }
+ }
+
+ private function getElementCategory($name) {
+ if(in_array($name, $this->special))
+ return self::SPECIAL;
+
+ elseif(in_array($name, $this->scoping))
+ return self::SCOPING;
+
+ elseif(in_array($name, $this->formatting))
+ return self::FORMATTING;
+
+ else
+ return self::PHRASING;
+ }
+
+ private function clearStackToTableContext($elements) {
+ /* When the steps above require the UA to clear the stack back to a
+ table context, it means that the UA must, while the current node is not
+ a table element or an html element, pop elements from the stack of open
+ elements. If this causes any elements to be popped from the stack, then
+ this is a parse error. */
+ while(true) {
+ $node = end($this->stack)->nodeName;
+
+ if(in_array($node, $elements)) {
+ break;
+ } else {
+ array_pop($this->stack);
+ }
+ }
+ }
+
+ private function resetInsertionMode() {
+ /* 1. Let last be false. */
+ $last = false;
+ $leng = count($this->stack);
+
+ for($n = $leng - 1; $n >= 0; $n--) {
+ /* 2. Let node be the last node in the stack of open elements. */
+ $node = $this->stack[$n];
+
+ /* 3. If node is the first node in the stack of open elements, then
+ set last to true. If the element whose innerHTML attribute is being
+ set is neither a td element nor a th element, then set node to the
+ element whose innerHTML attribute is being set. (innerHTML case) */
+ if($this->stack[0]->isSameNode($node)) {
+ $last = true;
+ }
+
+ /* 4. If node is a select element, then switch the insertion mode to
+ "in select" and abort these steps. (innerHTML case) */
+ if($node->nodeName === 'select') {
+ $this->mode = self::IN_SELECT;
+ break;
+
+ /* 5. If node is a td or th element, then switch the insertion mode
+ to "in cell" and abort these steps. */
+ } elseif($node->nodeName === 'td' || $node->nodeName === 'th') {
+ $this->mode = self::IN_CELL;
+ break;
+
+ /* 6. If node is a tr element, then switch the insertion mode to
+ "in row" and abort these steps. */
+ } elseif($node->nodeName === 'tr') {
+ $this->mode = self::IN_ROW;
+ break;
+
+ /* 7. If node is a tbody, thead, or tfoot element, then switch the
+ insertion mode to "in table body" and abort these steps. */
+ } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) {
+ $this->mode = self::IN_TBODY;
+ break;
+
+ /* 8. If node is a caption element, then switch the insertion mode
+ to "in caption" and abort these steps. */
+ } elseif($node->nodeName === 'caption') {
+ $this->mode = self::IN_CAPTION;
+ break;
+
+ /* 9. If node is a colgroup element, then switch the insertion mode
+ to "in column group" and abort these steps. (innerHTML case) */
+ } elseif($node->nodeName === 'colgroup') {
+ $this->mode = self::IN_CGROUP;
+ break;
+
+ /* 10. If node is a table element, then switch the insertion mode
+ to "in table" and abort these steps. */
+ } elseif($node->nodeName === 'table') {
+ $this->mode = self::IN_TABLE;
+ break;
+
+ /* 11. If node is a head element, then switch the insertion mode
+ to "in body" ("in body"! not "in head"!) and abort these steps.
+ (innerHTML case) */
+ } elseif($node->nodeName === 'head') {
+ $this->mode = self::IN_BODY;
+ break;
+
+ /* 12. If node is a body element, then switch the insertion mode to
+ "in body" and abort these steps. */
+ } elseif($node->nodeName === 'body') {
+ $this->mode = self::IN_BODY;
+ break;
+
+ /* 13. If node is a frameset element, then switch the insertion
+ mode to "in frameset" and abort these steps. (innerHTML case) */
+ } elseif($node->nodeName === 'frameset') {
+ $this->mode = self::IN_FRAME;
+ break;
+
+ /* 14. If node is an html element, then: if the head element
+ pointer is null, switch the insertion mode to "before head",
+ otherwise, switch the insertion mode to "after head". In either
+ case, abort these steps. (innerHTML case) */
+ } elseif($node->nodeName === 'html') {
+ $this->mode = ($this->head_pointer === null)
+ ? self::BEFOR_HEAD
+ : self::AFTER_HEAD;
+
+ break;
+
+ /* 15. If last is true, then set the insertion mode to "in body"
+ and abort these steps. (innerHTML case) */
+ } elseif($last) {
+ $this->mode = self::IN_BODY;
+ break;
+ }
+ }
+ }
+
+ private function closeCell() {
+ /* If the stack of open elements has a td or th element in table scope,
+ then act as if an end tag token with that tag name had been seen. */
+ foreach(array('td', 'th') as $cell) {
+ if($this->elementInScope($cell, true)) {
+ $this->inCell(array(
+ 'name' => $cell,
+ 'type' => HTML5::ENDTAG
+ ));
+
+ break;
+ }
+ }
+ }
+
+ public function save() {
+ return $this->dom;
+ }
+}
+?>
diff --git a/maintenance/flush-definition-cache.php b/maintenance/flush-definition-cache.php
index 6d51ab06..93af3b75 100755
--- a/maintenance/flush-definition-cache.php
+++ b/maintenance/flush-definition-cache.php
@@ -32,5 +32,5 @@ foreach ($names as $name) {
$cache->flush($config);
}
-echo 'Cache flushed successfully.';
+echo "Cache flushed successfully.\n";
diff --git a/maintenance/generate-ph5p-patch.php b/maintenance/generate-ph5p-patch.php
new file mode 100644
index 00000000..ecd9fa3f
--- /dev/null
+++ b/maintenance/generate-ph5p-patch.php
@@ -0,0 +1,13 @@
+ PH5P.patch");
+unlink($newt);
diff --git a/plugins/phorum/htmlpurifier.php b/plugins/phorum/htmlpurifier.php
index 4654c65d..ae2c276c 100644
--- a/plugins/phorum/htmlpurifier.php
+++ b/plugins/phorum/htmlpurifier.php
@@ -261,12 +261,42 @@ function phorum_htmlpurifier_editor_after_subject() {
// don't show this message if it's a WYSIWYG editor, since it will
// then be handled automatically
if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) return;
- ?>
- HTML input is on. Make sure you escape all HTML and
- angled-brackets with < and > (you can also use CDATA
- tags, simply wrap the suspect text with
-<![CDATA[text]]>. Paragraphs will only be applied to
-double-spaces; single-spaces will not generate <br> tags.
+ ?> |
+
+ HTML input is enabled. Make sure you escape all HTML and
+ angled brackets with < and > .
+ config;
+ if ($config->get('AutoFormat', 'AutoParagraph')) {
+ ?>
+ Auto-paragraphing is enabled. Double
+ newlines will be converted to paragraphs; for single
+ newlines, use the pre tag.
+ getDefinition('HTML');
+ $allowed = array();
+ foreach ($html_definition->info as $name => $x) $allowed[] = "$name ";
+ sort($allowed);
+ $allowed_text = implode(', ', $allowed);
+ ?>Allowed tags: .
+
+
+ For inputting literal code such as HTML and PHP for display, use
+ CDATA tags to auto-escape your angled brackets, and pre
+ to preserve newlines:
+
+ <pre><![CDATA[
+Place code here
+]]></pre>
+
+ Power users, you can hide this notice with:
+ .htmlpurifier-help {display:none;}
+
|
$top_id) { // test for end condition
diff --git a/tests/HTMLPurifier/ChildDef/OptionalTest.php b/tests/HTMLPurifier/ChildDef/OptionalTest.php
index 154353df..40dc17ee 100644
--- a/tests/HTMLPurifier/ChildDef/OptionalTest.php
+++ b/tests/HTMLPurifier/ChildDef/OptionalTest.php
@@ -19,5 +19,9 @@ class HTMLPurifier_ChildDef_OptionalTest extends HTMLPurifier_ChildDefHarness
$this->assertResult('Not allowed text', '');
}
+ function testEmpty() {
+ $this->assertResult('');
+ }
+
}
diff --git a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
index 256d3a34..e55f96e5 100644
--- a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
+++ b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
@@ -74,10 +74,11 @@ extends HTMLPurifier_ChildDefHarness
}
function testError() {
- $this->expectError('Cannot use non-block element as block wrapper');
+ // $this->expectError('Cannot use non-block element as block wrapper');
$this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p');
$this->config->set('HTML', 'BlockWrapper', 'dav');
$this->assertResult('Needs wrap', 'Needs wrap
');
+ $this->swallowErrors();
}
}
diff --git a/tests/HTMLPurifier/ErrorCollectorEMock.php b/tests/HTMLPurifier/ErrorCollectorEMock.php
index c0f577d5..d3461331 100644
--- a/tests/HTMLPurifier/ErrorCollectorEMock.php
+++ b/tests/HTMLPurifier/ErrorCollectorEMock.php
@@ -27,11 +27,20 @@ class HTMLPurifier_ErrorCollectorEMock extends HTMLPurifier_ErrorCollectorMock
function send($severity, $msg) {
// test for context
- $test = &$this->_getCurrentTestCase();
+ $context =& SimpleTest::getContext();
+ $test =& $context->getTest();
+
+ // compat
+ if (empty($this->_mock)) {
+ $mock =& $this;
+ } else {
+ $mock =& $this->_mock;
+ }
+
foreach ($this->_expected_context as $key => $value) {
$test->assertEqual($value, $this->_context->get($key));
}
- $step = $this->getCallCount('send');
+ $step = $mock->getCallCount('send');
if (isset($this->_expected_context_at[$step])) {
foreach ($this->_expected_context_at[$step] as $key => $value) {
$test->assertEqual($value, $this->_context->get($key));
@@ -39,7 +48,7 @@ class HTMLPurifier_ErrorCollectorEMock extends HTMLPurifier_ErrorCollectorMock
}
// boilerplate mock code, does not have return value or references
$args = func_get_args();
- $this->_invoke('send', $args);
+ $mock->_invoke('send', $args);
}
}
diff --git a/tests/HTMLPurifier/ErrorsHarness.php b/tests/HTMLPurifier/ErrorsHarness.php
index 67f7c6b3..9ab204e7 100644
--- a/tests/HTMLPurifier/ErrorsHarness.php
+++ b/tests/HTMLPurifier/ErrorsHarness.php
@@ -3,11 +3,15 @@
require_once 'HTMLPurifier/ErrorCollectorEMock.php';
require_once 'HTMLPurifier/Lexer/DirectLex.php';
+/**
+ * @todo Make the callCount variable actually work, so we can precisely
+ * specify what errors we want: no more, no less
+ */
class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness
{
var $config, $context;
- var $collector, $generator;
+ var $collector, $generator, $callCount;
function setup() {
$this->config = HTMLPurifier_Config::create(array('Core.CollectErrors' => true));
@@ -16,6 +20,11 @@ class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness
$this->collector = new HTMLPurifier_ErrorCollectorEMock();
$this->collector->prepare($this->context);
$this->context->register('ErrorCollector', $this->collector);
+ $this->callCount = 0;
+ }
+
+ function expectNoErrorCollection() {
+ $this->collector->expectNever('send');
}
function expectErrorCollection() {
diff --git a/tests/HTMLPurifier/IDAccumulatorTest.php b/tests/HTMLPurifier/IDAccumulatorTest.php
index 006d689c..c6249eca 100644
--- a/tests/HTMLPurifier/IDAccumulatorTest.php
+++ b/tests/HTMLPurifier/IDAccumulatorTest.php
@@ -30,5 +30,11 @@ class HTMLPurifier_IDAccumulatorTest extends HTMLPurifier_Harness
}
+ function testBuild() {
+ $this->config->set('Attr', 'IDBlacklist', array('foo'));
+ $accumulator = HTMLPurifier_IDAccumulator::build($this->config, $this->context);
+ $this->assertTrue( isset($accumulator->ids['foo']) );
+ }
+
}
diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
index 23743dff..5c726a11 100644
--- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php
+++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
@@ -194,10 +194,7 @@ Bar
',
}
function testNoParagraphSingleInlineNodeInBlockNode() {
- $this->assertResult(
-'Foo
',
- 'Foo
'
- );
+ $this->assertResult( 'Foo
' );
}
function testParagraphInBlockquote() {
@@ -277,9 +274,7 @@ Par1
function testBlockNodeTextDelimeterWithoutDoublespaceInBlockNode() {
$this->assertResult(
'',
-''
+Par2
'
);
}
@@ -351,6 +346,30 @@ Par2'
);
}
+ function testInlineAndBlockTagInDivNoParagraph() {
+ $this->assertResult(
+ ''
+ );
+ }
+
+ function testInlineAndBlockTagInDivNeedingParagraph() {
+ $this->assertResult(
+'',
+''
+ );
+ }
+
+ function testTextInlineNodeTextThenDoubleNewlineNeedsParagraph() {
+ $this->assertResult(
+'',
+''
+ );
+ }
+
function testErrorNeeded() {
$this->config->set('HTML', 'Allowed', 'b');
$this->expectError('Cannot enable AutoParagraph injector because p is not allowed');
diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php
index e67a3e44..bf4c374d 100644
--- a/tests/HTMLPurifier/Strategy/FixNestingTest.php
+++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php
@@ -109,8 +109,9 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
function testInvalidParentError() {
// test fallback to div
$this->config->set('HTML', 'Parent', 'obviously-impossible');
- $this->expectError('Cannot use unrecognized element as parent');
+ // $this->expectError('Cannot use unrecognized element as parent');
$this->assertResult('Accept
');
+ $this->swallowErrors();
}
function testCascadingRemovalOfNodesMissingRequiredChildren() {
@@ -129,5 +130,10 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
$this->assertResult('', '');
}
+ function testStrictBlockquoteInHTML401() {
+ $this->config->set('HTML', 'Doctype', 'HTML 4.01 Strict');
+ $this->assertResult('text
', 'text
');
+ }
+
}
diff --git a/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php b/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php
index 4ba30f36..db5b989f 100644
--- a/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php
+++ b/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php
@@ -28,6 +28,11 @@ class HTMLPurifier_Strategy_FixNesting_ErrorsTest extends HTMLPurifier_Strategy_
$this->invoke("ValidInvalid
");
}
+ function testNoNodeReorganizedForEmptyNode() {
+ $this->expectNoErrorCollection();
+ $this->invoke("");
+ }
+
function testNodeContentsRemoved() {
$this->expectErrorCollection(E_ERROR, 'Strategy_FixNesting: Node contents removed');
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('span', array(), 1));
diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
index e8e6c797..0249d5ef 100644
--- a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
+++ b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
@@ -11,6 +11,19 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
$this->obj = new HTMLPurifier_Strategy_MakeWellFormed();
$this->config->set('AutoFormat', 'AutoParagraph', true);
$this->config->set('AutoFormat', 'Linkify', true);
+ generate_mock_once('HTMLPurifier_Injector');
+ }
+
+ function testEndNotification() {
+ $mock = new HTMLPurifier_InjectorMock();
+ $mock->skip = false;
+ $mock->expectAt(0, 'notifyEnd', array(new HTMLPurifier_Token_End('b')));
+ $mock->expectAt(1, 'notifyEnd', array(new HTMLPurifier_Token_End('i')));
+ $mock->expectCallCount('notifyEnd', 2);
+ $this->config->set('AutoFormat', 'AutoParagraph', false);
+ $this->config->set('AutoFormat', 'Linkify', false);
+ $this->config->set('AutoFormat', 'Custom', array($mock));
+ $this->assertResult('asdf', 'asdf');
}
function testOnlyAutoParagraph() {
@@ -62,4 +75,11 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
);
}
+ function testParagraphAfterLinkifiedURL() {
+ $this->assertResult(
+ "http://google.com\n\nb",
+ "http://google.com
b
"
+ );
+ }
+
}
diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
index 19a37b24..4b397b09 100644
--- a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
+++ b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
@@ -82,5 +82,14 @@ alert(<b>bold</b>);
);
}
+ function testRequiredAttributesTestNotPerformedOnEndTag() {
+ $this->config->set('HTML', 'DefinitionID',
+ 'HTMLPurifier_Strategy_RemoveForeignElementsTest'.
+ '->testRequiredAttributesTestNotPerformedOnEndTag');
+ $def =& $this->config->getHTMLDefinition(true);
+ $def->addElement('f', 'Block', 'Optional: #PCDATA', false, array('req*' => 'Text'));
+ $this->assertResult('Foo Bar');
+ }
+
}
diff --git a/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
index d509a6a1..51f47358 100644
--- a/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
+++ b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
@@ -111,6 +111,12 @@ class HTMLPurifier_URIFilter_MakeAbsoluteTest extends HTMLPurifier_URIFilterHarn
$this->assertFiltering('.', '../');
}
+ function testRemoveJavaScriptWithEmbeddedLink() {
+ // credits: NykO18
+ $this->setBase('http://www.example.com/');
+ $this->assertFiltering('javascript: window.location = \'http://www.example.com\';', false);
+ }
+
// error case
function testErrorNoBase() {
diff --git a/tests/HTMLPurifierTest.php b/tests/HTMLPurifierTest.php
index 3ad307bb..6a221b24 100644
--- a/tests/HTMLPurifierTest.php
+++ b/tests/HTMLPurifierTest.php
@@ -94,6 +94,7 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
$this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true));
$this->assertPurification('foobar');
+ $this->assertPurification('');
}
diff --git a/tests/index.php b/tests/index.php
index a98e20e6..5063d7ed 100755
--- a/tests/index.php
+++ b/tests/index.php
@@ -3,7 +3,9 @@
// call one file using /?f=FileTest.php , see $test_files array for
// valid values
-error_reporting(E_ALL | E_STRICT);
+if (version_compare(PHP_VERSION, '5.1', '>=')) error_reporting(E_ALL | E_STRICT);
+else error_reporting(E_ALL);
+
define('HTMLPurifierTest', 1);
define('HTMLPURIFIER_SCHEMA_STRICT', true); // validate schemas
@@ -17,6 +19,7 @@ $GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") &
$simpletest_location = 'simpletest/'; // reasonable guess
// load SimpleTest
+if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php';
if (file_exists('../test-settings.php')) include '../test-settings.php';
require_once $simpletest_location . 'unit_tester.php';
require_once $simpletest_location . 'reporter.php';
@@ -79,7 +82,7 @@ if ($test_file = $GLOBALS['HTMLPurifierTest']['File']) {
} else {
- $test = new GroupTest('All Tests');
+ $test = new GroupTest('All tests on PHP ' . PHP_VERSION);
foreach ($test_files as $test_file) {
require_once $test_file;
$test->addTestClass(path2class($test_file));
@@ -91,5 +94,3 @@ if (SimpleReporter::inCli()) $reporter = new TextReporter();
else $reporter = new HTMLPurifier_SimpleTest_Reporter('UTF-8');
$test->run($reporter);
-
-
diff --git a/tests/multitest.php b/tests/multitest.php
new file mode 100644
index 00000000..1490bc7b
--- /dev/null
+++ b/tests/multitest.php
@@ -0,0 +1,33 @@
+