diff --git a/Doxyfile b/Doxyfile
index 044217be..5bd7625b 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -1,19 +1,93 @@
-# Doxyfile 1.4.7
+# Doxyfile 1.5.3
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
 
 #---------------------------------------------------------------------------
 # Project related configuration options
 #---------------------------------------------------------------------------
-PROJECT_NAME           = HTML Purifier
-PROJECT_NUMBER         = 2.1.2
-OUTPUT_DIRECTORY       = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
+
+# This tag specifies the encoding used for all characters in the config file that 
+# follow. The default is UTF-8 which is also the encoding used for all text before 
+# the first occurrence of this tag. Doxygen uses libiconv (or the iconv built into 
+# libc) for the transcoding. See http://www.gnu.org/software/libiconv for the list of 
+# possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded 
+# by quotes) that should identify the project.
+
+PROJECT_NAME           = HTMLPurifier
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
+# This could be handy for archiving the generated documentation or 
+# if some version control system is used.
+
+PROJECT_NUMBER         = 2.1.3
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
+# base path where the generated documentation will be put. 
+# If a relative path is entered, it will be relative to the location 
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = "docs/doxygen "
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 
+# 4096 sub-directories (in 2 levels) under the output directory of each output 
+# format and will distribute the generated files over these directories. 
+# Enabling this option can be useful when feeding doxygen a huge amount of 
+# source files, where putting all generated files in the same directory would 
+# otherwise cause performance problems for the file system.
+
 CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all 
+# documentation generated by doxygen is written. Doxygen will use this 
+# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: 
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, 
+# Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian, 
+# Italian, Japanese, Japanese-en (Japanese with English messages), Korean, 
+# Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, 
+# Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian.
+
 OUTPUT_LANGUAGE        = English
-USE_WINDOWS_ENCODING   = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 
+# include brief member descriptions after the members that are listed in 
+# the file and class documentation (similar to JavaDoc). 
+# Set to NO to disable this.
+
 BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 
+# the brief description of a member or function before the detailed description. 
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 
+# brief descriptions will be completely suppressed.
+
 REPEAT_BRIEF           = YES
-ABBREVIATE_BRIEF       = "The $name class" \
-                         "The $name widget" \
-                         "The $name file" \
+
+# This tag implements a quasi-intelligent brief description abbreviator 
+# that is used to form the text in various listings. Each string 
+# in this list, if found as the leading text of the brief description, will be 
+# stripped from the text and the result after processing the whole list, is 
+# used as the annotated text. Otherwise, the brief description is used as-is. 
+# If left blank, the following values are used ("$name" is automatically 
+# replaced with the name of the entity): "The $name class" "The $name widget" 
+# "The $name file" "is" "provides" "specifies" "contains" 
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       = "The $name class " \
+                         "The $name widget " \
+                         "The $name file " \
                          is \
                          provides \
                          specifies \
@@ -22,71 +96,440 @@ ABBREVIATE_BRIEF       = "The $name class" \
                          a \
                          an \
                          the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 
+# Doxygen will generate a detailed section even if there is only a brief 
+# description.
+
 ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 
+# inherited members of a class in the documentation of that class as if those 
+# members were ordinary class members. Constructors, destructors and assignment 
+# operators of the base classes will not be shown.
+
 INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 
+# path before files name in the file list and in the header files. If set 
+# to NO the shortest path that makes the file name unique will be used.
+
 FULL_PATH_NAMES        = YES
-STRIP_FROM_PATH        = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier"
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 
+# can be used to strip a user-defined part of the path. Stripping is 
+# only done if one of the specified strings matches the left-hand part of 
+# the path. The tag can be used to show relative paths in the file list. 
+# If left blank the directory from which doxygen is run is used as the 
+# path to strip.
+
+STRIP_FROM_PATH        = "C:/Users/Edward/Webs/htmlpurifier " \
+                         "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier "
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 
+# the path mentioned in the documentation of a class, which tells 
+# the reader which header file to include in order to use a class. 
+# If left blank only the name of the header file containing the class 
+# definition is used. Otherwise one should specify the include paths that 
+# are normally passed to the compiler using the -I flag.
+
 STRIP_FROM_INC_PATH    = 
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 
+# (but less readable) file names. This can be useful is your file systems 
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
 SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 
+# will interpret the first line (until the first dot) of a JavaDoc-style 
+# comment as the brief description. If set to NO, the JavaDoc 
+# comments will behave just like regular Qt-style comments 
+# (thus requiring an explicit @brief command for a brief description.)
+
 JAVADOC_AUTOBRIEF      = YES
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will 
+# interpret the first line (until the first dot) of a Qt-style 
+# comment as the brief description. If set to NO, the comments 
+# will behave just like regular Qt-style comments (thus requiring 
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 
+# treat a multi-line C++ special comment block (i.e. a block of //! or /// 
+# comments) as a brief description. This used to be the default behaviour. 
+# The new default is to treat a multi-line C++ comment block as a detailed 
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
 MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen 
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member 
+# documentation.
+
 DETAILS_AT_TOP         = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 
+# member inherits the documentation from any documented member that it 
+# re-implements.
+
 INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce 
+# a new page for each member. If set to NO, the documentation of a member will 
+# be part of the file/class/namespace that contains it.
+
 SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. 
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
 TAB_SIZE               = 4
+
+# This tag can be used to specify a number of aliases that acts 
+# as commands in the documentation. An alias has the form "name=value". 
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to 
+# put the command \sideeffect (or @sideeffect) in the documentation, which 
+# will result in a user-defined paragraph with heading "Side Effects:". 
+# You can put \n's in the value part of an alias to insert newlines.
+
 ALIASES                = 
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 
+# sources only. Doxygen will then generate output that is more tailored for C. 
+# For instance, some of the names that are used will be different. The list 
+# of all members will be omitted, etc.
+
 OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java 
+# sources only. Doxygen will then generate output that is more tailored for Java. 
+# For instance, namespaces will be presented as packages, qualified scopes 
+# will look different, etc.
+
 OPTIMIZE_OUTPUT_JAVA   = NO
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to 
+# include (a tag file for) the STL sources as input, then you should 
+# set this tag to YES in order to let doxygen match functions declarations and 
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. 
+# func(std::string) {}). This also make the inheritance and collaboration 
+# diagrams that involve STL classes more complete and accurate.
+
 BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 
+# tag is set to YES, then doxygen will reuse the documentation of the first 
+# member in the group (if any) for the other members of the group. By default 
+# all members of a group must be documented explicitly.
+
 DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of 
+# the same type (for instance a group of public functions) to be put as a 
+# subgroup of that type (e.g. under the Public Functions section). Set it to 
+# NO to prevent subgrouping. Alternatively, this can be done per class using 
+# the \nosubgrouping command.
+
 SUBGROUPING            = YES
+
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 
+# documentation are documented, even if no documentation was available. 
+# Private class members and static file members will be hidden unless 
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
 EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class 
+# will be included in the documentation.
+
 EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file 
+# will be included in the documentation.
+
 EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 
+# defined locally in source files will be included in the documentation. 
+# If set to NO only classes defined in header files are included.
+
 EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local 
+# methods, which are defined in the implementation section but not in 
+# the interface are included in the documentation. 
+# If set to NO (the default) only methods in the interface are included.
+
 EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be extracted 
+# and appear in the documentation as a namespace called 'anonymous_namespace{file}', 
+# where file will be replaced with the base name of the file that contains the anonymous 
+# namespace. By default anonymous namespace are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 
+# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the 
+# various overviews, but no documentation section is generated. 
+# This option has no effect if EXTRACT_ALL is enabled.
+
 HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 
+# undocumented classes that are normally visible in the class hierarchy. 
+# If set to NO (the default) these classes will be included in the various 
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
 HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 
+# friend (class|struct|union) declarations. 
+# If set to NO (the default) these declarations will be included in the 
+# documentation.
+
 HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 
+# documentation blocks found inside the body of a function. 
+# If set to NO (the default) these blocks will be appended to the 
+# function's detailed documentation block.
+
 HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation 
+# that is typed after a \internal command is included. If the tag is set 
+# to NO (the default) then the documentation will be excluded. 
+# Set it to YES to include the internal documentation.
+
 INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 
+# file names in lower-case letters. If set to YES upper-case letters are also 
+# allowed. This is useful if you have classes or files whose names only differ 
+# in case and if your file system supports case sensitive file names. Windows 
+# and Mac users are advised to set this option to NO.
+
 CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 
+# will show members with their full class and namespace scopes in the 
+# documentation. If set to YES the scope will be hidden.
+
 HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 
+# will put a list of the files that are included by a file in the documentation 
+# of that file.
+
 SHOW_INCLUDE_FILES     = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 
+# is inserted in the documentation for inline members.
+
 INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 
+# will sort the (detailed) documentation of file and class members 
+# alphabetically by member name. If set to NO the members will appear in 
+# declaration order.
+
 SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the 
+# brief documentation of file, namespace and class members alphabetically 
+# by member name. If set to NO (the default) the members will appear in 
+# declaration order.
+
 SORT_BRIEF_DOCS        = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be 
+# sorted by fully-qualified names, including namespaces. If set to 
+# NO (the default), the class list will be sorted only by class name, 
+# not including the namespace part. 
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the 
+# alphabetical list.
+
 SORT_BY_SCOPE_NAME     = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or 
+# disable (NO) the todo list. This list is created by putting \todo 
+# commands in the documentation.
+
 GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or 
+# disable (NO) the test list. This list is created by putting \test 
+# commands in the documentation.
+
 GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or 
+# disable (NO) the bug list. This list is created by putting \bug 
+# commands in the documentation.
+
 GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 
+# disable (NO) the deprecated list. This list is created by putting 
+# \deprecated commands in the documentation.
+
 GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional 
+# documentation sections, marked by \if sectionname ... \endif.
+
 ENABLED_SECTIONS       = 
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines 
+# the initial value of a variable or define consists of for it to appear in 
+# the documentation. If the initializer consists of more lines than specified 
+# here it will be hidden. Use a value of 0 to hide initializers completely. 
+# The appearance of the initializer of individual variables and defines in the 
+# documentation can be controlled using \showinitializer or \hideinitializer 
+# command in the documentation regardless of this setting.
+
 MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated 
+# at the bottom of the documentation of classes and structs. If set to YES the 
+# list will mention the files that were used to generate the documentation.
+
 SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories 
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy 
+# in the documentation. The default is NO.
+
 SHOW_DIRECTORIES       = NO
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that 
+# doxygen should invoke to get the current version for each file (typically from the 
+# version control system). Doxygen will invoke the program by executing (via 
+# popen()) the command <command> <input-file>, where <command> is the value of 
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file 
+# provided by doxygen. Whatever the program writes to standard output 
+# is used as the file version. See the manual for examples.
+
 FILE_VERSION_FILTER    = 
+
 #---------------------------------------------------------------------------
 # configuration options related to warning and progress messages
 #---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated 
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
 QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are 
+# generated by doxygen. Possible values are YES and NO. If left blank 
+# NO is used.
+
 WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will 
+# automatically be disabled.
+
 WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 
+# potential errors in the documentation, such as not documenting some 
+# parameters in a documented function, or documenting parameters that 
+# don't exist or using markup commands wrongly.
+
 WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be abled to get warnings for 
+# functions that are documented, but have no documentation for their parameters 
+# or return value. If set to NO (the default) doxygen will only warn about 
+# wrong or incomplete parameter documentation, but not about the absence of 
+# documentation.
+
 WARN_NO_PARAMDOC       = NO
-WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_FORMAT tag determines the format of the warning messages that 
+# doxygen can produce. The string should contain the $file, $line, and $text 
+# tags, which will be replaced by the file and line number from which the 
+# warning originated and the warning text. Optionally the format may contain 
+# $version, which will be replaced by the version of the file (if it could 
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text "
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning 
+# and error messages should be written. If left blank the output is written 
+# to stderr.
+
 WARN_LOGFILE           = 
+
 #---------------------------------------------------------------------------
 # configuration options related to the input files
 #---------------------------------------------------------------------------
-INPUT                  = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier"
+
+# The INPUT tag can be used to specify the files and/or directories that contain 
+# documented source files. You may enter file names like "myfile.cpp" or 
+# directories like "/usr/src/myproject". Separate the files or directories 
+# with spaces.
+
+INPUT                  = ". "
+
+# This tag can be used to specify the character encoding of the source files that 
+# doxygen parses. Internally doxygen uses the UTF-8 encoding, which is also the default 
+# input encoding. Doxygen uses libiconv (or the iconv built into libc) for the transcoding. 
+# See http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the 
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank the following patterns are tested: 
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx 
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py
+
 FILE_PATTERNS          = *.php
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories 
+# should be searched for input files as well. Possible values are YES and NO. 
+# If left blank NO is used.
+
 RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should 
+# excluded from the INPUT source files. This way you can easily exclude a 
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
 EXCLUDE                = 
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or 
+# directories that are symbolic links (a Unix filesystem feature) are excluded 
+# from the input.
+
 EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the 
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 
+# certain files from those directories. Note that the wildcards are matched 
+# against the file with absolute path, so to exclude all test directories 
+# for example use the pattern */test/*
+
 EXCLUDE_PATTERNS       = */tests/* \
                          */benchmarks/* \
                          */docs/* \
@@ -94,149 +537,778 @@ EXCLUDE_PATTERNS       = */tests/* \
                          */configdoc/* \
                          */test-settings.php \
                          */maintenance/* \
-                         */smoketests/*
+                         */smoketests/* \
+                         */library/standalone/* \
+                         */.svn*
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names 
+# (namespaces, classes, functions, etc.) that should be excluded from the output. 
+# The symbol name can be a fully qualified name, a word, or if the wildcard * is used, 
+# a substring. Examples: ANamespace, AClass, AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        = 
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or 
+# directories that contain example code fragments that are included (see 
+# the \include command).
+
 EXAMPLE_PATH           = 
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the 
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank all files are included.
+
 EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 
+# searched for input files to be used with the \include or \dontinclude 
+# commands irrespective of the value of the RECURSIVE tag. 
+# Possible values are YES and NO. If left blank NO is used.
+
 EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or 
+# directories that contain image that are included in the documentation (see 
+# the \image command).
+
 IMAGE_PATH             = 
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should 
+# invoke to filter for each input file. Doxygen will invoke the filter program 
+# by executing (via popen()) the command <filter> <input-file>, where <filter> 
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an 
+# input file. Doxygen will then use the output that the filter program writes 
+# to standard output.  If FILTER_PATTERNS is specified, this tag will be 
+# ignored.
+
 INPUT_FILTER           = 
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern 
+# basis.  Doxygen will compare the file name with each pattern and apply the 
+# filter if there is a match.  The filters are a list of the form: 
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further 
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER 
+# is applied to all files.
+
 FILTER_PATTERNS        = 
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 
+# INPUT_FILTER) will be used to filter the input files when producing source 
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
 FILTER_SOURCE_FILES    = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to source browsing
 #---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will 
+# be generated. Documented entities will be cross-referenced with these sources. 
+# Note: To get rid of all source code in the generated output, make sure also 
+# VERBATIM_HEADERS is set to NO. If you have enabled CALL_GRAPH or CALLER_GRAPH 
+# then you must also enable this option. If you don't then doxygen will produce 
+# a warning and turn it on anyway
+
 SOURCE_BROWSER         = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body 
+# of functions and classes directly in the documentation.
+
 INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 
+# doxygen to hide any special comment blocks from generated source code 
+# fragments. Normal C and C++ comments will always remain visible.
+
 STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default) 
+# then for each documented function all documented 
+# functions referencing it will be listed.
+
 REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default) 
+# then for each documented function all documented entities 
+# called/used by that function will be listed.
+
 REFERENCES_RELATION    = YES
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.  Otherwise they will link to the documentstion.
+
 REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code 
+# will point to the HTML generated by the htags(1) tool instead of doxygen 
+# built-in source browser. The htags tool is part of GNU's global source 
+# tagging system (see http://www.gnu.org/software/global/global.html). You 
+# will need version 4.8.6 or higher.
+
 USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 
+# will generate a verbatim copy of the header file for each class for 
+# which an include is specified. Set to NO to disable this.
+
 VERBATIM_HEADERS       = YES
+
 #---------------------------------------------------------------------------
 # configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 
+# of all compounds will be generated. Enable this if the project 
+# contains a lot of classes, structs, unions or interfaces.
+
 ALPHABETICAL_INDEX     = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 
+# in which this list will be split (can be a number in the range [1..20])
+
 COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all 
+# classes will be put under the same header in the alphabetical index. 
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that 
+# should be ignored while generating the index headers.
+
 IGNORE_PREFIX          = 
+
 #---------------------------------------------------------------------------
 # configuration options related to the HTML output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will 
+# generate HTML output.
+
 GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `html' will be used as the default path.
+
 HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for 
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank 
+# doxygen will generate files with .html extension.
+
 HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard header.
+
 HTML_HEADER            = 
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard footer.
+
 HTML_FOOTER            = 
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading 
+# style sheet that is used by each HTML page. It can be used to 
+# fine-tune the look of the HTML output. If the tag is left blank doxygen 
+# will generate a default style sheet. Note that doxygen will try to copy 
+# the style sheet file to the HTML output directory, so don't put your own 
+# stylesheet in the HTML output directory as well, or it will be erased!
+
 HTML_STYLESHEET        = 
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, 
+# files or namespaces will be aligned in HTML using tables. If set to 
+# NO a bullet list will be used.
+
 HTML_ALIGN_MEMBERS     = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files 
+# will be generated that can be used as input for tools like the 
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) 
+# of the generated HTML documentation.
+
 GENERATE_HTMLHELP      = NO
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML 
+# documentation will contain sections that can be hidden and shown after the 
+# page has loaded. For this to work a browser that supports 
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox 
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 
+# be used to specify the file name of the resulting .chm file. You 
+# can add a path in front of the file if the result should not be 
+# written to the html output directory.
+
 CHM_FILE               = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 
+# be used to specify the location (absolute path including file name) of 
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 
+# the HTML help compiler on the generated index.hhp.
+
 HHC_LOCATION           = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 
+# controls if a separate .chi index file is generated (YES) or that 
+# it should be included in the master .chm file (NO).
+
 GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 
+# controls whether a binary table of contents is generated (YES) or a 
+# normal table of contents (NO) in the .chm file.
+
 BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members 
+# to the contents of the HTML help documentation and to the tree view.
+
 TOC_EXPAND             = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at 
+# top of each HTML page. The value NO (the default) enables the index and 
+# the value YES disables it.
+
 DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20]) 
+# that doxygen will group on one line in the generated HTML documentation.
+
 ENUM_VALUES_PER_LINE   = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that 
+# is generated for HTML Help). For this to work a browser that supports 
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, 
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are 
+# probably better off using the HTML help feature.
+
 GENERATE_TREEVIEW      = YES
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 
+# used to set the initial width (in pixels) of the frame in which the tree 
+# is shown.
+
 TREEVIEW_WIDTH         = 250
+
 #---------------------------------------------------------------------------
 # configuration options related to the LaTeX output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 
+# generate Latex output.
+
 GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `latex' will be used as the default path.
+
 LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 
+# invoked. If left blank `latex' will be used as the default command name.
+
 LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 
+# generate index for LaTeX. If left blank `makeindex' will be used as the 
+# default command name.
+
 MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 
+# LaTeX documents. This may be useful for small projects and may help to 
+# save some trees in general.
+
 COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used 
+# by the printer. Possible values are: a4, a4wide, letter, legal and 
+# executive. If left blank a4wide will be used.
+
 PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX 
+# packages that should be included in the LaTeX output.
+
 EXTRA_PACKAGES         = 
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for 
+# the generated latex document. The header should contain everything until 
+# the first chapter. If it is left blank doxygen will generate a 
+# standard header. Notice: only use this tag if you know what you are doing!
+
 LATEX_HEADER           = 
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will 
+# contain links (just like the HTML output) instead of page references 
+# This makes the output suitable for online browsing using a pdf viewer.
+
 PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 
+# plain latex in the generated Makefile. Set this option to YES to get a 
+# higher quality PDF documentation.
+
 USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. 
+# command to the generated LaTeX files. This will instruct LaTeX to keep 
+# running if errors occur, instead of asking the user for help. 
+# This option is also used when generating formulas in HTML.
+
 LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not 
+# include the index chapters (such as File Index, Compound Index, etc.) 
+# in the output.
+
 LATEX_HIDE_INDICES     = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to the RTF output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output 
+# The RTF output is optimized for Word 97 and may not look very pretty with 
+# other RTF readers or editors.
+
 GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `rtf' will be used as the default path.
+
 RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact 
+# RTF documents. This may be useful for small projects and may help to 
+# save some trees in general.
+
 COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 
+# will contain hyperlink fields. The RTF file will 
+# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using WORD or other 
+# programs which support those fields. 
+# Note: wordpad (write) and others do not support links.
+
 RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's 
+# config file, i.e. a series of assignments. You only have to provide 
+# replacements, missing definitions are set to their default value.
+
 RTF_STYLESHEET_FILE    = 
+
+# Set optional variables used in the generation of an rtf document. 
+# Syntax is similar to doxygen's config file.
+
 RTF_EXTENSIONS_FILE    = 
+
 #---------------------------------------------------------------------------
 # configuration options related to the man page output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will 
+# generate man pages
+
 GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `man' will be used as the default path.
+
 MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to 
+# the generated man pages (default is the subroutine's section .3)
+
 MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output, 
+# then it will generate one additional man file for each entity 
+# documented in the real man page(s). These additional files 
+# only source the real man page, but without them the man command 
+# would be unable to find the correct page. The default is NO.
+
 MAN_LINKS              = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to the XML output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will 
+# generate an XML file that captures the structure of 
+# the code including all documentation.
+
 GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `xml' will be used as the default path.
+
 XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema, 
+# which can be used by a validating XML parser to check the 
+# syntax of the XML files.
+
 XML_SCHEMA             = 
+
+# The XML_DTD tag can be used to specify an XML DTD, 
+# which can be used by a validating XML parser to check the 
+# syntax of the XML files.
+
 XML_DTD                = 
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will 
+# dump the program listings (including syntax highlighting 
+# and cross-referencing information) to the XML output. Note that 
+# enabling this will significantly increase the size of the XML output.
+
 XML_PROGRAMLISTING     = YES
+
 #---------------------------------------------------------------------------
 # configuration options for the AutoGen Definitions output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 
+# generate an AutoGen Definitions (see autogen.sf.net) file 
+# that captures the structure of the code including all 
+# documentation. Note that this feature is still experimental 
+# and incomplete at the moment.
+
 GENERATE_AUTOGEN_DEF   = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to the Perl module output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will 
+# generate a Perl module file that captures the structure of 
+# the code including all documentation. Note that this 
+# feature is still experimental and incomplete at the 
+# moment.
+
 GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate 
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able 
+# to generate PDF and DVI output from the Perl module output.
+
 PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 
+# nicely formatted so it can be parsed by a human reader.  This is useful 
+# if you want to understand what is going on.  On the other hand, if this 
+# tag is set to NO the size of the Perl module output will be much smaller 
+# and Perl will parse it just the same.
+
 PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file 
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 
+# This is useful so different doxyrules.make files included by the same 
+# Makefile don't overwrite each other's variables.
+
 PERLMOD_MAKEVAR_PREFIX = 
+
 #---------------------------------------------------------------------------
 # Configuration options related to the preprocessor   
 #---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 
+# evaluate all C-preprocessor directives found in the sources and include 
+# files.
+
 ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 
+# names in the source code. If set to NO (the default) only conditional 
+# compilation will be performed. Macro expansion can be done in a controlled 
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
 MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 
+# then the macro expansion is limited to the macros specified with the 
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
 EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files 
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
 SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that 
+# contain include files that are not input files but should be processed by 
+# the preprocessor.
+
 INCLUDE_PATH           = 
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 
+# patterns (like *.h and *.hpp) to filter out the header-files in the 
+# directories. If left blank, the patterns specified with FILE_PATTERNS will 
+# be used.
+
 INCLUDE_FILE_PATTERNS  = 
+
+# The PREDEFINED tag can be used to specify one or more macro names that 
+# are defined before the preprocessor is started (similar to the -D option of 
+# gcc). The argument of the tag is a list of macros of the form: name 
+# or name=definition (no spaces). If the definition and the = are 
+# omitted =1 is assumed. To prevent a macro definition from being 
+# undefined via #undef or recursively expanded use the := operator 
+# instead of the = operator.
+
 PREDEFINED             = 
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 
+# this tag can be used to specify a list of macro names that should be expanded. 
+# The macro definition that is found in the sources will be used. 
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
 EXPAND_AS_DEFINED      = 
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 
+# doxygen's preprocessor will remove all function-like macros that are alone 
+# on a line, have an all uppercase name, and do not end with a semicolon. Such 
+# function macros are typically used for boiler-plate code, and will confuse 
+# the parser if not removed.
+
 SKIP_FUNCTION_MACROS   = YES
+
 #---------------------------------------------------------------------------
 # Configuration::additions related to external references   
 #---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles. 
+# Optionally an initial location of the external documentation 
+# can be added for each tagfile. The format of a tag file without 
+# this location is as follows: 
+#   TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: 
+#   TAGFILES = file1=loc1 "file2 = loc2" ... 
+# where "loc1" and "loc2" can be relative or absolute paths or 
+# URLs. If a location is present for each tag, the installdox tool 
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen 
+# is run, you must also specify the path to the tagfile here.
+
 TAGFILES               = 
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create 
+# a tag file that is based on the input files it reads.
+
 GENERATE_TAGFILE       = 
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed 
+# in the class index. If set to NO only the inherited external classes 
+# will be listed.
+
 ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 
+# in the modules index. If set to NO, only the current project's groups will 
+# be listed.
+
 EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script 
+# interpreter (i.e. the result of `which perl').
+
 PERL_PATH              = /usr/bin/perl
+
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool   
 #---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base 
+# or super classes. Setting the tag to NO turns the diagrams off. Note that 
+# this option is superseded by the HAVE_DOT option below. This is only a 
+# fallback. It is recommended to install and use dot, since it yields more 
+# powerful graphs.
+
 CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc 
+# command. Doxygen will then run the mscgen tool (see http://www.mcternan.me.uk/mscgen/) to 
+# produce the chart and insert it in the documentation. The MSCGEN_PATH tag allows you to 
+# specify the directory where the mscgen tool resides. If left empty the tool is assumed to 
+# be found in the default search path.
+
+MSCGEN_PATH            = 
+
+# If set to YES, the inheritance and collaboration graphs will hide 
+# inheritance and usage relations if the target is undocumented 
+# or is not a class.
+
 HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 
+# available from the path. This tool is part of Graphviz, a graph visualization 
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section 
+# have no effect if this option is set to NO (the default)
+
 HAVE_DOT               = NO
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect inheritance relations. Setting this tag to YES will force the 
+# the CLASS_DIAGRAMS tag to NO.
+
 CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect implementation dependencies (inheritance, containment, and 
+# class references variables) of the class with other documented classes.
+
 COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for groups, showing the direct groups dependencies
+
 GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and 
+# collaboration diagrams in a style similar to the OMG's Unified Modeling 
+# Language.
+
 UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the 
+# relations between templates and their instances.
+
 TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 
+# tags are set to YES then doxygen will generate a graph for each documented 
+# file showing the direct and indirect include dependencies of the file with 
+# other documented files.
+
 INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each 
+# documented header file showing the documented files that directly or 
+# indirectly include this file.
+
 INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will 
+# generate a call dependency graph for every global function or class method. 
+# Note that enabling this option will significantly increase the time of a run. 
+# So in most cases it will be better to enable call graphs for selected 
+# functions only using the \callgraph command.
+
 CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will 
+# generate a caller dependency graph for every global function or class method. 
+# Note that enabling this option will significantly increase the time of a run. 
+# So in most cases it will be better to enable caller graphs for selected 
+# functions only using the \callergraph command.
+
 CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 
+# will graphical hierarchy of all classes instead of a textual one.
+
 GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES 
+# then doxygen will show the dependencies a directory has on other directories 
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
 DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
 DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be 
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
 DOT_PATH               = 
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that 
+# contain dot files that are included in the documentation (see the 
+# \dotfile command).
+
 DOTFILE_DIRS           = 
-MAX_DOT_GRAPH_WIDTH    = 1024
-MAX_DOT_GRAPH_HEIGHT   = 1024
+
+# The MAX_DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of 
+# nodes that will be shown in the graph. If the number of nodes in a graph 
+# becomes larger than this value, doxygen will truncate the graph, which is 
+# visualized by representing a node as a red box. Note that doxygen if the number 
+# of direct children of the root node in a graph is already larger than 
+# MAX_DOT_GRAPH_NOTES then the graph will not be shown at all. Also note 
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 
+# graphs generated by dot. A depth value of 3 means that only nodes reachable 
+# from the root by following a path via at most 3 edges will be shown. Nodes 
+# that lay further from the root node will be omitted. Note that setting this 
+# option to 1 or 2 may greatly reduce the computation time needed for large 
+# code bases. Also note that the size of a graph can be further restricted by 
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
 MAX_DOT_GRAPH_DEPTH    = 1000
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent 
+# background. This is disabled by default, which results in a white background. 
+# Warning: Depending on the platform used, enabling this option may lead to 
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to 
+# read).
+
 DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output 
+# files in one run (i.e. multiple -o and -T options on the command line). This 
+# makes dot run faster, but since only newer versions of dot (>1.8.10) 
+# support this, this feature is disabled by default.
+
 DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 
+# generate a legend page explaining the meaning of the various boxes and 
+# arrows in the dot generated graphs.
+
 GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 
+# remove the intermediate dot files that are used to generate 
+# the various graphs.
+
 DOT_CLEANUP            = YES
+
 #---------------------------------------------------------------------------
 # Configuration::additions related to the search engine   
 #---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be 
+# used. If set to NO the values of all tags below this one will be ignored.
+
 SEARCHENGINE           = NO
diff --git a/INSTALL b/INSTALL
index 1d68c0c9..317c89bb 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,64 +1,56 @@
+
 Install
     How to install HTML Purifier
 
-HTML Purifier is designed to run out of the box,  so actually using the library
-is extremely easy. (Although, if you were looking for a step-by-step
-installation GUI, you've come to the wrong place!)  The impatient can scroll
-down to the bottom of this INSTALL document to see the code, but you really
-should make sure a few things are properly done.
-
-
+HTML Purifier is designed to run out of the box, so actually using the 
+library is extremely easy.  (Although... if you were looking for a 
+step-by-step installation GUI, you've downloaded the wrong software!)
+
+While the impatient can get going immediately with some of the sample
+code at the bottom of this library, it's well worth performing some
+basic sanity checks to get the most out of this library.
 
 
+---------------------------------------------------------------------------
 1.  Compatibility
 
-HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.2 and up. It has no
-core dependencies with other libraries.
+HTML Purifier works in both PHP 4 and PHP 5, and is actively tested from 
+PHP 4.3.7 and up (see tests/multitest.php for specific versions). It has 
+no core dependencies with other libraries. PHP 4 support will be 
+deprecated on December 31, 2007, at which time only essential security 
+fixes will be issued for the PHP 4 version until August 8, 2008. 
 
-Optional extensions are iconv (usually installed) and tidy (also common).
-If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
-not having either of these extensions.
+These optional extensions can enhance the capabilities of HTML Purifier:
+
+    * iconv : Converts text to and from non-UTF-8 encodings
+    * tidy  : Used for pretty-printing HTML
 
 
+---------------------------------------------------------------------------
+2.  Reconnaissance
 
-2.  Including the library
+A big plus of HTML Purifier is its inerrant support of standards, so
+your web-pages should be standards-compliant.  (They should also use
+semantic markup, but that's another issue altogether, one HTML Purifier
+cannot fix without reading your mind.)
 
-Simply use:
-
-    require_once '/path/to/library/HTMLPurifier.auto.php';
-
-...and you're good to go.  Since HTML Purifier's codebase is fairly
-large, I recommend only including HTML Purifier when you need it.
-
-If you don't like your include_path to be fiddled around with, simply set
-HTML Purifier's library/ directory to the include path yourself and then:
-
-    require_once 'HTMLPurifier.php';
-
-Only the contents in the library/ folder are necessary, so you can remove
-everything else when using HTML Purifier in a production environment.  
-
-
-
-3.  Preparing the proper output environment
-
-HTML Purifier is all about web-standards, so accordingly your webpages should
-be standards compliant.  HTML Purifier can deal with these doctypes:
+HTML Purifier can process these doctypes:
 
 * XHTML 1.0 Transitional (default)
 * XHTML 1.0 Strict
 * HTML 4.01 Transitional
 * HTML 4.01 Strict
-* XHTML 1.1 (sans Ruby)
+* XHTML 1.1
 
 ...and these character encodings:
 
 * UTF-8 (default)
-* Any encoding iconv supports (support is crippled for i18n though)
+* Any encoding iconv supports (with crippled internationalization support)
 
-The defaults are there for a reason: they are best-practice choices that
-should not be changed lightly.  For those of you in the dark, you can determine
-the doctype from this code in your HTML documents:
+These defaults reflect what my choices where be if I were authoring an
+HTML document, however, what you choose depends on the nature of your
+codebase.  If you don't know what doctype you are using, you can determine
+the doctype from this identifier at the top of your source code:
 
     <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -67,18 +59,34 @@ the doctype from this code in your HTML documents:
 
     <meta http-equiv="Content-type" content="text/html;charset=ENCODING">
 
-For legacy codebases these declarations may be missing.  If that is the case,
-STOP, and read docs/enduser-utf8.html
+If the character encoding declaration is missing, STOP NOW, and
+read 'docs/enduser-utf8.html' (web accessible at
+http://htmlpurifier.org/docs/enduser-utf8.html).  In fact, even if it is
+present, read this document anyway, as most websites specify character
+encoding incorrectly.
 
 
+---------------------------------------------------------------------------
+3.  Including the library
+
+The procedure is quite simple:
+
+    require_once '/path/to/library/HTMLPurifier.auto.php';
+
+I recommend only including HTML Purifier when you need it, because that
+call represents the inclusion of a lot of PHP files which constitute
+the bulk of HTML Purifier's memory usage.
+
+If you don't like your include_path to be fiddled around with, simply set
+HTML Purifier's library/ directory to the include path yourself and then:
+
+    require_once 'HTMLPurifier.php';
+
+Only the contents in the library/ folder are necessary, so you can remove
+everything else when using HTML Purifier in a production environment. 
 
 
-
-You may currently be vulnerable to XSS and other security threats, and HTML
-Purifier won't be able to fix that.
-
-
-
+---------------------------------------------------------------------------
 4. Configuration
 
 HTML Purifier is designed to run out-of-the-box, but occasionally HTML
@@ -95,7 +103,6 @@ object and read on:
     $config = HTMLPurifier_Config::createDefault();
 
 
-
 4.1. Setting a different character encoding
 
 You really shouldn't use any other encoding except UTF-8, especially if you
@@ -122,10 +129,6 @@ but please be cognizant of the issues the "solution" creates (for this
 reason, I do not include the solution in this document).
 
 
-
-
-
-
 4.2. Setting a different doctype
 
 For those of you using HTML 4.01 Transitional, you can disable
@@ -135,7 +138,6 @@ XHTML output like this:
 
 Other supported doctypes include:
 
-
     * HTML 4.01 Strict
     * HTML 4.01 Transitional
     * XHTML 1.0 Strict
@@ -143,7 +145,6 @@ Other supported doctypes include:
     * XHTML 1.1
 
 
-
 4.3. Other settings
 
 There are more configuration directives which can be read about
@@ -153,55 +154,24 @@ your code.  Some of the more interesting ones are configurable at the
 demo <http://htmlpurifier.org/demo.php> and are well worth looking into
 for your own system.
 
+For example, you can fine tune allowed elements and attributes, convert
+relative URLs to absolute ones, and even autoparagraph input text! These
+are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
+%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
+translates to:
+
+    $config->set('Namespace', 'Directive', $value);
+
+E.g.
+
+    $config->set('HTML', 'Allowed', 'p,b,a[href],i');
+    $config->set('URI', 'Base', 'http://www.example.com');
+    $config->set('URI', 'MakeAbsolute', true);
+    $config->set('AutoFormat', 'AutoParagraph', true);
 
 
-5.   Using the code
-
-The interface is mind-numbingly simple:
-
-    $purifier = new HTMLPurifier();
-    $clean_html = $purifier->purify( $dirty_html );
-
-...or, if you're using the configuration object:
-
-    $purifier = new HTMLPurifier($config);
-    $clean_html = $purifier->purify( $dirty_html );
-
-That's it!  For more examples, check out docs/examples/ (they aren't very
-different though).  Also, docs/enduser-slow.html gives advice on what to
-do if HTML Purifier is slowing down your application.
-
-
-
-6.   Quick install
-
-First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
-writable by the webserver (see Section 7: Caching below for details).
-If your website is in UTF-8 and XHTML Transitional, use this code:
-
-<?php
-    require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
-    
-    $purifier = new HTMLPurifier();
-    $clean_html = $purifier->purify($dirty_html);
-?>
-
-If your website is in a different encoding or doctype, use this code:
-
-<?php
-    require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
-    
-    $config = HTMLPurifier_Config::createDefault();
-    $config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
-    $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
-    $purifier = new HTMLPurifier($config);
-    
-    $clean_html = $purifier->purify($dirty_html);
-?>
-
-
-
-7. Caching
+---------------------------------------------------------------------------
+5. Caching
 
 HTML Purifier generates some cache files (generally one or two) to speed up
 its execution. For maximum performance, make sure that
@@ -236,3 +206,50 @@ hit):
 Or move the cache directory somewhere else (no trailing slash):
 
     $config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
+
+
+---------------------------------------------------------------------------
+6.   Using the code
+
+The interface is mind-numbingly simple:
+
+    $purifier = new HTMLPurifier();
+    $clean_html = $purifier->purify( $dirty_html );
+
+...or, if you're using the configuration object:
+
+    $purifier = new HTMLPurifier($config);
+    $clean_html = $purifier->purify( $dirty_html );
+
+That's it!  For more examples, check out docs/examples/ (they aren't very
+different though).  Also, docs/enduser-slow.html gives advice on what to
+do if HTML Purifier is slowing down your application.
+
+
+---------------------------------------------------------------------------
+7.   Quick install
+
+First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
+writable by the webserver (see Section 5: Caching above for details).
+If your website is in UTF-8 and XHTML Transitional, use this code:
+
+<?php
+    require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
+    
+    $purifier = new HTMLPurifier();
+    $clean_html = $purifier->purify($dirty_html);
+?>
+
+If your website is in a different encoding or doctype, use this code:
+
+<?php
+    require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
+    
+    $config = HTMLPurifier_Config::createDefault();
+    $config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
+    $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
+    $purifier = new HTMLPurifier($config);
+    
+    $clean_html = $purifier->purify($dirty_html);
+?>
+
diff --git a/NEWS b/NEWS
index 212edc94..2f97331f 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,55 @@ NEWS ( CHANGELOG and HISTORY )                                     HTMLPurifier
     . Internal change
 ==========================
 
+2.1.3, released 2007-11-05
+! tests/multitest.php allows you to test multiple versions by running
+  tests/index.php through multiple interpreters using `phpv` shell
+  script (you must provide this script!)
+- Fixed poor include ordering for Email URI AttrDefs, causes fatal errors
+  on some systems.
+- Injector algorithm further refined: off-by-one error regarding skip 
+  counts for dormant injectors fixed
+- Corrective blockquote definition now enabled for HTML 4.01 Strict
+- Fatal error when <img> tag (or any other element with required attributes)
+  has 'id' attribute fixed, thanks NykO18 for reporting
+- Fix warning emitted when a non-supported URI scheme is passed to the
+  MakeAbsolute URIFilter, thanks NykO18 (again)
+- Further refine AutoParagraph injector. Behavior inside of elements
+  allowing paragraph tags clarified: only inline content delimeted by
+  double newlines (not block elements) are paragraphed.
+- Buggy treatment of end tags of elements that have required attributes
+  fixed (does not manifest on default tag-set)
+- Spurious internal content reorganization error suppressed
+- HTMLDefinition->addElement now returns a reference to the created
+  element object, as implied by the documentation
+- Phorum mod's HTML Purifier help message expanded (unreleased elsewhere)
+- Fix a theoretical class of infinite loops from DirectLex reported
+  by Nate Abele
+- Work around unnecessary DOMElement type-cast in PH5P that caused errors
+  in PHP 5.1
+- Work around PHP 4 SimpleTest lack-of-error complaining for one-time-only
+  HTMLDefinition errors, this may indicate problems with error-collecting
+  facilities in PHP 5
+- Make ErrorCollectorEMock work in both PHP 4 and PHP 5
+- Make PH5P work with PHP 5.0 by removing unnecessary array parameter typedef
+. %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment 
+  to better communicate its purpose
+. Error unit tests can now specify the expectation of no errors. Future
+  iterations of the harness will be extremely strict about what errors
+  are allowed
+. Extend Injector hooks to allow for more powerful injector routines
+. HTMLDefinition->addBlankElement created, as according to the HTMLModule
+  method
+. Doxygen configuration file updated, with minor improvements
+. Test runner now checks for similarly named files in conf/ directory too.
+. Minor cosmetic change to flush-definition-cache.php: trailing newline is
+  outputted
+. Maintenance script for generating PH5P patch added, original PH5P source
+  file also added under version control
+. Full unit test runner script title made more descriptive with PHP version
+. Updated INSTALL file to state that 4.3.7 is the earliest version we
+  are actively testing
+
 2.1.2, released 2007-09-03
 ! Implemented Object module for trusted users
 ! Implemented experimental HTML5 parsing mode using PH5P. To use, add
diff --git a/VERSION b/VERSION
index 8f9174b4..abae0d9a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.1.2
\ No newline at end of file
+2.1.3
\ No newline at end of file
diff --git a/WHATSNEW b/WHATSNEW
index e9a40184..d3511d5d 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -1,8 +1,6 @@
-Version 2.1.2 is a mix of experimental features and stability updates.
-Among new features: an Object module for trusted users, support for the
-CSS property 'border-spacing', and HTML 5 style parsing using PH5P.
-Bug fixes ihave resolved a few obscure issues including border-collapse:seperate,
-a DirectLex parsing error, broken HTML in printDefinition.php, and problems
-with the experimental standalone distribution. Also, there were large
-amounts of behind-the-scenes refactoring and the removal of URIScheme
-inclusion reflection.
+Stability release 2.1.3 fixes a slew of minor bugs found in HTML Purifier,
+and also includes some internal code enhancements and refactorings.
+Notably, tests/multitest.php automates testing in multiple versions, 
+fatal AttrDef_URI_Email error fixed, blockquote contents are more lenient
+in HTML 4.01 Strict and fatal errors involving ID tags in img tags were
+fixed.
diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php
index 43fe616b..d5878d30 100644
--- a/library/HTMLPurifier.php
+++ b/library/HTMLPurifier.php
@@ -22,8 +22,8 @@
  */
 
 /*
-    HTML Purifier 2.1.2 - Standards Compliant HTML Filtering
-    Copyright (C) 2006 Edward Z. Yang
+    HTML Purifier 2.1.3 - Standards Compliant HTML Filtering
+    Copyright (C) 2006-2007 Edward Z. Yang
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
@@ -43,9 +43,8 @@
 // constants are slow, but we'll make one exception
 define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
 
-// almost every class has an undocumented dependency to these, so make sure
-// they get included
-require_once 'HTMLPurifier/ConfigSchema.php'; // important
+// every class has an undocumented dependency to these, must be included!
+require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
 require_once 'HTMLPurifier/Config.php';
 require_once 'HTMLPurifier/Context.php';
 
@@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php';
 HTMLPurifier_ConfigSchema::define(
     'Core', 'CollectErrors', false, 'bool', '
 Whether or not to collect errors found while filtering the document. This
-is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
-This directive has been available since 2.0.0.
+is a useful way to give feedback to your users. <strong>Warning:</strong>
+Currently this feature is very patchy and experimental, with lots of
+possible error messages not yet implemented. It will not cause any problems,
+but it may not help your users either. This directive has been available
+since 2.0.0.
 ');
 
 /**
- * Main library execution class.
+ * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
  * 
- * Facade that performs calls to the HTMLPurifier_Lexer,
- * HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
- * purify HTML.
+ * @note There are several points in which configuration can be specified 
+ *       for HTML Purifier.  The precedence of these (from lowest to
+ *       highest) is as follows:
+ *          -# Instance: new HTMLPurifier($config)
+ *          -# Invocation: purify($html, $config)
+ *       These configurations are entirely independent of each other and
+ *       are *not* merged.
  * 
  * @todo We need an easier way to inject strategies, it'll probably end
  *       up getting done through config though.
@@ -77,15 +83,16 @@ This directive has been available since 2.0.0.
 class HTMLPurifier
 {
     
-    var $version = '2.1.2';
+    var $version = '2.1.3';
     
     var $config;
-    var $filters;
+    var $filters = array();
     
     var $strategy, $generator;
     
     /**
-     * Final HTMLPurifier_Context of last run purification. Might be an array.
+     * Resultant HTMLPurifier_Context of last run purification. Is an array
+     * of contexts if the last called method was purifyArray().
      * @public
      */
     var $context;
@@ -150,6 +157,11 @@ class HTMLPurifier
             $context->register('ErrorCollector', $error_collector);
         }
         
+        // setup id_accumulator context, necessary due to the fact that
+        // AttrValidator can be called from many places
+        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
+        $context->register('IDAccumulator', $id_accumulator);
+        
         $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
         
         for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
@@ -198,6 +210,8 @@ class HTMLPurifier
     
     /**
      * Singleton for enforcing just one HTML Purifier in your system
+     * @param $prototype Optional prototype HTMLPurifier instance to
+     *                   overload singleton with.
      */
     static function &getInstance($prototype = null) {
         static $htmlpurifier;
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php
index 365748c0..0e9a5f47 100644
--- a/library/HTMLPurifier/AttrDef/URI.php
+++ b/library/HTMLPurifier/AttrDef/URI.php
@@ -102,7 +102,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
             $result = $uri->validate($config, $context);
             if (!$result) break;
             
-            // chained validation
+            // chained filtering
             $uri_def =& $config->getDefinition('URI');
             $result = $uri_def->filter($uri, $config, $context);
             if (!$result) break;
diff --git a/library/HTMLPurifier/AttrDef/URI/Email.php b/library/HTMLPurifier/AttrDef/URI/Email.php
index aaec099a..ababd9ea 100644
--- a/library/HTMLPurifier/AttrDef/URI/Email.php
+++ b/library/HTMLPurifier/AttrDef/URI/Email.php
@@ -1,7 +1,6 @@
 <?php
 
 require_once 'HTMLPurifier/AttrDef.php';
-require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
 
 class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
 {
@@ -15,3 +14,5 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
     
 }
 
+// sub-implementations
+require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
diff --git a/library/HTMLPurifier/AttrValidator.php b/library/HTMLPurifier/AttrValidator.php
index f02bd208..a471b093 100644
--- a/library/HTMLPurifier/AttrValidator.php
+++ b/library/HTMLPurifier/AttrValidator.php
@@ -23,6 +23,13 @@ class HTMLPurifier_AttrValidator
         $definition = $config->getHTMLDefinition();
         $e =& $context->get('ErrorCollector', true);
         
+        // initialize IDAccumulator if necessary
+        $ok =& $context->get('IDAccumulator', true);
+        if (!$ok) {
+            $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
+            $context->register('IDAccumulator', $id_accumulator);
+        }
+        
         // initialize CurrentToken if necessary
         $current_token =& $context->get('CurrentToken', true);
         if (!$current_token) $context->register('CurrentToken', $token);
diff --git a/library/HTMLPurifier/ChildDef/Optional.php b/library/HTMLPurifier/ChildDef/Optional.php
index 779a7f06..e9f14edf 100644
--- a/library/HTMLPurifier/ChildDef/Optional.php
+++ b/library/HTMLPurifier/ChildDef/Optional.php
@@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
     var $type = 'optional';
     function validateChildren($tokens_of_children, $config, &$context) {
         $result = parent::validateChildren($tokens_of_children, $config, $context);
-        if ($result === false) return array();
+        if ($result === false) {
+            if (empty($tokens_of_children)) return true;
+            else return array();
+        }
         return $result;
     }
 }
diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php
index 7e330e47..0d75b609 100644
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@@ -42,7 +42,7 @@ class HTMLPurifier_Config
     /**
      * HTML Purifier's version
      */
-    var $version = '2.1.2';
+    var $version = '2.1.3';
     
     /**
      * Two-level associative array of configuration directives
diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index fe6bd141..e13e0c62 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -236,13 +236,26 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
     /**
      * Adds a custom element to your HTML definition
      * @note See HTMLPurifier_HTMLModule::addElement for detailed 
-     *       parameter descriptions.
+     *       parameter and return value descriptions.
      */
-    function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
+    function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
         $module =& $this->getAnonymousModule();
         // assume that if the user is calling this, the element
         // is safe. This may not be a good idea
-        $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
+        $element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
+        return $element;
+    }
+    
+    /**
+     * Adds a blank element to your HTML definition, for overriding
+     * existing behavior
+     * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
+     *       parameter and return value descriptions.
+     */
+    function &addBlankElement($element_name) {
+        $module  =& $this->getAnonymousModule();
+        $element =& $module->addBlankElement($element_name);
+        return $element;
     }
     
     /**
diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php
index 386cf365..dcf306a0 100644
--- a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php
+++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php
@@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php';
 require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
 require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
 
+require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
+
 class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
       HTMLPurifier_HTMLModule_Tidy
 {
@@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends
 {
     var $name = 'Tidy_Strict';
     var $defaultLevel = 'light';
+    
+    function makeFixes() {
+        $r = parent::makeFixes();
+        $r['blockquote#content_model_type'] = 'strictblockquote';
+        return $r;
+    }
+    
+    var $defines_child_def = true;
+    function getChildDef($def) {
+        if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def);
+        return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+    }
 }
 
diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php
deleted file mode 100644
index b701491e..00000000
--- a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php
+++ /dev/null
@@ -1,26 +0,0 @@
-<?php
-
-require_once 'HTMLPurifier/HTMLModule/Tidy.php';
-require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
-
-class HTMLPurifier_HTMLModule_Tidy_XHTMLStrict extends
-      HTMLPurifier_HTMLModule_Tidy
-{
-    
-    var $name = 'Tidy_XHTMLStrict';
-    var $defaultLevel = 'light';
-    
-    function makeFixes() {
-        $r = array();
-        $r['blockquote#content_model_type'] = 'strictblockquote';
-        return $r;
-    }
-    
-    var $defines_child_def = true;
-    function getChildDef($def) {
-        if ($def->content_model_type != 'strictblockquote') return false;
-        return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
-    }
-    
-}
-
diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php
index 74a233ff..3fc86160 100644
--- a/library/HTMLPurifier/HTMLModuleManager.php
+++ b/library/HTMLPurifier/HTMLModuleManager.php
@@ -35,7 +35,6 @@ require_once 'HTMLPurifier/HTMLModule/Object.php';
 require_once 'HTMLPurifier/HTMLModule/Tidy.php';
 require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
 require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
-require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
 require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
 
 HTMLPurifier_ConfigSchema::define(
@@ -209,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager
         $this->doctypes->register(
             'XHTML 1.0 Strict', true,
             array_merge($common, $xml, $non_xml),
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'),
+            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
             array(),
             '-//W3C//DTD XHTML 1.0 Strict//EN',
             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
@@ -218,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager
         $this->doctypes->register(
             'XHTML 1.1', true,
             array_merge($common, $xml, array('Ruby')),
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1
+            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
             array(),
             '-//W3C//DTD XHTML 1.1//EN',
             'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
diff --git a/library/HTMLPurifier/IDAccumulator.php b/library/HTMLPurifier/IDAccumulator.php
index 525c9aa0..c9f58835 100644
--- a/library/HTMLPurifier/IDAccumulator.php
+++ b/library/HTMLPurifier/IDAccumulator.php
@@ -1,11 +1,15 @@
 <?php
 
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'IDBlacklist', array(), 'list',
+    'Array of IDs not allowed in the document.'
+);
+
 /**
  * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
  * @note In Slashdot-speak, dupe means duplicate.
- * @note This class does not accept $config or $context, thus, it is the
- *       burden of the callee to register the appropriate errors or
- *       configuration.
+ * @note The default constructor does not accept $config or $context objects:
+ *       use must use the static build() factory method to perform initialization.
  */
 class HTMLPurifier_IDAccumulator
 {
@@ -16,6 +20,19 @@ class HTMLPurifier_IDAccumulator
      */
     var $ids = array();
     
+    /**
+     * Builds an IDAccumulator, also initializing the default blacklist
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Fully initialized HTMLPurifier_IDAccumulator
+     * @static
+     */
+    static function build($config, &$context) {
+        $id_accumulator = new HTMLPurifier_IDAccumulator();
+        $id_accumulator->load($config->get('Attr', 'IDBlacklist'));
+        return $id_accumulator;
+    }
+    
     /**
      * Add an ID to the lookup table.
      * @param $id ID to be added.
diff --git a/library/HTMLPurifier/Injector.php b/library/HTMLPurifier/Injector.php
index 59017163..3b847097 100644
--- a/library/HTMLPurifier/Injector.php
+++ b/library/HTMLPurifier/Injector.php
@@ -4,6 +4,9 @@
  * Injects tokens into the document while parsing for well-formedness.
  * This enables "formatter-like" functionality such as auto-paragraphing,
  * smiley-ification and linkification to take place.
+ * 
+ * @todo Allow injectors to request a re-run on their output. This 
+ *       would help if an operation is recursive.
  */
 class HTMLPurifier_Injector
 {
@@ -107,5 +110,12 @@ class HTMLPurifier_Injector
      */
     function handleElement(&$token) {}
     
+    /**
+     * Notifier that is called when an end token is processed
+     * @note This differs from handlers in that the token is read-only
+     */
+    function notifyEnd($token) {}
+    
+    
 }
 
diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php
index 6e0a6a3e..56a6a268 100644
--- a/library/HTMLPurifier/Injector/AutoParagraph.php
+++ b/library/HTMLPurifier/Injector/AutoParagraph.php
@@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define(
     'AutoFormat', 'AutoParagraph', false, 'bool', '
 <p>
   This directive turns on auto-paragraphing, where double newlines are
-  converted in to paragraphs whenever possible. Auto-paragraphing
-  applies when:
+  converted in to paragraphs whenever possible. Auto-paragraphing:
 </p>
 <ul>
-  <li>There are inline elements or text in the root node</li>
-  <li>There are inline elements or text with double newlines or
-      block elements in nodes that allow paragraph tags</li>
-  <li>There are double newlines in paragraph tags</li>
+  <li>Always applies to inline elements or text in the root node,</li>
+  <li>Applies to inline elements or text with double newlines in nodes
+      that allow paragraph tags,</li>
+  <li>Applies to double newlines in paragraph tags</li>
 </ul>
 <p>
   <code>p</code> tags must be allowed for this directive to take effect.
   We do not use <code>br</code> tags for paragraphing, as that is
   semantically incorrect.
 </p>
+<p>
+  To prevent auto-paragraphing as a content-producer, refrain from using
+  double-newlines except to specify a new paragraph or in contexts where
+  it has special meaning (whitespace usually has no meaning except in
+  tags like <code>pre</code>, so this should not be difficult.) To prevent
+  the paragraphing of inline text adjacent to block elements, wrap them
+  in <code>div</code> tags (the behavior is slightly different outside of
+  the root node.)
+</p>
 <p>
   This directive has been available since 2.0.1.
 </p>
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
                 $ok = false;
                 // test if up-coming tokens are either block or have
                 // a double newline in them
+                $nesting = 0;
                 for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
                     if ($this->inputTokens[$i]->type == 'start'){
                         if (!$this->_isInline($this->inputTokens[$i])) {
-                            $ok = true;
+                            // we haven't found a double-newline, and
+                            // we've hit a block element, so don't paragraph
+                            $ok = false;
+                            break;
                         }
-                        break;
+                        $nesting++;
+                    }
+                    if ($this->inputTokens[$i]->type == 'end') {
+                        if ($nesting <= 0) break;
+                        $nesting--;
                     }
-                    if ($this->inputTokens[$i]->type == 'end') break;
                     if ($this->inputTokens[$i]->type == 'text') {
+                        // found it!
                         if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
                             $ok = true;
+                            break;
                         }
-                        if (!$this->inputTokens[$i]->is_whitespace) break;
                     }
                 }
                 if ($ok) {
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index f52579ab..3819ad22 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) {
 }
 
 HTMLPurifier_ConfigSchema::define(
-    'Core', 'AcceptFullDocuments', true, 'bool',
-    'This parameter determines whether or not the filter should accept full '.
-    'HTML documents, not just HTML fragments.  When on, it will '.
-    'drop all sections except the content between body.'
-);
+    'Core', 'ConvertDocumentToFragment', true, 'bool', '
+This parameter determines whether or not the filter should convert
+input that is a full document with html and body tags to a fragment
+of just the contents of a body tag. This parameter is simply something
+HTML Purifier can do during an edge-case: for most inputs, this
+processing is not necessary.
+');
+HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
 
 HTMLPurifier_ConfigSchema::define(
     'Core', 'LexerImpl', null, 'mixed/null', '
@@ -316,7 +319,7 @@ class HTMLPurifier_Lexer
     function normalize($html, $config, &$context) {
         
         // extract body from document if applicable
-        if ($config->get('Core', 'AcceptFullDocuments')) {
+        if ($config->get('Core', 'ConvertDocumentToFragment')) {
             $html = $this->extractBody($html);
         }
         
diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php
index 6f8c8ff6..cdcf2aa1 100644
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                 
                 $segment = substr($html, $cursor, $strlen_segment);
                 
+                if ($segment === false) {
+                    // somehow, we attempted to access beyond the end of
+                    // the string, defense-in-depth, reported by Nate Abele
+                    break;
+                }
+                
                 // Check if it's a comment
                 if (
-                    substr($segment, 0, 3) == '!--'
+                    substr($segment, 0, 3) === '!--'
                 ) {
                     // re-determine segment length, looking for -->
                     $position_comment_end = strpos($html, '-->', $cursor);
@@ -237,7 +243,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                 // trailing slash. Remember, we could have a tag like <br>, so
                 // any later token processing scripts must convert improperly
                 // classified EmptyTags from StartTags.
-                $is_self_closing= (strrpos($segment,'/') === $strlen_segment-1);
+                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
                 if ($is_self_closing) {
                     $strlen_segment--;
                     $segment = substr($segment, 0, $strlen_segment);
diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php
index 5720c33a..b6762379 100644
--- a/library/HTMLPurifier/Lexer/PH5P.php
+++ b/library/HTMLPurifier/Lexer/PH5P.php
@@ -26,8 +26,6 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
     
 }
 
-// begin PHP5P source code here
-
 /*
 
 Copyright 2007 Jeroen van der Meer <http://jero.net/> 
@@ -3722,7 +3720,7 @@ class HTML5TreeConstructer {
         }
     }
 
-    private function generateImpliedEndTags(array $exclude = array()) {
+    private function generateImpliedEndTags($exclude = array()) {
         /* When the steps below require the UA to generate implied end tags,
         then, if the current node is a dd element, a dt element, an li element,
         a p element, a td element, a th  element, or a tr element, the UA must
@@ -3736,7 +3734,8 @@ class HTML5TreeConstructer {
         }
     }
 
-    private function getElementCategory($name) {
+    private function getElementCategory($node) {
+        $name = $node->tagName;
         if(in_array($name, $this->special))
             return self::SPECIAL;
 
@@ -3884,3 +3883,4 @@ class HTML5TreeConstructer {
         return $this->dom;
     }
 }
+?>
diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php
index 51a14a78..25e9f8ac 100644
--- a/library/HTMLPurifier/Strategy/FixNesting.php
+++ b/library/HTMLPurifier/Strategy/FixNesting.php
@@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
             //################################################################//
             // Process result by interpreting $result
             
-            if ($result === true) {
+            if ($result === true || $child_tokens === $result) {
                 // leave the node as is
                 
                 // register start token as a parental node start
diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php
index b3e8aa74..4b6f498f 100644
--- a/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -36,28 +36,23 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
         
         $definition = $config->getHTMLDefinition();
         
-        // CurrentNesting
-        $this->currentNesting = array();
-        $context->register('CurrentNesting', $this->currentNesting);
-        
-        // InputIndex
-        $this->inputIndex = false;
-        $context->register('InputIndex', $this->inputIndex);
-        
-        // InputTokens
-        $context->register('InputTokens', $tokens);
-        $this->inputTokens =& $tokens;
-        
-        // OutputTokens
+        // local variables
         $result = array();
-        $this->outputTokens =& $result;
-        
-        // %Core.EscapeInvalidTags
-        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
         $generator = new HTMLPurifier_Generator();
-        
+        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
         $e =& $context->get('ErrorCollector', true);
         
+        // member variables
+        $this->currentNesting = array();
+        $this->inputIndex     = false;
+        $this->inputTokens    =& $tokens;
+        $this->outputTokens   =& $result;
+        
+        // context variables
+        $context->register('CurrentNesting', $this->currentNesting);
+        $context->register('InputIndex', $this->inputIndex);
+        $context->register('InputTokens', $tokens);
+        
         // -- begin INJECTOR --
         
         $this->injectors = array();
@@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
             trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
         }
         
+        // warning: most foreach loops follow the convention $i => $x.
+        // be sure, for PHP4 compatibility, to only perform write operations
+        // directly referencing the object using $i: $x is only safe for reads
+        
         // -- end INJECTOR --
         
         $token = false;
@@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
             // if all goes well, this token will be passed through unharmed
             $token = $tokens[$this->inputIndex];
             
+            //printTokens($tokens, $this->inputIndex);
+            
             foreach ($this->injectors as $i => $x) {
                 if ($x->skip > 0) $this->injectors[$i]->skip--;
             }
@@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
                 if ($token->type === 'text') {
                      // injector handler code; duplicated for performance reasons
                      foreach ($this->injectors as $i => $x) {
-                         if (!$x->skip) $x->handleText($token);
+                         if (!$x->skip) $this->injectors[$i]->handleText($token);
                          if (is_array($token)) {
                              $this->currentInjector = $i;
                              break;
@@ -172,7 +173,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
             // injector handler code; duplicated for performance reasons
             if ($ok) {
                 foreach ($this->injectors as $i => $x) {
-                    if (!$x->skip) $x->handleElement($token);
+                    if (!$x->skip) $this->injectors[$i]->handleElement($token);
                     if (is_array($token)) {
                         $this->currentInjector = $i;
                         break;
@@ -202,6 +203,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
             $current_parent = array_pop($this->currentNesting);
             if ($current_parent->name == $token->name) {
                 $result[] = $token;
+                foreach ($this->injectors as $i => $x) {
+                    $this->injectors[$i]->notifyEnd($token);
+                }
                 continue;
             }
             
@@ -238,16 +242,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
             
             // okay, we found it, close all the skipped tags
             // note that skipped tags contains the element we need closed
-            $size = count($skipped_tags);
-            for ($i = $size - 1; $i > 0; $i--) {
-                if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
+            for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
+                if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
                 }
-                $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
+                $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
+                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
+                    $this->injectors[$j]->notifyEnd($new_token);
+                }
             }
             
-            $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
-            
         }
         
         $context->destroy('CurrentNesting');
@@ -255,17 +259,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
         $context->destroy('InputIndex');
         $context->destroy('CurrentToken');
         
-        // we're at the end now, fix all still unclosed tags
-        // not using processToken() because at this point we don't
-        // care about current nesting
+        // we're at the end now, fix all still unclosed tags (this is
+        // duplicated from the end of the loop with some slight modifications)
+        // not using $skipped_tags since it would invariably be all of them
         if (!empty($this->currentNesting)) {
-            $size = count($this->currentNesting);
-            for ($i = $size - 1; $i >= 0; $i--) {
+            for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
                 if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                     $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
                 }
-                $result[] =
-                    new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
+                $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
+                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
+                    $this->injectors[$j]->notifyEnd($new_token);
+                }
             }
         }
         
@@ -286,8 +291,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
             
             // adjust the injector skips based on the array substitution
             if ($this->injectors) {
-                $offset = count($token) + 1;
+                $offset = count($token);
                 for ($i = 0; $i <= $this->currentInjector; $i++) {
+                    // because of the skip back, we need to add one more
+                    // for uninitialized injectors. I'm not exactly
+                    // sure why this is the case, but I think it has to
+                    // do with the fact that we're decrementing skips
+                    // before re-checking text
+                    if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
                     $this->injectors[$i]->skip += $offset;
                 }
             }
diff --git a/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
index 2c280b23..5d26e4f5 100644
--- a/library/HTMLPurifier/Strategy/RemoveForeignElements.php
+++ b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                     // mostly everything's good, but
                     // we need to make sure required attributes are in order
                     if (
+                        ($token->type === 'start' || $token->type === 'empty') &&
                         $definition->info[$token->name]->required_attr &&
                         ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
                     ) {
@@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
                         $token->armor['ValidateAttributes'] = true;
                     }
                     
-                    // CAN BE GENERICIZED
                     if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
                         $textify_comments = $token->name;
                     } elseif ($token->name === $textify_comments && $token->type == 'end') {
diff --git a/library/HTMLPurifier/Strategy/ValidateAttributes.php b/library/HTMLPurifier/Strategy/ValidateAttributes.php
index 869f3fab..6debcc33 100644
--- a/library/HTMLPurifier/Strategy/ValidateAttributes.php
+++ b/library/HTMLPurifier/Strategy/ValidateAttributes.php
@@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php';
 
 require_once 'HTMLPurifier/AttrValidator.php';
 
-HTMLPurifier_ConfigSchema::define(
-    'Attr', 'IDBlacklist', array(), 'list',
-    'Array of IDs not allowed in the document.');
-
 /**
  * Validate all attributes in the tokens.
  */
@@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
     
     function execute($tokens, $config, &$context) {
         
-        // setup id_accumulator context
-        $id_accumulator = new HTMLPurifier_IDAccumulator();
-        $id_accumulator->load($config->get('Attr', 'IDBlacklist'));
-        $context->register('IDAccumulator', $id_accumulator);
-        
         // setup validator
         $validator = new HTMLPurifier_AttrValidator();
         
@@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
             
             $tokens[$key] = $token; // for PHP 4
         }
-        
-        $context->destroy('IDAccumulator');
         $context->destroy('CurrentToken');
         
         return $tokens;
diff --git a/library/HTMLPurifier/URIFilter.php b/library/HTMLPurifier/URIFilter.php
index e0066f3b..ca000ea5 100644
--- a/library/HTMLPurifier/URIFilter.php
+++ b/library/HTMLPurifier/URIFilter.php
@@ -1,10 +1,22 @@
 <?php
 
 /**
- * Chainable filters for custom URI processing 
+ * Chainable filters for custom URI processing.
+ * 
+ * These filters can perform custom actions on a URI filter object,
+ * including transformation or blacklisting.
+ * 
+ * @warning This filter is called before scheme object validation occurs.
+ *          Make sure, if you require a specific scheme object, you
+ *          you check that it exists. This allows filters to convert
+ *          proprietary URI schemes into regular ones.
  */
 class HTMLPurifier_URIFilter
 {
+    
+    /**
+     * Unique identifier of filter
+     */
     var $name;
     
     /**
@@ -17,8 +29,12 @@ class HTMLPurifier_URIFilter
      * @param &$uri Reference to URI object
      * @param $config Instance of HTMLPurifier_Config
      * @param &$context Instance of HTMLPurifier_Context
+     * @return bool Whether or not to continue processing: false indicates
+     *         URL is no good, true indicates continue processing. Note that
+     *         all changes are committed directly on the URI object
      */
     function filter(&$uri, $config, &$context) {
         trigger_error('Cannot call abstract function', E_USER_ERROR);
     }
+    
 }
diff --git a/library/HTMLPurifier/URIFilter/MakeAbsolute.php b/library/HTMLPurifier/URIFilter/MakeAbsolute.php
index 9935dc6e..8fe4f73e 100644
--- a/library/HTMLPurifier/URIFilter/MakeAbsolute.php
+++ b/library/HTMLPurifier/URIFilter/MakeAbsolute.php
@@ -47,6 +47,10 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
             // absolute URI already: don't change
             if (!is_null($uri->host)) return true;
             $scheme_obj = $uri->getSchemeObj($config, $context);
+            if (!$scheme_obj) {
+                // scheme not recognized
+                return false;
+            }
             if (!$scheme_obj->hierarchical) {
                 // non-hierarchal URI with explicit scheme, don't change
                 return true;
diff --git a/maintenance/PH5P.patch b/maintenance/PH5P.patch
index 37e4dbf1..9365cffe 100644
--- a/maintenance/PH5P.patch
+++ b/maintenance/PH5P.patch
@@ -1,5 +1,5 @@
---- old.php	2007-08-19 14:42:33.640625000 -0400
-+++ new.php	2007-08-19 14:41:51.609375000 -0400
+--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php	2007-11-04 23:41:49.074543700 -0500
++++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php	2007-11-05 00:23:52.839543700 -0500
 @@ -211,7 +211,10 @@
          // If nothing is returned, emit a U+0026 AMPERSAND character token.
          // Otherwise, emit the character token that was returned.
@@ -43,3 +43,22 @@
                          $entity = $id;
                          break;
                      }
+@@ -3659,7 +3668,7 @@
+         }
+     }
+ 
+-    private function generateImpliedEndTags(array $exclude = array()) {
++    private function generateImpliedEndTags($exclude = array()) {
+         /* When the steps below require the UA to generate implied end tags,
+         then, if the current node is a dd element, a dt element, an li element,
+         a p element, a td element, a th  element, or a tr element, the UA must
+@@ -3673,7 +3682,8 @@
+         }
+     }
+ 
+-    private function getElementCategory($name) {
++    private function getElementCategory($node) {
++        $name = $node->tagName;
+         if(in_array($name, $this->special))
+             return self::SPECIAL;
+ 
diff --git a/maintenance/PH5P.php b/maintenance/PH5P.php
new file mode 100644
index 00000000..96d0d13f
--- /dev/null
+++ b/maintenance/PH5P.php
@@ -0,0 +1,3824 @@
+<?php
+class HTML5 {
+    private $data;
+    private $char;
+    private $EOF;
+    private $state;
+    private $tree;
+    private $token;
+    private $content_model;
+    private $escape = false;
+    private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute',
+    'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;',
+    'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;',
+    'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;',
+    'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;',
+    'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;',
+    'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;',
+    'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;',
+    'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;',
+    'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN',
+    'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;',
+    'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;',
+    'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig',
+    'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;',
+    'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;',
+    'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil',
+    'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;',
+    'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;',
+    'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;',
+    'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth',
+    'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12',
+    'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt',
+    'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc',
+    'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;',
+    'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;',
+    'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;',
+    'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro',
+    'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;',
+    'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;',
+    'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;',
+    'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash',
+    'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;',
+    'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;',
+    'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;',
+    'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;',
+    'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;',
+    'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;',
+    'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;',
+    'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;',
+    'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc',
+    'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;',
+    'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;');
+
+    const PCDATA    = 0;
+    const RCDATA    = 1;
+    const CDATA     = 2;
+    const PLAINTEXT = 3;
+
+    const DOCTYPE  = 0;
+    const STARTTAG = 1;
+    const ENDTAG   = 2;
+    const COMMENT  = 3;
+    const CHARACTR = 4;
+    const EOF      = 5;
+
+    public function __construct($data) {
+        $data = str_replace("\r\n", "\n", $data);
+        $date = str_replace("\r", null, $data);
+
+        $this->data = $data;
+        $this->char = -1;
+        $this->EOF  = strlen($data);
+        $this->tree = new HTML5TreeConstructer;
+        $this->content_model = self::PCDATA;
+
+        $this->state = 'data';
+
+        while($this->state !== null) {
+            $this->{$this->state.'State'}();
+        }
+    }
+
+    public function save() {
+        return $this->tree->save();
+    }
+
+    private function char() {
+        return ($this->char < $this->EOF)
+            ? $this->data[$this->char]
+            : false;
+    }
+
+    private function character($s, $l = 0) {
+        if($s + $l < $this->EOF) {
+            if($l === 0) {
+                return $this->data[$s];
+            } else {
+                return substr($this->data, $s, $l);
+            }
+        }
+    }
+
+    private function characters($char_class, $start) {
+        return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start));
+    }
+
+    private function dataState() {
+        // Consume the next input character
+        $this->char++;
+        $char = $this->char();
+
+        if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
+            /* U+0026 AMPERSAND (&)
+            When the content model flag is set to one of the PCDATA or RCDATA
+            states: switch to the entity data state. Otherwise: treat it as per
+            the "anything else"    entry below. */
+            $this->state = 'entityData';
+
+        } elseif($char === '-') {
+            /* If the content model flag is set to either the RCDATA state or
+            the CDATA state, and the escape flag is false, and there are at
+            least three characters before this one in the input stream, and the
+            last four characters in the input stream, including this one, are
+            U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
+            and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
+            if(($this->content_model === self::RCDATA || $this->content_model ===
+            self::CDATA) && $this->escape === false &&
+            $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') {
+                $this->escape = true;
+            }
+
+            /* In any case, emit the input character as a character token. Stay
+            in the data state. */
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => $char
+            ));
+
+        /* U+003C LESS-THAN SIGN (<) */
+        } elseif($char === '<' && ($this->content_model === self::PCDATA ||
+        (($this->content_model === self::RCDATA ||
+        $this->content_model === self::CDATA) && $this->escape === false))) {
+            /* When the content model flag is set to the PCDATA state: switch
+            to the tag open state.
+
+            When the content model flag is set to either the RCDATA state or
+            the CDATA state and the escape flag is false: switch to the tag
+            open state.
+
+            Otherwise: treat it as per the "anything else" entry below. */
+            $this->state = 'tagOpen';
+
+        /* U+003E GREATER-THAN SIGN (>) */
+        } elseif($char === '>') {
+            /* If the content model flag is set to either the RCDATA state or
+            the CDATA state, and the escape flag is true, and the last three
+            characters in the input stream including this one are U+002D
+            HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
+            set the escape flag to false. */
+            if(($this->content_model === self::RCDATA ||
+            $this->content_model === self::CDATA) && $this->escape === true &&
+            $this->character($this->char, 3) === '-->') {
+                $this->escape = false;
+            }
+
+            /* In any case, emit the input character as a character token.
+            Stay in the data state. */
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => $char
+            ));
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Emit an end-of-file token. */
+            $this->EOF();
+
+        } elseif($this->content_model === self::PLAINTEXT) {
+            /* When the content model flag is set to the PLAINTEXT state
+            THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
+            the text and emit it as a character token. */
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => substr($this->data, $this->char)
+            ));
+
+            $this->EOF();
+
+        } else {
+            /* Anything else
+            THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that
+            otherwise would also be treated as a character token and emit it
+            as a single character token. Stay in the data state. */
+            $len  = strcspn($this->data, '<&', $this->char);
+            $char = substr($this->data, $this->char, $len);
+            $this->char += $len - 1;
+
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => $char
+            ));
+
+            $this->state = 'data';
+        }
+    }
+
+    private function entityDataState() {
+        // Attempt to consume an entity.
+        $entity = $this->entity();
+
+        // If nothing is returned, emit a U+0026 AMPERSAND character token.
+        // Otherwise, emit the character token that was returned.
+        $char = (!$entity) ? '&' : $entity;
+        $this->emitToken($char);
+
+        // Finally, switch to the data state.
+        $this->state = 'data';
+    }
+
+    private function tagOpenState() {
+        switch($this->content_model) {
+            case self::RCDATA:
+            case self::CDATA:
+                /* If the next input character is a U+002F SOLIDUS (/) character,
+                consume it and switch to the close tag open state. If the next
+                input character is not a U+002F SOLIDUS (/) character, emit a
+                U+003C LESS-THAN SIGN character token and switch to the data
+                state to process the next input character. */
+                if($this->character($this->char + 1) === '/') {
+                    $this->char++;
+                    $this->state = 'closeTagOpen';
+
+                } else {
+                    $this->emitToken(array(
+                        'type' => self::CHARACTR,
+                        'data' => '<'
+                    ));
+
+                    $this->state = 'data';
+                }
+            break;
+
+            case self::PCDATA:
+                // If the content model flag is set to the PCDATA state
+                // Consume the next input character:
+                $this->char++;
+                $char = $this->char();
+
+                if($char === '!') {
+                    /* U+0021 EXCLAMATION MARK (!)
+                    Switch to the markup declaration open state. */
+                    $this->state = 'markupDeclarationOpen';
+
+                } elseif($char === '/') {
+                    /* U+002F SOLIDUS (/)
+                    Switch to the close tag open state. */
+                    $this->state = 'closeTagOpen';
+
+                } elseif(preg_match('/^[A-Za-z]$/', $char)) {
+                    /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
+                    Create a new start tag token, set its tag name to the lowercase
+                    version of the input character (add 0x0020 to the character's code
+                    point), then switch to the tag name state. (Don't emit the token
+                    yet; further details will be filled in before it is emitted.) */
+                    $this->token = array(
+                        'name'  => strtolower($char),
+                        'type'  => self::STARTTAG,
+                        'attr'  => array()
+                    );
+
+                    $this->state = 'tagName';
+
+                } elseif($char === '>') {
+                    /* U+003E GREATER-THAN SIGN (>)
+                    Parse error. Emit a U+003C LESS-THAN SIGN character token and a
+                    U+003E GREATER-THAN SIGN character token. Switch to the data state. */
+                    $this->emitToken(array(
+                        'type' => self::CHARACTR,
+                        'data' => '<>'
+                    ));
+
+                    $this->state = 'data';
+
+                } elseif($char === '?') {
+                    /* U+003F QUESTION MARK (?)
+                    Parse error. Switch to the bogus comment state. */
+                    $this->state = 'bogusComment';
+
+                } else {
+                    /* Anything else
+                    Parse error. Emit a U+003C LESS-THAN SIGN character token and
+                    reconsume the current input character in the data state. */
+                    $this->emitToken(array(
+                        'type' => self::CHARACTR,
+                        'data' => '<'
+                    ));
+
+                    $this->char--;
+                    $this->state = 'data';
+                }
+            break;
+        }
+    }
+
+    private function closeTagOpenState() {
+        $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
+        $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
+
+        if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
+        (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/',
+        $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) {
+            /* If the content model flag is set to the RCDATA or CDATA states then
+            examine the next few characters. If they do not match the tag name of
+            the last start tag token emitted (case insensitively), or if they do but
+            they are not immediately followed by one of the following characters:
+                * U+0009 CHARACTER TABULATION
+                * U+000A LINE FEED (LF)
+                * U+000B LINE TABULATION
+                * U+000C FORM FEED (FF)
+                * U+0020 SPACE
+                * U+003E GREATER-THAN SIGN (>)
+                * U+002F SOLIDUS (/)
+                * EOF
+            ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
+            token, a U+002F SOLIDUS character token, and switch to the data state
+            to process the next input character. */
+            $this->emitToken(array(
+                'type' => self::CHARACTR,
+                'data' => '</'
+            ));
+
+            $this->state = 'data';
+
+        } else {
+            /* Otherwise, if the content model flag is set to the PCDATA state,
+            or if the next few characters do match that tag name, consume the
+            next input character: */
+            $this->char++;
+            $char = $this->char();
+
+            if(preg_match('/^[A-Za-z]$/', $char)) {
+                /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
+                Create a new end tag token, set its tag name to the lowercase version
+                of the input character (add 0x0020 to the character's code point), then
+                switch to the tag name state. (Don't emit the token yet; further details
+                will be filled in before it is emitted.) */
+                $this->token = array(
+                    'name'  => strtolower($char),
+                    'type'  => self::ENDTAG
+                );
+
+                $this->state = 'tagName';
+
+            } elseif($char === '>') {
+                /* U+003E GREATER-THAN SIGN (>)
+                Parse error. Switch to the data state. */
+                $this->state = 'data';
+
+            } elseif($this->char === $this->EOF) {
+                /* EOF
+                Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
+                SOLIDUS character token. Reconsume the EOF character in the data state. */
+                $this->emitToken(array(
+                    'type' => self::CHARACTR,
+                    'data' => '</'
+                ));
+
+                $this->char--;
+                $this->state = 'data';
+
+            } else {
+                /* Parse error. Switch to the bogus comment state. */
+                $this->state = 'bogusComment';
+            }
+        }
+    }
+
+    private function tagNameState() {
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Switch to the before attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the EOF
+            character in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } elseif($char === '/') {
+            /* U+002F SOLIDUS (/)
+            Parse error unless this is a permitted slash. Switch to the before
+            attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } else {
+            /* Anything else
+            Append the current input character to the current tag token's tag name.
+            Stay in the tag name state. */
+            $this->token['name'] .= strtolower($char);
+            $this->state = 'tagName';
+        }
+    }
+
+    private function beforeAttributeNameState() {
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Stay in the before attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($char === '/') {
+            /* U+002F SOLIDUS (/)
+            Parse error unless this is a permitted slash. Stay in the before
+            attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the EOF
+            character in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Start a new attribute in the current tag token. Set that attribute's
+            name to the current input character, and its value to the empty string.
+            Switch to the attribute name state. */
+            $this->token['attr'][] = array(
+                'name'  => strtolower($char),
+                'value' => null
+            );
+
+            $this->state = 'attributeName';
+        }
+    }
+
+    private function attributeNameState() {
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Stay in the before attribute name state. */
+            $this->state = 'afterAttributeName';
+
+        } elseif($char === '=') {
+            /* U+003D EQUALS SIGN (=)
+            Switch to the before attribute value state. */
+            $this->state = 'beforeAttributeValue';
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
+            /* U+002F SOLIDUS (/)
+            Parse error unless this is a permitted slash. Switch to the before
+            attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the EOF
+            character in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Append the current input character to the current attribute's name.
+            Stay in the attribute name state. */
+            $last = count($this->token['attr']) - 1;
+            $this->token['attr'][$last]['name'] .= strtolower($char);
+
+            $this->state = 'attributeName';
+        }
+    }
+
+    private function afterAttributeNameState() {
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Stay in the after attribute name state. */
+            $this->state = 'afterAttributeName';
+
+        } elseif($char === '=') {
+            /* U+003D EQUALS SIGN (=)
+            Switch to the before attribute value state. */
+            $this->state = 'beforeAttributeValue';
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
+            /* U+002F SOLIDUS (/)
+            Parse error unless this is a permitted slash. Switch to the
+            before attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the EOF
+            character in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Start a new attribute in the current tag token. Set that attribute's
+            name to the current input character, and its value to the empty string.
+            Switch to the attribute name state. */
+            $this->token['attr'][] = array(
+                'name'  => strtolower($char),
+                'value' => null
+            );
+
+            $this->state = 'attributeName';
+        }
+    }
+
+    private function beforeAttributeValueState() {
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Stay in the before attribute value state. */
+            $this->state = 'beforeAttributeValue';
+
+        } elseif($char === '"') {
+            /* U+0022 QUOTATION MARK (")
+            Switch to the attribute value (double-quoted) state. */
+            $this->state = 'attributeValueDoubleQuoted';
+
+        } elseif($char === '&') {
+            /* U+0026 AMPERSAND (&)
+            Switch to the attribute value (unquoted) state and reconsume
+            this input character. */
+            $this->char--;
+            $this->state = 'attributeValueUnquoted';
+
+        } elseif($char === '\'') {
+            /* U+0027 APOSTROPHE (')
+            Switch to the attribute value (single-quoted) state. */
+            $this->state = 'attributeValueSingleQuoted';
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Append the current input character to the current attribute's value.
+            Switch to the attribute value (unquoted) state. */
+            $last = count($this->token['attr']) - 1;
+            $this->token['attr'][$last]['value'] .= $char;
+
+            $this->state = 'attributeValueUnquoted';
+        }
+    }
+
+    private function attributeValueDoubleQuotedState() {
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if($char === '"') {
+            /* U+0022 QUOTATION MARK (")
+            Switch to the before attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($char === '&') {
+            /* U+0026 AMPERSAND (&)
+            Switch to the entity in attribute value state. */
+            $this->entityInAttributeValueState('double');
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the character
+            in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Append the current input character to the current attribute's value.
+            Stay in the attribute value (double-quoted) state. */
+            $last = count($this->token['attr']) - 1;
+            $this->token['attr'][$last]['value'] .= $char;
+
+            $this->state = 'attributeValueDoubleQuoted';
+        }
+    }
+
+    private function attributeValueSingleQuotedState() {
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if($char === '\'') {
+            /* U+0022 QUOTATION MARK (')
+            Switch to the before attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($char === '&') {
+            /* U+0026 AMPERSAND (&)
+            Switch to the entity in attribute value state. */
+            $this->entityInAttributeValueState('single');
+
+        } elseif($this->char === $this->EOF) {
+            /* EOF
+            Parse error. Emit the current tag token. Reconsume the character
+            in the data state. */
+            $this->emitToken($this->token);
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Append the current input character to the current attribute's value.
+            Stay in the attribute value (single-quoted) state. */
+            $last = count($this->token['attr']) - 1;
+            $this->token['attr'][$last]['value'] .= $char;
+
+            $this->state = 'attributeValueSingleQuoted';
+        }
+    }
+
+    private function attributeValueUnquotedState() {
+        // Consume the next input character:
+        $this->char++;
+        $char = $this->character($this->char);
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            /* U+0009 CHARACTER TABULATION
+            U+000A LINE FEED (LF)
+            U+000B LINE TABULATION
+            U+000C FORM FEED (FF)
+            U+0020 SPACE
+            Switch to the before attribute name state. */
+            $this->state = 'beforeAttributeName';
+
+        } elseif($char === '&') {
+            /* U+0026 AMPERSAND (&)
+            Switch to the entity in attribute value state. */
+            $this->entityInAttributeValueState('non');
+
+        } elseif($char === '>') {
+            /* U+003E GREATER-THAN SIGN (>)
+            Emit the current tag token. Switch to the data state. */
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } else {
+            /* Anything else
+            Append the current input character to the current attribute's value.
+            Stay in the attribute value (unquoted) state. */
+            $last = count($this->token['attr']) - 1;
+            $this->token['attr'][$last]['value'] .= $char;
+
+            $this->state = 'attributeValueUnquoted';
+        }
+    }
+
+    private function entityInAttributeValueState() {
+        // Attempt to consume an entity.
+        $entity = $this->entity();
+
+        // If nothing is returned, append a U+0026 AMPERSAND character to the
+        // current attribute's value. Otherwise, emit the character token that
+        // was returned.
+        $char = (!$entity)
+            ? '&'
+            : $entity;
+
+        $this->emitToken($char);
+    }
+
+    private function bogusCommentState() {
+        /* Consume every character up to the first U+003E GREATER-THAN SIGN
+        character (>) or the end of the file (EOF), whichever comes first. Emit
+        a comment token whose data is the concatenation of all the characters
+        starting from and including the character that caused the state machine
+        to switch into the bogus comment state, up to and including the last
+        consumed character before the U+003E character, if any, or up to the
+        end of the file otherwise. (If the comment was started by the end of
+        the file (EOF), the token is empty.) */
+        $data = $this->characters('^>', $this->char);
+        $this->emitToken(array(
+            'data' => $data,
+            'type' => self::COMMENT
+        ));
+
+        $this->char += strlen($data);
+
+        /* Switch to the data state. */
+        $this->state = 'data';
+
+        /* If the end of the file was reached, reconsume the EOF character. */
+        if($this->char === $this->EOF) {
+            $this->char = $this->EOF - 1;
+        }
+    }
+
+    private function markupDeclarationOpenState() {
+        /* If the next two characters are both U+002D HYPHEN-MINUS (-)
+        characters, consume those two characters, create a comment token whose
+        data is the empty string, and switch to the comment state. */
+        if($this->character($this->char + 1, 2) === '--') {
+            $this->char += 2;
+            $this->state = 'comment';
+            $this->token = array(
+                'data' => null,
+                'type' => self::COMMENT
+            );
+
+        /* Otherwise if the next seven chacacters are a case-insensitive match
+        for the word "DOCTYPE", then consume those characters and switch to the
+        DOCTYPE state. */
+        } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
+            $this->char += 7;
+            $this->state = 'doctype';
+
+        /* Otherwise, is is a parse error. Switch to the bogus comment state.
+        The next character that is consumed, if any, is the first character
+        that will be in the comment. */
+        } else {
+            $this->char++;
+            $this->state = 'bogusComment';
+        }
+    }
+
+    private function commentState() {
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        /* U+002D HYPHEN-MINUS (-) */
+        if($char === '-') {
+            /* Switch to the comment dash state  */
+            $this->state = 'commentDash';
+
+        /* EOF */
+        } elseif($this->char === $this->EOF) {
+            /* Parse error. Emit the comment token. Reconsume the EOF character
+            in the data state. */
+            $this->emitToken($this->token);
+            $this->char--;
+            $this->state = 'data';
+
+        /* Anything else */
+        } else {
+            /* Append the input character to the comment token's data. Stay in
+            the comment state. */
+            $this->token['data'] .= $char;
+        }
+    }
+
+    private function commentDashState() {
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        /* U+002D HYPHEN-MINUS (-) */
+        if($char === '-') {
+            /* Switch to the comment end state  */
+            $this->state = 'commentEnd';
+
+        /* EOF */
+        } elseif($this->char === $this->EOF) {
+            /* Parse error. Emit the comment token. Reconsume the EOF character
+            in the data state. */
+            $this->emitToken($this->token);
+            $this->char--;
+            $this->state = 'data';
+
+        /* Anything else */
+        } else {
+            /* Append a U+002D HYPHEN-MINUS (-) character and the input
+            character to the comment token's data. Switch to the comment state. */
+            $this->token['data'] .= '-'.$char;
+            $this->state = 'comment';
+        }
+    }
+
+    private function commentEndState() {
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        if($char === '>') {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($char === '-') {
+            $this->token['data'] .= '-';
+
+        } elseif($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            $this->token['data'] .= '--'.$char;
+            $this->state = 'comment';
+        }
+    }
+
+    private function doctypeState() {
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            $this->state = 'beforeDoctypeName';
+
+        } else {
+            $this->char--;
+            $this->state = 'beforeDoctypeName';
+        }
+    }
+
+    private function beforeDoctypeNameState() {
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            // Stay in the before DOCTYPE name state.
+
+        } elseif(preg_match('/^[a-z]$/', $char)) {
+            $this->token = array(
+                'name' => strtoupper($char),
+                'type' => self::DOCTYPE,
+                'error' => true
+            );
+
+            $this->state = 'doctypeName';
+
+        } elseif($char === '>') {
+            $this->emitToken(array(
+                'name' => null,
+                'type' => self::DOCTYPE,
+                'error' => true
+            ));
+
+            $this->state = 'data';
+
+        } elseif($this->char === $this->EOF) {
+            $this->emitToken(array(
+                'name' => null,
+                'type' => self::DOCTYPE,
+                'error' => true
+            ));
+
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            $this->token = array(
+                'name' => $char,
+                'type' => self::DOCTYPE,
+                'error' => true
+            );
+
+            $this->state = 'doctypeName';
+        }
+    }
+
+    private function doctypeNameState() {
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            $this->state = 'AfterDoctypeName';
+
+        } elseif($char === '>') {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif(preg_match('/^[a-z]$/', $char)) {
+            $this->token['name'] .= strtoupper($char);
+
+        } elseif($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            $this->token['name'] .= $char;
+        }
+
+        $this->token['error'] = ($this->token['name'] === 'HTML')
+            ? false
+            : true;
+    }
+
+    private function afterDoctypeNameState() {
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+            // Stay in the DOCTYPE name state.
+
+        } elseif($char === '>') {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            $this->token['error'] = true;
+            $this->state = 'bogusDoctype';
+        }
+    }
+
+    private function bogusDoctypeState() {
+        /* Consume the next input character: */
+        $this->char++;
+        $char = $this->char();
+
+        if($char === '>') {
+            $this->emitToken($this->token);
+            $this->state = 'data';
+
+        } elseif($this->char === $this->EOF) {
+            $this->emitToken($this->token);
+            $this->char--;
+            $this->state = 'data';
+
+        } else {
+            // Stay in the bogus DOCTYPE state.
+        }
+    }
+
+    private function entity() {
+        $start = $this->char;
+
+        // This section defines how to consume an entity. This definition is
+        // used when parsing entities in text and in attributes.
+
+        // The behaviour depends on the identity of the next character (the
+        // one immediately after the U+0026 AMPERSAND character): 
+
+        switch($this->character($this->char + 1)) {
+            // U+0023 NUMBER SIGN (#)
+            case '#':
+
+                // The behaviour further depends on the character after the
+                // U+0023 NUMBER SIGN:
+                switch($this->character($this->char + 1)) {
+                    // U+0078 LATIN SMALL LETTER X
+                    // U+0058 LATIN CAPITAL LETTER X
+                    case 'x':
+                    case 'X':
+                        // Follow the steps below, but using the range of
+                        // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
+                        // NINE, U+0061 LATIN SMALL LETTER A through to U+0066
+                        // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
+                        // A, through to U+0046 LATIN CAPITAL LETTER F (in other
+                        // words, 0-9, A-F, a-f).
+                        $char = 1;
+                        $char_class = '0-9A-Fa-f';
+                    break;
+
+                    // Anything else
+                    default:
+                        // Follow the steps below, but using the range of
+                        // characters U+0030 DIGIT ZERO through to U+0039 DIGIT
+                        // NINE (i.e. just 0-9).
+                        $char = 0;
+                        $char_class = '0-9';
+                    break;
+                }
+
+                // Consume as many characters as match the range of characters
+                // given above.
+                $this->char++;
+                $e_name = $this->characters($char_class, $this->char + $char + 1);
+                $entity = $this->character($start, $this->char);
+                $cond = strlen($e_name) > 0;
+
+                // The rest of the parsing happens bellow.
+            break;
+
+            // Anything else
+            default:
+                // Consume the maximum number of characters possible, with the
+                // consumed characters case-sensitively matching one of the
+                // identifiers in the first column of the entities table.
+                $e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
+                $len = strlen($e_name);
+
+                for($c = 1; $c <= $len; $c++) {
+                    $id = substr($e_name, 0, $c);
+                    $this->char++;
+
+                    if(in_array($id, $this->entities)) {
+                        $entity = $id;
+                        break;
+                    }
+                }
+
+                $cond = isset($entity);
+                // The rest of the parsing happens bellow.
+            break;
+        }
+
+        if(!$cond) {
+            // If no match can be made, then this is a parse error. No
+            // characters are consumed, and nothing is returned.
+            $this->char = $start;
+            return false;
+        }
+
+        // Return a character token for the character corresponding to the
+        // entity name (as given by the second column of the entities table).
+        return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
+    }
+
+    private function emitToken($token) {
+        $emit = $this->tree->emitToken($token);
+
+        if(is_int($emit)) {
+            $this->content_model = $emit;
+
+        } elseif($token['type'] === self::ENDTAG) {
+            $this->content_model = self::PCDATA;
+        }
+    }
+
+    private function EOF() {
+        $this->state = null;
+        $this->tree->emitToken(array(
+            'type' => self::EOF
+        ));
+    }
+}
+
+class HTML5TreeConstructer {
+    public $stack = array();
+
+    private $phase;
+    private $mode;
+    private $dom;
+    private $foster_parent = null;
+    private $a_formatting  = array();
+
+    private $head_pointer = null;
+    private $form_pointer = null;
+
+    private $scoping = array('button','caption','html','marquee','object','table','td','th');
+    private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u');
+    private $special = array('address','area','base','basefont','bgsound',
+    'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
+    'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
+    'h6','head','hr','iframe','image','img','input','isindex','li','link',
+    'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
+    'option','p','param','plaintext','pre','script','select','spacer','style',
+    'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
+
+    // The different phases.
+    const INIT_PHASE = 0;
+    const ROOT_PHASE = 1;
+    const MAIN_PHASE = 2;
+    const END_PHASE  = 3;
+
+    // The different insertion modes for the main phase.
+    const BEFOR_HEAD = 0;
+    const IN_HEAD    = 1;
+    const AFTER_HEAD = 2;
+    const IN_BODY    = 3;
+    const IN_TABLE   = 4;
+    const IN_CAPTION = 5;
+    const IN_CGROUP  = 6;
+    const IN_TBODY   = 7;
+    const IN_ROW     = 8;
+    const IN_CELL    = 9;
+    const IN_SELECT  = 10;
+    const AFTER_BODY = 11;
+    const IN_FRAME   = 12;
+    const AFTR_FRAME = 13;
+
+    // The different types of elements.
+    const SPECIAL    = 0;
+    const SCOPING    = 1;
+    const FORMATTING = 2;
+    const PHRASING   = 3;
+
+    const MARKER     = 0;
+
+    public function __construct() {
+        $this->phase = self::INIT_PHASE;
+        $this->mode = self::BEFOR_HEAD;
+        $this->dom = new DOMDocument;
+
+        $this->dom->encoding = 'UTF-8';
+        $this->dom->preserveWhiteSpace = true;
+        $this->dom->substituteEntities = true;
+        $this->dom->strictErrorChecking = false;
+    }
+
+    // Process tag tokens
+    public function emitToken($token) {
+        switch($this->phase) {
+            case self::INIT_PHASE: return $this->initPhase($token); break;
+            case self::ROOT_PHASE: return $this->rootElementPhase($token); break;
+            case self::MAIN_PHASE: return $this->mainPhase($token); break;
+            case self::END_PHASE : return $this->trailingEndPhase($token); break;
+        }
+    }
+
+    private function initPhase($token) {
+        /* Initially, the tree construction stage must handle each token
+        emitted from the tokenisation stage as follows: */
+
+        /* A DOCTYPE token that is marked as being in error
+        A comment token
+        A start tag token
+        An end tag token
+        A character token that is not one of one of U+0009 CHARACTER TABULATION,
+            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+            or U+0020 SPACE
+        An end-of-file token */
+        if((isset($token['error']) && $token['error']) ||
+        $token['type'] === HTML5::COMMENT ||
+        $token['type'] === HTML5::STARTTAG ||
+        $token['type'] === HTML5::ENDTAG ||
+        $token['type'] === HTML5::EOF ||
+        ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
+        !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) {
+            /* This specification does not define how to handle this case. In
+            particular, user agents may ignore the entirety of this specification
+            altogether for such documents, and instead invoke special parse modes
+            with a greater emphasis on backwards compatibility. */
+
+            $this->phase = self::ROOT_PHASE;
+            return $this->rootElementPhase($token);
+
+        /* A DOCTYPE token marked as being correct */
+        } elseif(isset($token['error']) && !$token['error']) {
+            /* Append a DocumentType node to the Document  node, with the name
+            attribute set to the name given in the DOCTYPE token (which will be
+            "HTML"), and the other attributes specific to DocumentType objects
+            set to null, empty lists, or the empty string as appropriate. */
+            $doctype = new DOMDocumentType(null, null, 'HTML');
+
+            /* Then, switch to the root element phase of the tree construction
+            stage. */
+            $this->phase = self::ROOT_PHASE;
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/',
+        $token['data'])) {
+            /* Append that character  to the Document node. */
+            $text = $this->dom->createTextNode($token['data']);
+            $this->dom->appendChild($text);
+        }
+    }
+
+    private function rootElementPhase($token) {
+        /* After the initial phase, as each token is emitted from the tokenisation
+        stage, it must be processed as described in this section. */
+
+        /* A DOCTYPE token */
+        if($token['type'] === HTML5::DOCTYPE) {
+            // Parse error. Ignore the token.
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the Document object with the data
+            attribute set to the data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            $this->dom->appendChild($comment);
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        } elseif($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append that character  to the Document node. */
+            $text = $this->dom->createTextNode($token['data']);
+            $this->dom->appendChild($text);
+
+        /* A character token that is not one of U+0009 CHARACTER TABULATION,
+            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
+            (FF), or U+0020 SPACE
+        A start tag token
+        An end tag token
+        An end-of-file token */
+        } elseif(($token['type'] === HTML5::CHARACTR &&
+        !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
+        $token['type'] === HTML5::STARTTAG ||
+        $token['type'] === HTML5::ENDTAG ||
+        $token['type'] === HTML5::EOF) {
+            /* Create an HTMLElement node with the tag name html, in the HTML
+            namespace. Append it to the Document object. Switch to the main
+            phase and reprocess the current token. */
+            $html = $this->dom->createElement('html');
+            $this->dom->appendChild($html);
+            $this->stack[] = $html;
+
+            $this->phase = self::MAIN_PHASE;
+            return $this->mainPhase($token);
+        }
+    }
+
+    private function mainPhase($token) {
+        /* Tokens in the main phase must be handled as follows: */
+
+        /* A DOCTYPE token */
+        if($token['type'] === HTML5::DOCTYPE) {
+            // Parse error. Ignore the token.
+
+        /* A start tag token with the tag name "html" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
+            /* If this start tag token was not the first start tag token, then
+            it is a parse error. */
+
+            /* For each attribute on the token, check to see if the attribute
+            is already present on the top element of the stack of open elements.
+            If it is not, add the attribute and its corresponding value to that
+            element. */
+            foreach($token['attr'] as $attr) {
+                if(!$this->stack[0]->hasAttribute($attr['name'])) {
+                    $this->stack[0]->setAttribute($attr['name'], $attr['value']);
+                }
+            }
+
+        /* An end-of-file token */
+        } elseif($token['type'] === HTML5::EOF) {
+            /* Generate implied end tags. */
+            $this->generateImpliedEndTags();
+
+        /* Anything else. */
+        } else {
+            /* Depends on the insertion mode: */
+            switch($this->mode) {
+                case self::BEFOR_HEAD: return $this->beforeHead($token); break;
+                case self::IN_HEAD:    return $this->inHead($token); break;
+                case self::AFTER_HEAD: return $this->afterHead($token); break;
+                case self::IN_BODY:    return $this->inBody($token); break;
+                case self::IN_TABLE:   return $this->inTable($token); break;
+                case self::IN_CAPTION: return $this->inCaption($token); break;
+                case self::IN_CGROUP:  return $this->inColumnGroup($token); break;
+                case self::IN_TBODY:   return $this->inTableBody($token); break;
+                case self::IN_ROW:     return $this->inRow($token); break;
+                case self::IN_CELL:    return $this->inCell($token); break;
+                case self::IN_SELECT:  return $this->inSelect($token); break;
+                case self::AFTER_BODY: return $this->afterBody($token); break;
+                case self::IN_FRAME:   return $this->inFrameset($token); break;
+                case self::AFTR_FRAME: return $this->afterFrameset($token); break;
+                case self::END_PHASE:  return $this->trailingEndPhase($token); break;
+            }
+        }
+    }
+
+    private function beforeHead($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data attribute
+            set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag token with the tag name "head" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
+            /* Create an element for the token, append the new element to the
+            current node and push it onto the stack of open elements. */
+            $element = $this->insertElement($token);
+
+            /* Set the head element pointer to this new element node. */
+            $this->head_pointer = $element;
+
+            /* Change the insertion mode to "in head". */
+            $this->mode = self::IN_HEAD;
+
+        /* A start tag token whose tag name is one of: "base", "link", "meta",
+        "script", "style", "title". Or an end tag with the tag name "html".
+        Or a character token that is not one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE. Or any other start tag token */
+        } elseif($token['type'] === HTML5::STARTTAG ||
+        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
+        ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/',
+        $token['data']))) {
+            /* Act as if a start tag token with the tag name "head" and no
+            attributes had been seen, then reprocess the current token. */
+            $this->beforeHead(array(
+                'name' => 'head',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inHead($token);
+
+        /* Any other end tag */
+        } elseif($token['type'] === HTML5::ENDTAG) {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    private function inHead($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE.
+
+        THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
+        or script element, append the character to the current node regardless
+        of its content. */
+        if(($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
+        $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
+        array('title', 'style', 'script')))) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data attribute
+            set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('title', 'style', 'script'))) {
+            array_pop($this->stack);
+            return HTML5::PCDATA;
+
+        /* A start tag with the tag name "title" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
+            /* Create an element for the token and append the new element to the
+            node pointed to by the head element pointer, or, if that is null
+            (innerHTML case), to the current node. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+
+            } else {
+                $element = $this->insertElement($token);
+            }
+
+            /* Switch the tokeniser's content model flag  to the RCDATA state. */
+            return HTML5::RCDATA;
+
+        /* A start tag with the tag name "style" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
+            /* Create an element for the token and append the new element to the
+            node pointed to by the head element pointer, or, if that is null
+            (innerHTML case), to the current node. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+
+            } else {
+                $this->insertElement($token);
+            }
+
+            /* Switch the tokeniser's content model flag  to the CDATA state. */
+            return HTML5::CDATA;
+
+        /* A start tag with the tag name "script" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
+            /* Create an element for the token. */
+            $element = $this->insertElement($token, false);
+            $this->head_pointer->appendChild($element);
+
+            /* Switch the tokeniser's content model flag  to the CDATA state. */
+            return HTML5::CDATA;
+
+        /* A start tag with the tag name "base", "link", or "meta" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('base', 'link', 'meta'))) {
+            /* Create an element for the token and append the new element to the
+            node pointed to by the head element pointer, or, if that is null
+            (innerHTML case), to the current node. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+                array_pop($this->stack);
+
+            } else {
+                $this->insertElement($token);
+            }
+
+        /* An end tag with the tag name "head" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
+            /* If the current node is a head element, pop the current node off
+            the stack of open elements. */
+            if($this->head_pointer->isSameNode(end($this->stack))) {
+                array_pop($this->stack);
+
+            /* Otherwise, this is a parse error. */
+            } else {
+                // k
+            }
+
+            /* Change the insertion mode to "after head". */
+            $this->mode = self::AFTER_HEAD;
+
+        /* A start tag with the tag name "head" or an end tag except "html". */
+        } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
+        ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) {
+            // Parse error. Ignore the token.
+
+        /* Anything else */
+        } else {
+            /* If the current node is a head element, act as if an end tag
+            token with the tag name "head" had been seen. */
+            if($this->head_pointer->isSameNode(end($this->stack))) {
+                $this->inHead(array(
+                    'name' => 'head',
+                    'type' => HTML5::ENDTAG
+                ));
+
+            /* Otherwise, change the insertion mode to "after head". */
+            } else {
+                $this->mode = self::AFTER_HEAD;
+            }
+
+            /* Then, reprocess the current token. */
+            return $this->afterHead($token);
+        }
+    }
+
+    private function afterHead($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data attribute
+            set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag token with the tag name "body" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
+            /* Insert a body element for the token. */
+            $this->insertElement($token);
+
+            /* Change the insertion mode to "in body". */
+            $this->mode = self::IN_BODY;
+
+        /* A start tag token with the tag name "frameset" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
+            /* Insert a frameset element for the token. */
+            $this->insertElement($token);
+
+            /* Change the insertion mode to "in frameset". */
+            $this->mode = self::IN_FRAME;
+
+        /* A start tag token whose tag name is one of: "base", "link", "meta",
+        "script", "style", "title" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('base', 'link', 'meta', 'script', 'style', 'title'))) {
+            /* Parse error. Switch the insertion mode back to "in head" and
+            reprocess the token. */
+            $this->mode = self::IN_HEAD;
+            return $this->inHead($token);
+
+        /* Anything else */
+        } else {
+            /* Act as if a start tag token with the tag name "body" and no
+            attributes had been seen, and then reprocess the current token. */
+            $this->afterHead(array(
+                'name' => 'body',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inBody($token);
+        }
+    }
+
+    private function inBody($token) {
+        /* Handle the token as follows: */
+
+        switch($token['type']) {
+            /* A character token */
+            case HTML5::CHARACTR:
+                /* Reconstruct the active formatting elements, if any. */
+                $this->reconstructActiveFormattingElements();
+
+                /* Append the token's character to the current node. */
+                $this->insertText($token['data']);
+            break;
+
+            /* A comment token */
+            case HTML5::COMMENT:
+                /* Append a Comment node to the current node with the data
+                attribute set to the data given in the comment token. */
+                $this->insertComment($token['data']);
+            break;
+
+            case HTML5::STARTTAG:
+            switch($token['name']) {
+                /* A start tag token whose tag name is one of: "script",
+                "style" */
+                case 'script': case 'style':
+                    /* Process the token as if the insertion mode had been "in
+                    head". */
+                    return $this->inHead($token);
+                break;
+
+                /* A start tag token whose tag name is one of: "base", "link",
+                "meta", "title" */
+                case 'base': case 'link': case 'meta': case 'title':
+                    /* Parse error. Process the token as if the insertion mode
+                    had    been "in head". */
+                    return $this->inHead($token);
+                break;
+
+                /* A start tag token with the tag name "body" */
+                case 'body':
+                    /* Parse error. If the second element on the stack of open
+                    elements is not a body element, or, if the stack of open
+                    elements has only one node on it, then ignore the token.
+                    (innerHTML case) */
+                    if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
+                        // Ignore
+
+                    /* Otherwise, for each attribute on the token, check to see
+                    if the attribute is already present on the body element (the
+                    second element)    on the stack of open elements. If it is not,
+                    add the attribute and its corresponding value to that
+                    element. */
+                    } else {
+                        foreach($token['attr'] as $attr) {
+                            if(!$this->stack[1]->hasAttribute($attr['name'])) {
+                                $this->stack[1]->setAttribute($attr['name'], $attr['value']);
+                            }
+                        }
+                    }
+                break;
+
+                /* A start tag whose tag name is one of: "address",
+                "blockquote", "center", "dir", "div", "dl", "fieldset",
+                "listing", "menu", "ol", "p", "ul" */
+                case 'address': case 'blockquote': case 'center': case 'dir':
+                case 'div': case 'dl': case 'fieldset': case 'listing':
+                case 'menu': case 'ol': case 'p': case 'ul':
+                    /* If the stack of open elements has a p element in scope,
+                    then act as if an end tag with the tag name p had been
+                    seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+                break;
+
+                /* A start tag whose tag name is "form" */
+                case 'form':
+                    /* If the form element pointer is not null, ignore the
+                    token with a parse error. */
+                    if($this->form_pointer !== null) {
+                        // Ignore.
+
+                    /* Otherwise: */
+                    } else {
+                        /* If the stack of open elements has a p element in
+                        scope, then act as if an end tag with the tag name p
+                        had been seen. */
+                        if($this->elementInScope('p')) {
+                            $this->emitToken(array(
+                                'name' => 'p',
+                                'type' => HTML5::ENDTAG
+                            ));
+                        }
+
+                        /* Insert an HTML element for the token, and set the
+                        form element pointer to point to the element created. */
+                        $element = $this->insertElement($token);
+                        $this->form_pointer = $element;
+                    }
+                break;
+
+                /* A start tag whose tag name is "li", "dd" or "dt" */
+                case 'li': case 'dd': case 'dt':
+                    /* If the stack of open elements has a p  element in scope,
+                    then act as if an end tag with the tag name p had been
+                    seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    $stack_length = count($this->stack) - 1;
+
+                    for($n = $stack_length; 0 <= $n; $n--) {
+                        /* 1. Initialise node to be the current node (the
+                        bottommost node of the stack). */
+                        $stop = false;
+                        $node = $this->stack[$n];
+                        $cat  = $this->getElementCategory($node->tagName);
+
+                        /* 2. If node is an li, dd or dt element, then pop all
+                        the    nodes from the current node up to node, including
+                        node, then stop this algorithm. */
+                        if($token['name'] === $node->tagName ||    ($token['name'] !== 'li'
+                        && ($node->tagName === 'dd' || $node->tagName === 'dt'))) {
+                            for($x = $stack_length; $x >= $n ; $x--) {
+                                array_pop($this->stack);
+                            }
+
+                            break;
+                        }
+
+                        /* 3. If node is not in the formatting category, and is
+                        not    in the phrasing category, and is not an address or
+                        div element, then stop this algorithm. */
+                        if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
+                        $node->tagName !== 'address' && $node->tagName !== 'div') {
+                            break;
+                        }
+                    }
+
+                    /* Finally, insert an HTML element with the same tag
+                    name as the    token's. */
+                    $this->insertElement($token);
+                break;
+
+                /* A start tag token whose tag name is "plaintext" */
+                case 'plaintext':
+                    /* If the stack of open elements has a p  element in scope,
+                    then act as if an end tag with the tag name p had been
+                    seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    return HTML5::PLAINTEXT;
+                break;
+
+                /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
+                "h5", "h6" */
+                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
+                    /* If the stack of open elements has a p  element in scope,
+                    then act as if an end tag with the tag name p had been seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* If the stack of open elements has in scope an element whose
+                    tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+                    this is a parse error; pop elements from the stack until an
+                    element with one of those tag names has been popped from the
+                    stack. */
+                    while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
+                        array_pop($this->stack);
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+                break;
+
+                /* A start tag whose tag name is "a" */
+                case 'a':
+                    /* If the list of active formatting elements contains
+                    an element whose tag name is "a" between the end of the
+                    list and the last marker on the list (or the start of
+                    the list if there is no marker on the list), then this
+                    is a parse error; act as if an end tag with the tag name
+                    "a" had been seen, then remove that element from the list
+                    of active formatting elements and the stack of open
+                    elements if the end tag didn't already remove it (it
+                    might not have if the element is not in table scope). */
+                    $leng = count($this->a_formatting);
+
+                    for($n = $leng - 1; $n >= 0; $n--) {
+                        if($this->a_formatting[$n] === self::MARKER) {
+                            break;
+
+                        } elseif($this->a_formatting[$n]->nodeName === 'a') {
+                            $this->emitToken(array(
+                                'name' => 'a',
+                                'type' => HTML5::ENDTAG
+                            ));
+                            break;
+                        }
+                    }
+
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $el = $this->insertElement($token);
+
+                    /* Add that element to the list of active formatting
+                    elements. */
+                    $this->a_formatting[] = $el;
+                break;
+
+                /* A start tag whose tag name is one of: "b", "big", "em", "font",
+                "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+                case 'b': case 'big': case 'em': case 'font': case 'i':
+                case 'nobr': case 's': case 'small': case 'strike':
+                case 'strong': case 'tt': case 'u':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $el = $this->insertElement($token);
+
+                    /* Add that element to the list of active formatting
+                    elements. */
+                    $this->a_formatting[] = $el;
+                break;
+
+                /* A start tag token whose tag name is "button" */
+                case 'button':
+                    /* If the stack of open elements has a button element in scope,
+                    then this is a parse error; act as if an end tag with the tag
+                    name "button" had been seen, then reprocess the token. (We don't
+                    do that. Unnecessary.) */
+                    if($this->elementInScope('button')) {
+                        $this->inBody(array(
+                            'name' => 'button',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Insert a marker at the end of the list of active
+                    formatting elements. */
+                    $this->a_formatting[] = self::MARKER;
+                break;
+
+                /* A start tag token whose tag name is one of: "marquee", "object" */
+                case 'marquee': case 'object':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Insert a marker at the end of the list of active
+                    formatting elements. */
+                    $this->a_formatting[] = self::MARKER;
+                break;
+
+                /* A start tag token whose tag name is "xmp" */
+                case 'xmp':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Switch the content model flag to the CDATA state. */
+                    return HTML5::CDATA;
+                break;
+
+                /* A start tag whose tag name is "table" */
+                case 'table':
+                    /* If the stack of open elements has a p element in scope,
+                    then act as if an end tag with the tag name p had been seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Change the insertion mode to "in table". */
+                    $this->mode = self::IN_TABLE;
+                break;
+
+                /* A start tag whose tag name is one of: "area", "basefont",
+                "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
+                case 'area': case 'basefont': case 'bgsound': case 'br':
+                case 'embed': case 'img': case 'param': case 'spacer':
+                case 'wbr':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Immediately pop the current node off the stack of open elements. */
+                    array_pop($this->stack);
+                break;
+
+                /* A start tag whose tag name is "hr" */
+                case 'hr':
+                    /* If the stack of open elements has a p element in scope,
+                    then act as if an end tag with the tag name p had been seen. */
+                    if($this->elementInScope('p')) {
+                        $this->emitToken(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Immediately pop the current node off the stack of open elements. */
+                    array_pop($this->stack);
+                break;
+
+                /* A start tag whose tag name is "image" */
+                case 'image':
+                    /* Parse error. Change the token's tag name to "img" and
+                    reprocess it. (Don't ask.) */
+                    $token['name'] = 'img';
+                    return $this->inBody($token);
+                break;
+
+                /* A start tag whose tag name is "input" */
+                case 'input':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an input element for the token. */
+                    $element = $this->insertElement($token, false);
+
+                    /* If the form element pointer is not null, then associate the
+                    input element with the form element pointed to by the form
+                    element pointer. */
+                    $this->form_pointer !== null
+                        ? $this->form_pointer->appendChild($element)
+                        : end($this->stack)->appendChild($element);
+
+                    /* Pop that input element off the stack of open elements. */
+                    array_pop($this->stack);
+                break;
+
+                /* A start tag whose tag name is "isindex" */
+                case 'isindex':
+                    /* Parse error. */
+                    // w/e
+
+                    /* If the form element pointer is not null,
+                    then ignore the token. */
+                    if($this->form_pointer === null) {
+                        /* Act as if a start tag token with the tag name "form" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'body',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => array()
+                        ));
+
+                        /* Act as if a start tag token with the tag name "hr" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'hr',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => array()
+                        ));
+
+                        /* Act as if a start tag token with the tag name "p" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'p',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => array()
+                        ));
+
+                        /* Act as if a start tag token with the tag name "label"
+                        had been seen. */
+                        $this->inBody(array(
+                            'name' => 'label',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => array()
+                        ));
+
+                        /* Act as if a stream of character tokens had been seen. */
+                        $this->insertText('This is a searchable index. '.
+                        'Insert your search keywords here: ');
+
+                        /* Act as if a start tag token with the tag name "input"
+                        had been seen, with all the attributes from the "isindex"
+                        token, except with the "name" attribute set to the value
+                        "isindex" (ignoring any explicit "name" attribute). */
+                        $attr = $token['attr'];
+                        $attr[] = array('name' => 'name', 'value' => 'isindex');
+
+                        $this->inBody(array(
+                            'name' => 'input',
+                            'type' => HTML5::STARTTAG,
+                            'attr' => $attr
+                        ));
+
+                        /* Act as if a stream of character tokens had been seen
+                        (see below for what they should say). */
+                        $this->insertText('This is a searchable index. '.
+                        'Insert your search keywords here: ');
+
+                        /* Act as if an end tag token with the tag name "label"
+                        had been seen. */
+                        $this->inBody(array(
+                            'name' => 'label',
+                            'type' => HTML5::ENDTAG
+                        ));
+
+                        /* Act as if an end tag token with the tag name "p" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'p',
+                            'type' => HTML5::ENDTAG
+                        ));
+
+                        /* Act as if a start tag token with the tag name "hr" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'hr',
+                            'type' => HTML5::ENDTAG
+                        ));
+
+                        /* Act as if an end tag token with the tag name "form" had
+                        been seen. */
+                        $this->inBody(array(
+                            'name' => 'form',
+                            'type' => HTML5::ENDTAG
+                        ));
+                    }
+                break;
+
+                /* A start tag whose tag name is "textarea" */
+                case 'textarea':
+                    $this->insertElement($token);
+
+                    /* Switch the tokeniser's content model flag to the
+                    RCDATA state. */
+                    return HTML5::RCDATA;
+                break;
+
+                /* A start tag whose tag name is one of: "iframe", "noembed",
+                "noframes" */
+                case 'iframe': case 'noembed': case 'noframes':
+                    $this->insertElement($token);
+
+                    /* Switch the tokeniser's content model flag to the CDATA state. */
+                    return HTML5::CDATA;
+                break;
+
+                /* A start tag whose tag name is "select" */
+                case 'select':
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    /* Insert an HTML element for the token. */
+                    $this->insertElement($token);
+
+                    /* Change the insertion mode to "in select". */
+                    $this->mode = self::IN_SELECT;
+                break;
+
+                /* A start or end tag whose tag name is one of: "caption", "col",
+                "colgroup", "frame", "frameset", "head", "option", "optgroup",
+                "tbody", "td", "tfoot", "th", "thead", "tr". */
+                case 'caption': case 'col': case 'colgroup': case 'frame':
+                case 'frameset': case 'head': case 'option': case 'optgroup':
+                case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead':
+                case 'tr':
+                    // Parse error. Ignore the token.
+                break;
+
+                /* A start or end tag whose tag name is one of: "event-source",
+                "section", "nav", "article", "aside", "header", "footer",
+                "datagrid", "command" */
+                case 'event-source': case 'section': case 'nav': case 'article':
+                case 'aside': case 'header': case 'footer': case 'datagrid':
+                case 'command':
+                    // Work in progress!
+                break;
+
+                /* A start tag token not covered by the previous entries */
+                default:
+                    /* Reconstruct the active formatting elements, if any. */
+                    $this->reconstructActiveFormattingElements();
+
+                    $this->insertElement($token);
+                break;
+            }
+            break;
+
+            case HTML5::ENDTAG:
+            switch($token['name']) {
+                /* An end tag with the tag name "body" */
+                case 'body':
+                    /* If the second element in the stack of open elements is
+                    not a body element, this is a parse error. Ignore the token.
+                    (innerHTML case) */
+                    if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
+                        // Ignore.
+
+                    /* If the current node is not the body element, then this
+                    is a parse error. */
+                    } elseif(end($this->stack)->nodeName !== 'body') {
+                        // Parse error.
+                    }
+
+                    /* Change the insertion mode to "after body". */
+                    $this->mode = self::AFTER_BODY;
+                break;
+
+                /* An end tag with the tag name "html" */
+                case 'html':
+                    /* Act as if an end tag with tag name "body" had been seen,
+                    then, if that token wasn't ignored, reprocess the current
+                    token. */
+                    $this->inBody(array(
+                        'name' => 'body',
+                        'type' => HTML5::ENDTAG
+                    ));
+
+                    return $this->afterBody($token);
+                break;
+
+                /* An end tag whose tag name is one of: "address", "blockquote",
+                "center", "dir", "div", "dl", "fieldset", "listing", "menu",
+                "ol", "pre", "ul" */
+                case 'address': case 'blockquote': case 'center': case 'dir':
+                case 'div': case 'dl': case 'fieldset': case 'listing':
+                case 'menu': case 'ol': case 'pre': case 'ul':
+                    /* If the stack of open elements has an element in scope
+                    with the same tag name as that of the token, then generate
+                    implied end tags. */
+                    if($this->elementInScope($token['name'])) {
+                        $this->generateImpliedEndTags();
+
+                        /* Now, if the current node is not an element with
+                        the same tag name as that of the token, then this
+                        is a parse error. */
+                        // w/e
+
+                        /* If the stack of open elements has an element in
+                        scope with the same tag name as that of the token,
+                        then pop elements from this stack until an element
+                        with that tag name has been popped from the stack. */
+                        for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                            if($this->stack[$n]->nodeName === $token['name']) {
+                                $n = -1;
+                            }
+
+                            array_pop($this->stack);
+                        }
+                    }
+                break;
+
+                /* An end tag whose tag name is "form" */
+                case 'form':
+                    /* If the stack of open elements has an element in scope
+                    with the same tag name as that of the token, then generate
+                    implied    end tags. */
+                    if($this->elementInScope($token['name'])) {
+                        $this->generateImpliedEndTags();
+
+                    } 
+
+                    if(end($this->stack)->nodeName !== $token['name']) {
+                        /* Now, if the current node is not an element with the
+                        same tag name as that of the token, then this is a parse
+                        error. */
+                        // w/e
+
+                    } else {
+                        /* Otherwise, if the current node is an element with
+                        the same tag name as that of the token pop that element
+                        from the stack. */
+                        array_pop($this->stack);
+                    }
+
+                    /* In any case, set the form element pointer to null. */
+                    $this->form_pointer = null;
+                break;
+
+                /* An end tag whose tag name is "p" */
+                case 'p':
+                    /* If the stack of open elements has a p element in scope,
+                    then generate implied end tags, except for p elements. */
+                    if($this->elementInScope('p')) {
+                        $this->generateImpliedEndTags(array('p'));
+
+                        /* If the current node is not a p element, then this is
+                        a parse error. */
+                        // k
+
+                        /* If the stack of open elements has a p element in
+                        scope, then pop elements from this stack until the stack
+                        no longer has a p element in scope. */
+                        for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                            if($this->elementInScope('p')) {
+                                array_pop($this->stack);
+
+                            } else {
+                                break;
+                            }
+                        }
+                    }
+                break;
+
+                /* An end tag whose tag name is "dd", "dt", or "li" */
+                case 'dd': case 'dt': case 'li':
+                    /* If the stack of open elements has an element in scope
+                    whose tag name matches the tag name of the token, then
+                    generate implied end tags, except for elements with the
+                    same tag name as the token. */
+                    if($this->elementInScope($token['name'])) {
+                        $this->generateImpliedEndTags(array($token['name']));
+
+                        /* If the current node is not an element with the same
+                        tag name as the token, then this is a parse error. */
+                        // w/e
+
+                        /* If the stack of open elements has an element in scope
+                        whose tag name matches the tag name of the token, then
+                        pop elements from this stack until an element with that
+                        tag name has been popped from the stack. */
+                        for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                            if($this->stack[$n]->nodeName === $token['name']) {
+                                $n = -1;
+                            }
+
+                            array_pop($this->stack);
+                        }
+                    }
+                break;
+
+                /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
+                "h5", "h6" */
+                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
+                    $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
+
+                    /* If the stack of open elements has in scope an element whose
+                    tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+                    generate implied end tags. */
+                    if($this->elementInScope($elements)) {
+                        $this->generateImpliedEndTags();
+
+                        /* Now, if the current node is not an element with the same
+                        tag name as that of the token, then this is a parse error. */
+                        // w/e
+
+                        /* If the stack of open elements has in scope an element
+                        whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
+                        "h6", then pop elements from the stack until an element
+                        with one of those tag names has been popped from the stack. */
+                        while($this->elementInScope($elements)) {
+                            array_pop($this->stack);
+                        }
+                    }
+                break;
+
+                /* An end tag whose tag name is one of: "a", "b", "big", "em",
+                "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+                case 'a': case 'b': case 'big': case 'em': case 'font':
+                case 'i': case 'nobr': case 's': case 'small': case 'strike':
+                case 'strong': case 'tt': case 'u':
+                    /* 1. Let the formatting element be the last element in
+                    the list of active formatting elements that:
+                        * is between the end of the list and the last scope
+                        marker in the list, if any, or the start of the list
+                        otherwise, and
+                        * has the same tag name as the token.
+                    */
+                    while(true) {
+                        for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
+                            if($this->a_formatting[$a] === self::MARKER) {
+                                break;
+
+                            } elseif($this->a_formatting[$a]->tagName === $token['name']) {
+                                $formatting_element = $this->a_formatting[$a];
+                                $in_stack = in_array($formatting_element, $this->stack, true);
+                                $fe_af_pos = $a;
+                                break;
+                            }
+                        }
+
+                        /* If there is no such node, or, if that node is
+                        also in the stack of open elements but the element
+                        is not in scope, then this is a parse error. Abort
+                        these steps. The token is ignored. */
+                        if(!isset($formatting_element) || ($in_stack &&
+                        !$this->elementInScope($token['name']))) {
+                            break;
+
+                        /* Otherwise, if there is such a node, but that node
+                        is not in the stack of open elements, then this is a
+                        parse error; remove the element from the list, and
+                        abort these steps. */
+                        } elseif(isset($formatting_element) && !$in_stack) {
+                            unset($this->a_formatting[$fe_af_pos]);
+                            $this->a_formatting = array_merge($this->a_formatting);
+                            break;
+                        }
+
+                        /* 2. Let the furthest block be the topmost node in the
+                        stack of open elements that is lower in the stack
+                        than the formatting element, and is not an element in
+                        the phrasing or formatting categories. There might
+                        not be one. */
+                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
+                        $length = count($this->stack);
+
+                        for($s = $fe_s_pos + 1; $s < $length; $s++) {
+                            $category = $this->getElementCategory($this->stack[$s]->nodeName);
+
+                            if($category !== self::PHRASING && $category !== self::FORMATTING) {
+                                $furthest_block = $this->stack[$s];
+                            }
+                        }
+
+                        /* 3. If there is no furthest block, then the UA must
+                        skip the subsequent steps and instead just pop all
+                        the nodes from the bottom of the stack of open
+                        elements, from the current node up to the formatting
+                        element, and remove the formatting element from the
+                        list of active formatting elements. */
+                        if(!isset($furthest_block)) {
+                            for($n = $length - 1; $n >= $fe_s_pos; $n--) {
+                                array_pop($this->stack);
+                            }
+
+                            unset($this->a_formatting[$fe_af_pos]);
+                            $this->a_formatting = array_merge($this->a_formatting);
+                            break;
+                        }
+
+                        /* 4. Let the common ancestor be the element
+                        immediately above the formatting element in the stack
+                        of open elements. */
+                        $common_ancestor = $this->stack[$fe_s_pos - 1];
+
+                        /* 5. If the furthest block has a parent node, then
+                        remove the furthest block from its parent node. */
+                        if($furthest_block->parentNode !== null) {
+                            $furthest_block->parentNode->removeChild($furthest_block);
+                        }
+
+                        /* 6. Let a bookmark note the position of the
+                        formatting element in the list of active formatting
+                        elements relative to the elements on either side
+                        of it in the list. */
+                        $bookmark = $fe_af_pos;
+
+                        /* 7. Let node and last node  be the furthest block.
+                        Follow these steps: */
+                        $node = $furthest_block;
+                        $last_node = $furthest_block;
+
+                        while(true) {
+                            for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
+                                /* 7.1 Let node be the element immediately
+                                prior to node in the stack of open elements. */
+                                $node = $this->stack[$n];
+
+                                /* 7.2 If node is not in the list of active
+                                formatting elements, then remove node from
+                                the stack of open elements and then go back
+                                to step 1. */
+                                if(!in_array($node, $this->a_formatting, true)) {
+                                    unset($this->stack[$n]);
+                                    $this->stack = array_merge($this->stack);
+
+                                } else {
+                                    break;
+                                }
+                            }
+
+                            /* 7.3 Otherwise, if node is the formatting
+                            element, then go to the next step in the overall
+                            algorithm. */
+                            if($node === $formatting_element) {
+                                break;
+
+                            /* 7.4 Otherwise, if last node is the furthest
+                            block, then move the aforementioned bookmark to
+                            be immediately after the node in the list of
+                            active formatting elements. */
+                            } elseif($last_node === $furthest_block) {
+                                $bookmark = array_search($node, $this->a_formatting, true) + 1;
+                            }
+
+                            /* 7.5 If node has any children, perform a
+                            shallow clone of node, replace the entry for
+                            node in the list of active formatting elements
+                            with an entry for the clone, replace the entry
+                            for node in the stack of open elements with an
+                            entry for the clone, and let node be the clone. */
+                            if($node->hasChildNodes()) {
+                                $clone = $node->cloneNode();
+                                $s_pos = array_search($node, $this->stack, true);
+                                $a_pos = array_search($node, $this->a_formatting, true);
+
+                                $this->stack[$s_pos] = $clone;
+                                $this->a_formatting[$a_pos] = $clone;
+                                $node = $clone;
+                            }
+
+                            /* 7.6 Insert last node into node, first removing
+                            it from its previous parent node if any. */
+                            if($last_node->parentNode !== null) {
+                                $last_node->parentNode->removeChild($last_node);
+                            }
+
+                            $node->appendChild($last_node);
+
+                            /* 7.7 Let last node be node. */
+                            $last_node = $node;
+                        }
+
+                        /* 8. Insert whatever last node ended up being in
+                        the previous step into the common ancestor node,
+                        first removing it from its previous parent node if
+                        any. */
+                        if($last_node->parentNode !== null) {
+                            $last_node->parentNode->removeChild($last_node);
+                        }
+
+                        $common_ancestor->appendChild($last_node);
+
+                        /* 9. Perform a shallow clone of the formatting
+                        element. */
+                        $clone = $formatting_element->cloneNode();
+
+                        /* 10. Take all of the child nodes of the furthest
+                        block and append them to the clone created in the
+                        last step. */
+                        while($furthest_block->hasChildNodes()) {
+                            $child = $furthest_block->firstChild;
+                            $furthest_block->removeChild($child);
+                            $clone->appendChild($child);
+                        }
+
+                        /* 11. Append that clone to the furthest block. */
+                        $furthest_block->appendChild($clone);
+
+                        /* 12. Remove the formatting element from the list
+                        of active formatting elements, and insert the clone
+                        into the list of active formatting elements at the
+                        position of the aforementioned bookmark. */
+                        $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
+                        unset($this->a_formatting[$fe_af_pos]);
+                        $this->a_formatting = array_merge($this->a_formatting);
+
+                        $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
+                        $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
+                        $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
+
+                        /* 13. Remove the formatting element from the stack
+                        of open elements, and insert the clone into the stack
+                        of open elements immediately after (i.e. in a more
+                        deeply nested position than) the position of the
+                        furthest block in that stack. */
+                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
+                        $fb_s_pos = array_search($furthest_block, $this->stack, true);
+                        unset($this->stack[$fe_s_pos]);
+
+                        $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
+                        $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
+                        $this->stack = array_merge($s_part1, array($clone), $s_part2);
+
+                        /* 14. Jump back to step 1 in this series of steps. */
+                        unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
+                    }
+                break;
+
+                /* An end tag token whose tag name is one of: "button",
+                "marquee", "object" */
+                case 'button': case 'marquee': case 'object':
+                    /* If the stack of open elements has an element in scope whose
+                    tag name matches the tag name of the token, then generate implied
+                    tags. */
+                    if($this->elementInScope($token['name'])) {
+                        $this->generateImpliedEndTags();
+
+                        /* Now, if the current node is not an element with the same
+                        tag name as the token, then this is a parse error. */
+                        // k
+
+                        /* Now, if the stack of open elements has an element in scope
+                        whose tag name matches the tag name of the token, then pop
+                        elements from the stack until that element has been popped from
+                        the stack, and clear the list of active formatting elements up
+                        to the last marker. */
+                        for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                            if($this->stack[$n]->nodeName === $token['name']) {
+                                $n = -1;
+                            }
+
+                            array_pop($this->stack);
+                        }
+
+                        $marker = end(array_keys($this->a_formatting, self::MARKER, true));
+
+                        for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
+                            array_pop($this->a_formatting);
+                        }
+                    }
+                break;
+
+                /* Or an end tag whose tag name is one of: "area", "basefont",
+                "bgsound", "br", "embed", "hr", "iframe", "image", "img",
+                "input", "isindex", "noembed", "noframes", "param", "select",
+                "spacer", "table", "textarea", "wbr" */
+                case 'area': case 'basefont': case 'bgsound': case 'br':
+                case 'embed': case 'hr': case 'iframe': case 'image':
+                case 'img': case 'input': case 'isindex': case 'noembed':
+                case 'noframes': case 'param': case 'select': case 'spacer':
+                case 'table': case 'textarea': case 'wbr':
+                    // Parse error. Ignore the token.
+                break;
+
+                /* An end tag token not covered by the previous entries */
+                default:
+                    for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                        /* Initialise node to be the current node (the bottommost
+                        node of the stack). */
+                        $node = end($this->stack);
+
+                        /* If node has the same tag name as the end tag token,
+                        then: */
+                        if($token['name'] === $node->nodeName) {
+                            /* Generate implied end tags. */
+                            $this->generateImpliedEndTags();
+
+                            /* If the tag name of the end tag token does not
+                            match the tag name of the current node, this is a
+                            parse error. */
+                            // k
+
+                            /* Pop all the nodes from the current node up to
+                            node, including node, then stop this algorithm. */
+                            for($x = count($this->stack) - $n; $x >= $n; $x--) {
+                                array_pop($this->stack);
+                            }
+                                    
+                        } else {
+                            $category = $this->getElementCategory($node);
+
+                            if($category !== self::SPECIAL && $category !== self::SCOPING) {
+                                /* Otherwise, if node is in neither the formatting
+                                category nor the phrasing category, then this is a
+                                parse error. Stop this algorithm. The end tag token
+                                is ignored. */
+                                return false;
+                            }
+                        }
+                    }
+                break;
+            }
+            break;
+        }
+    }
+
+    private function inTable($token) {
+        $clear = array('html', 'table');
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $text = $this->dom->createTextNode($token['data']);
+            end($this->stack)->appendChild($text);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            end($this->stack)->appendChild($comment);
+
+        /* A start tag whose tag name is "caption" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'caption') {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert a marker at the end of the list of active
+            formatting elements. */
+            $this->a_formatting[] = self::MARKER;
+
+            /* Insert an HTML element for the token, then switch the
+            insertion mode to "in caption". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CAPTION;
+
+        /* A start tag whose tag name is "colgroup" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'colgroup') {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the
+            insertion mode to "in column group". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CGROUP;
+
+        /* A start tag whose tag name is "col" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'col') {
+            $this->inTable(array(
+                'name' => 'colgroup',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            $this->inColumnGroup($token);
+
+        /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('tbody', 'tfoot', 'thead'))) {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the insertion
+            mode to "in table body". */
+            $this->insertElement($token);
+            $this->mode = self::IN_TBODY;
+
+        /* A start tag whose tag name is one of: "td", "th", "tr" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        in_array($token['name'], array('td', 'th', 'tr'))) {
+            /* Act as if a start tag token with the tag name "tbody" had been
+            seen, then reprocess the current token. */
+            $this->inTable(array(
+                'name' => 'tbody',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inTableBody($token);
+
+        /* A start tag whose tag name is "table" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'table') {
+            /* Parse error. Act as if an end tag token with the tag name "table"
+            had been seen, then, if that token wasn't ignored, reprocess the
+            current token. */
+            $this->inTable(array(
+                'name' => 'table',
+                'type' => HTML5::ENDTAG
+            ));
+
+            return $this->mainPhase($token);
+
+        /* An end tag whose tag name is "table" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'table') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                return false;
+
+            /* Otherwise: */
+            } else {
+                /* Generate implied end tags. */
+                $this->generateImpliedEndTags();
+
+                /* Now, if the current node is not a table element, then this
+                is a parse error. */
+                // w/e
+
+                /* Pop elements from this stack until a table element has been
+                popped from the stack. */
+                while(true) {
+                    $current = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($current === 'table') {
+                        break;
+                    }
+                }
+
+                /* Reset the insertion mode appropriately. */
+                $this->resetInsertionMode();
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
+        'tfoot', 'th', 'thead', 'tr'))) {
+            // Parse error. Ignore the token.
+
+        /* Anything else */
+        } else {
+            /* Parse error. Process the token as if the insertion mode was "in
+            body", with the following exception: */
+
+            /* If the current node is a table, tbody, tfoot, thead, or tr
+            element, then, whenever a node would be inserted into the current
+            node, it must instead be inserted into the foster parent element. */
+            if(in_array(end($this->stack)->nodeName,
+            array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+                /* The foster parent element is the parent element of the last
+                table element in the stack of open elements, if there is a
+                table element and it has such a parent element. If there is no
+                table element in the stack of open elements (innerHTML case),
+                then the foster parent element is the first element in the
+                stack of open elements (the html  element). Otherwise, if there
+                is a table element in the stack of open elements, but the last
+                table element in the stack of open elements has no parent, or
+                its parent node is not an element, then the foster parent
+                element is the element before the last table element in the
+                stack of open elements. */
+                for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                    if($this->stack[$n]->nodeName === 'table') {
+                        $table = $this->stack[$n];
+                        break;
+                    }
+                }
+
+                if(isset($table) && $table->parentNode !== null) {
+                    $this->foster_parent = $table->parentNode;
+
+                } elseif(!isset($table)) {
+                    $this->foster_parent = $this->stack[0];
+
+                } elseif(isset($table) && ($table->parentNode === null ||
+                $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
+                    $this->foster_parent = $this->stack[$n - 1];
+                }
+            }
+
+            $this->inBody($token);
+        }
+    }
+
+    private function inCaption($token) {
+        /* An end tag whose tag name is "caption" */
+        if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore
+
+            /* Otherwise: */
+            } else {
+                /* Generate implied end tags. */
+                $this->generateImpliedEndTags();
+
+                /* Now, if the current node is not a caption element, then this
+                is a parse error. */
+                // w/e
+
+                /* Pop elements from this stack until a caption element has
+                been popped from the stack. */
+                while(true) {
+                    $node = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($node === 'caption') {
+                        break;
+                    }
+                }
+
+                /* Clear the list of active formatting elements up to the last
+                marker. */
+                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+                /* Switch the insertion mode to "in table". */
+                $this->mode = self::IN_TABLE;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
+        name is "table" */
+        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'table')) {
+            /* Parse error. Act as if an end tag with the tag name "caption"
+            had been seen, then, if that token wasn't ignored, reprocess the
+            current token. */
+            $this->inCaption(array(
+                'name' => 'caption',
+                'type' => HTML5::ENDTAG
+            ));
+
+            return $this->inTable($token);
+
+        /* An end tag whose tag name is one of: "body", "col", "colgroup",
+        "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
+        'thead', 'tr'))) {
+            // Parse error. Ignore the token.
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in body". */
+            $this->inBody($token);
+        }
+    }
+
+    private function inColumnGroup($token) {
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $text = $this->dom->createTextNode($token['data']);
+            end($this->stack)->appendChild($text);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            end($this->stack)->appendChild($comment);
+
+        /* A start tag whose tag name is "col" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
+            /* Insert a col element for the token. Immediately pop the current
+            node off the stack of open elements. */
+            $this->insertElement($token);
+            array_pop($this->stack);
+
+        /* An end tag whose tag name is "colgroup" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'colgroup') {
+            /* If the current node is the root html element, then this is a
+            parse error, ignore the token. (innerHTML case) */
+            if(end($this->stack)->nodeName === 'html') {
+                // Ignore
+
+            /* Otherwise, pop the current node (which will be a colgroup
+            element) from the stack of open elements. Switch the insertion
+            mode to "in table". */
+            } else {
+                array_pop($this->stack);
+                $this->mode = self::IN_TABLE;
+            }
+
+        /* An end tag whose tag name is "col" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
+            /* Parse error. Ignore the token. */
+
+        /* Anything else */
+        } else {
+            /* Act as if an end tag with the tag name "colgroup" had been seen,
+            and then, if that token wasn't ignored, reprocess the current token. */
+            $this->inColumnGroup(array(
+                'name' => 'colgroup',
+                'type' => HTML5::ENDTAG
+            ));
+
+            return $this->inTable($token);
+        }
+    }
+
+    private function inTableBody($token) {
+        $clear = array('tbody', 'tfoot', 'thead', 'html');
+
+        /* A start tag whose tag name is "tr" */
+        if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
+            /* Clear the stack back to a table body context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert a tr element for the token, then switch the insertion
+            mode to "in row". */
+            $this->insertElement($token);
+            $this->mode = self::IN_ROW;
+
+        /* A start tag whose tag name is one of: "th", "td" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        ($token['name'] === 'th' ||    $token['name'] === 'td')) {
+            /* Parse error. Act as if a start tag with the tag name "tr" had
+            been seen, then reprocess the current token. */
+            $this->inTableBody(array(
+                'name' => 'tr',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inRow($token);
+
+        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table body context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Pop the current node from the stack of open elements. Switch
+                the insertion mode to "in table". */
+                array_pop($this->stack);
+                $this->mode = self::IN_TABLE;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
+        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) ||
+        ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) {
+            /* If the stack of open elements does not have a tbody, thead, or
+            tfoot element in table scope, this is a parse error. Ignore the
+            token. (innerHTML case) */
+            if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table body context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Act as if an end tag with the same tag name as the current
+                node ("tbody", "tfoot", or "thead") had been seen, then
+                reprocess the current token. */
+                $this->inTableBody(array(
+                    'name' => end($this->stack)->nodeName,
+                    'type' => HTML5::ENDTAG
+                ));
+
+                return $this->mainPhase($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "td", "th", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
+            /* Parse error. Ignore the token. */
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in table". */
+            $this->inTable($token);
+        }
+    }
+
+    private function inRow($token) {
+        $clear = array('tr', 'html');
+
+        /* A start tag whose tag name is one of: "th", "td" */
+        if($token['type'] === HTML5::STARTTAG &&
+        ($token['name'] === 'th' || $token['name'] === 'td')) {
+            /* Clear the stack back to a table row context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the insertion
+            mode to "in cell". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CELL;
+
+            /* Insert a marker at the end of the list of active formatting
+            elements. */
+            $this->a_formatting[] = self::MARKER;
+
+        /* An end tag whose tag name is "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table row context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Pop the current node (which will be a tr element) from the
+                stack of open elements. Switch the insertion mode to "in table
+                body". */
+                array_pop($this->stack);
+                $this->mode = self::IN_TBODY;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) {
+            /* Act as if an end tag with the tag name "tr" had been seen, then,
+            if that token wasn't ignored, reprocess the current token. */
+            $this->inRow(array(
+                'name' => 'tr',
+                'type' => HTML5::ENDTAG
+            ));
+
+            return $this->inCell($token);
+
+        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Otherwise, act as if an end tag with the tag name "tr" had
+                been seen, then reprocess the current token. */
+                $this->inRow(array(
+                    'name' => 'tr',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                return $this->inCell($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "td", "th" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
+            /* Parse error. Ignore the token. */
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in table". */
+            $this->inTable($token);
+        }
+    }
+
+    private function inCell($token) {
+        /* An end tag whose tag name is one of: "td", "th" */
+        if($token['type'] === HTML5::ENDTAG &&
+        ($token['name'] === 'td' || $token['name'] === 'th')) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as that of the token, then this is a
+            parse error and the token must be ignored. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Generate implied end tags, except for elements with the same
+                tag name as the token. */
+                $this->generateImpliedEndTags(array($token['name']));
+
+                /* Now, if the current node is not an element with the same tag
+                name as the token, then this is a parse error. */
+                // k
+
+                /* Pop elements from this stack until an element with the same
+                tag name as the token has been popped from the stack. */
+                while(true) {
+                    $node = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($node === $token['name']) {
+                        break;
+                    }
+                }
+
+                /* Clear the list of active formatting elements up to the last
+                marker. */
+                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+                /* Switch the insertion mode to "in row". (The current node
+                will be a tr element at this point.) */
+                $this->mode = self::IN_ROW;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'))) {
+            /* If the stack of open elements does not have a td or th element
+            in table scope, then this is a parse error; ignore the token.
+            (innerHTML case) */
+            if(!$this->elementInScope(array('td', 'th'), true)) {
+                // Ignore.
+
+            /* Otherwise, close the cell (see below) and reprocess the current
+            token. */
+            } else {
+                $this->closeCell();
+                return $this->inRow($token);
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'))) {
+            /* If the stack of open elements does not have a td or th element
+            in table scope, then this is a parse error; ignore the token.
+            (innerHTML case) */
+            if(!$this->elementInScope(array('td', 'th'), true)) {
+                // Ignore.
+
+            /* Otherwise, close the cell (see below) and reprocess the current
+            token. */
+            } else {
+                $this->closeCell();
+                return $this->inRow($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html'))) {
+            /* Parse error. Ignore the token. */
+
+        /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
+        "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as that of the token (which can only
+            happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
+            then this is a parse error and the token must be ignored. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise, close the cell (see below) and reprocess the current
+            token. */
+            } else {
+                $this->closeCell();
+                return $this->inRow($token);
+            }
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in body". */
+            $this->inBody($token);
+        }
+    }
+
+    private function inSelect($token) {
+        /* Handle the token as follows: */
+
+        /* A character token */
+        if($token['type'] === HTML5::CHARACTR) {
+            /* Append the token's character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag token whose tag name is "option" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'option') {
+            /* If the current node is an option element, act as if an end tag
+            with the tag name "option" had been seen. */
+            if(end($this->stack)->nodeName === 'option') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+        /* A start tag token whose tag name is "optgroup" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'optgroup') {
+            /* If the current node is an option element, act as if an end tag
+            with the tag name "option" had been seen. */
+            if(end($this->stack)->nodeName === 'option') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* If the current node is an optgroup element, act as if an end tag
+            with the tag name "optgroup" had been seen. */
+            if(end($this->stack)->nodeName === 'optgroup') {
+                $this->inSelect(array(
+                    'name' => 'optgroup',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+        /* An end tag token whose tag name is "optgroup" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'optgroup') {
+            /* First, if the current node is an option element, and the node
+            immediately before it in the stack of open elements is an optgroup
+            element, then act as if an end tag with the tag name "option" had
+            been seen. */
+            $elements_in_stack = count($this->stack);
+
+            if($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
+            $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* If the current node is an optgroup element, then pop that node
+            from the stack of open elements. Otherwise, this is a parse error,
+            ignore the token. */
+            if($this->stack[$elements_in_stack - 1] === 'optgroup') {
+                array_pop($this->stack);
+            }
+
+        /* An end tag token whose tag name is "option" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'option') {
+            /* If the current node is an option element, then pop that node
+            from the stack of open elements. Otherwise, this is a parse error,
+            ignore the token. */
+            if(end($this->stack)->nodeName === 'option') {
+                array_pop($this->stack);
+            }
+
+        /* An end tag whose tag name is "select" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'select') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // w/e
+
+            /* Otherwise: */
+            } else {
+                /* Pop elements from the stack of open elements until a select
+                element has been popped from the stack. */
+                while(true) {
+                    $current = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($current === 'select') {
+                        break;
+                    }
+                }
+
+                /* Reset the insertion mode appropriately. */
+                $this->resetInsertionMode();
+            }
+
+        /* A start tag whose tag name is "select" */
+        } elseif($token['name'] === 'select' &&
+        $token['type'] === HTML5::STARTTAG) {
+            /* Parse error. Act as if the token had been an end tag with the
+            tag name "select" instead. */
+            $this->inSelect(array(
+                'name' => 'select',
+                'type' => HTML5::ENDTAG
+            ));
+
+        /* An end tag whose tag name is one of: "caption", "table", "tbody",
+        "tfoot", "thead", "tr", "td", "th" */
+        } elseif(in_array($token['name'], array('caption', 'table', 'tbody',
+        'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) {
+            /* Parse error. */
+            // w/e
+
+            /* If the stack of open elements has an element in table scope with
+            the same tag name as that of the token, then act as if an end tag
+            with the tag name "select" had been seen, and reprocess the token.
+            Otherwise, ignore the token. */
+            if($this->elementInScope($token['name'], true)) {
+                $this->inSelect(array(
+                    'name' => 'select',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                $this->mainPhase($token);
+            }
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    private function afterBody($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Process the token as it would be processed if the insertion mode
+            was "in body". */
+            $this->inBody($token);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the first element in the stack of open
+            elements (the html element), with the data attribute set to the
+            data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            $this->stack[0]->appendChild($comment);
+
+        /* An end tag with the tag name "html" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
+            /* If the parser was originally created in order to handle the
+            setting of an element's innerHTML attribute, this is a parse error;
+            ignore the token. (The element will be an html element in this
+            case.) (innerHTML case) */
+
+            /* Otherwise, switch to the trailing end phase. */
+            $this->phase = self::END_PHASE;
+
+        /* Anything else */
+        } else {
+            /* Parse error. Set the insertion mode to "in body" and reprocess
+            the token. */
+            $this->mode = self::IN_BODY;
+            return $this->inBody($token);
+        }
+    }
+
+    private function inFrameset($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag with the tag name "frameset" */
+        } elseif($token['name'] === 'frameset' &&
+        $token['type'] === HTML5::STARTTAG) {
+            $this->insertElement($token);
+
+        /* An end tag with the tag name "frameset" */
+        } elseif($token['name'] === 'frameset' &&
+        $token['type'] === HTML5::ENDTAG) {
+            /* If the current node is the root html element, then this is a
+            parse error; ignore the token. (innerHTML case) */
+            if(end($this->stack)->nodeName === 'html') {
+                // Ignore
+
+            } else {
+                /* Otherwise, pop the current node from the stack of open
+                elements. */
+                array_pop($this->stack);
+
+                /* If the parser was not originally created in order to handle
+                the setting of an element's innerHTML attribute (innerHTML case),
+                and the current node is no longer a frameset element, then change
+                the insertion mode to "after frameset". */
+                $this->mode = self::AFTR_FRAME;
+            }
+
+        /* A start tag with the tag name "frame" */
+        } elseif($token['name'] === 'frame' &&
+        $token['type'] === HTML5::STARTTAG) {
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+            /* Immediately pop the current node off the stack of open elements. */
+            array_pop($this->stack);
+
+        /* A start tag with the tag name "noframes" */
+        } elseif($token['name'] === 'noframes' &&
+        $token['type'] === HTML5::STARTTAG) {
+            /* Process the token as if the insertion mode had been "in body". */
+            $this->inBody($token);
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    private function afterFrameset($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* An end tag with the tag name "html" */
+        } elseif($token['name'] === 'html' &&
+        $token['type'] === HTML5::ENDTAG) {
+            /* Switch to the trailing end phase. */
+            $this->phase = self::END_PHASE;
+
+        /* A start tag with the tag name "noframes" */
+        } elseif($token['name'] === 'noframes' &&
+        $token['type'] === HTML5::STARTTAG) {
+            /* Process the token as if the insertion mode had been "in body". */
+            $this->inBody($token);
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    private function trailingEndPhase($token) {
+        /* After the main phase, as each token is emitted from the tokenisation
+        stage, it must be processed as described in this section. */
+
+        /* A DOCTYPE token */
+        if($token['type'] === HTML5::DOCTYPE) {
+            // Parse error. Ignore the token.
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the Document object with the data
+            attribute set to the data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            $this->dom->appendChild($comment);
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        } elseif($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Process the token as it would be processed in the main phase. */
+            $this->mainPhase($token);
+
+        /* A character token that is not one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE. Or a start tag token. Or an end tag token. */
+        } elseif(($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
+        $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) {
+            /* Parse error. Switch back to the main phase and reprocess the
+            token. */
+            $this->phase = self::MAIN_PHASE;
+            return $this->mainPhase($token);
+
+        /* An end-of-file token */
+        } elseif($token['type'] === HTML5::EOF) {
+            /* OMG DONE!! */
+        }
+    }
+
+    private function insertElement($token, $append = true) {
+        $el = $this->dom->createElement($token['name']);
+
+        foreach($token['attr'] as $attr) {
+            if(!$el->hasAttribute($attr['name'])) {
+                $el->setAttribute($attr['name'], $attr['value']);
+            }
+        }
+
+        $this->appendToRealParent($el);
+        $this->stack[] = $el;
+
+        return $el;
+    }
+
+    private function insertText($data) {
+        $text = $this->dom->createTextNode($data);
+        $this->appendToRealParent($text);
+    }
+
+    private function insertComment($data) {
+        $comment = $this->dom->createComment($data);
+        $this->appendToRealParent($comment);
+    }
+
+    private function appendToRealParent($node) {
+        if($this->foster_parent === null) {
+            end($this->stack)->appendChild($node);
+
+        } elseif($this->foster_parent !== null) {
+            /* If the foster parent element is the parent element of the
+            last table element in the stack of open elements, then the new
+            node must be inserted immediately before the last table element
+            in the stack of open elements in the foster parent element;
+            otherwise, the new node must be appended to the foster parent
+            element. */
+            for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                if($this->stack[$n]->nodeName === 'table' &&
+                $this->stack[$n]->parentNode !== null) {
+                    $table = $this->stack[$n];
+                    break;
+                }
+            }
+
+            if(isset($table) && $this->foster_parent->isSameNode($table->parentNode))
+                $this->foster_parent->insertBefore($node, $table);
+            else
+                $this->foster_parent->appendChild($node);
+
+            $this->foster_parent = null;
+        }
+    }
+
+    private function elementInScope($el, $table = false) {
+        if(is_array($el)) {
+            foreach($el as $element) {
+                if($this->elementInScope($element, $table)) {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        $leng = count($this->stack);
+
+        for($n = 0; $n < $leng; $n++) {
+            /* 1. Initialise node to be the current node (the bottommost node of
+            the stack). */
+            $node = $this->stack[$leng - 1 - $n];
+
+            if($node->tagName === $el) {
+                /* 2. If node is the target node, terminate in a match state. */
+                return true;
+
+            } elseif($node->tagName === 'table') {
+                /* 3. Otherwise, if node is a table element, terminate in a failure
+                state. */
+                return false;
+
+            } elseif($table === true && in_array($node->tagName, array('caption', 'td',
+            'th', 'button', 'marquee', 'object'))) {
+                /* 4. Otherwise, if the algorithm is the "has an element in scope"
+                variant (rather than the "has an element in table scope" variant),
+                and node is one of the following, terminate in a failure state. */
+                return false;
+
+            } elseif($node === $node->ownerDocument->documentElement) {
+                /* 5. Otherwise, if node is an html element (root element), terminate
+                in a failure state. (This can only happen if the node is the topmost
+                node of the    stack of open elements, and prevents the next step from
+                being invoked if there are no more elements in the stack.) */
+                return false;
+            }
+
+            /* Otherwise, set node to the previous entry in the stack of open
+            elements and return to step 2. (This will never fail, since the loop
+            will always terminate in the previous step if the top of the stack
+            is reached.) */
+        }
+    }
+
+    private function reconstructActiveFormattingElements() {
+        /* 1. If there are no entries in the list of active formatting elements,
+        then there is nothing to reconstruct; stop this algorithm. */
+        $formatting_elements = count($this->a_formatting);
+
+        if($formatting_elements === 0) {
+            return false;
+        }
+
+        /* 3. Let entry be the last (most recently added) element in the list
+        of active formatting elements. */
+        $entry = end($this->a_formatting);
+
+        /* 2. If the last (most recently added) entry in the list of active
+        formatting elements is a marker, or if it is an element that is in the
+        stack of open elements, then there is nothing to reconstruct; stop this
+        algorithm. */
+        if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+            return false;
+        }
+
+        for($a = $formatting_elements - 1; $a >= 0; true) {
+            /* 4. If there are no entries before entry in the list of active
+            formatting elements, then jump to step 8. */
+            if($a === 0) {
+                $step_seven = false;
+                break;
+            }
+
+            /* 5. Let entry be the entry one earlier than entry in the list of
+            active formatting elements. */
+            $a--;
+            $entry = $this->a_formatting[$a];
+
+            /* 6. If entry is neither a marker nor an element that is also in
+            thetack of open elements, go to step 4. */
+            if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+                break;
+            }
+        }
+
+        while(true) {
+            /* 7. Let entry be the element one later than entry in the list of
+            active formatting elements. */
+            if(isset($step_seven) && $step_seven === true) {
+                $a++;
+                $entry = $this->a_formatting[$a];
+            }
+
+            /* 8. Perform a shallow clone of the element entry to obtain clone. */
+            $clone = $entry->cloneNode();
+
+            /* 9. Append clone to the current node and push it onto the stack
+            of open elements  so that it is the new current node. */
+            end($this->stack)->appendChild($clone);
+            $this->stack[] = $clone;
+
+            /* 10. Replace the entry for entry in the list with an entry for
+            clone. */
+            $this->a_formatting[$a] = $clone;
+
+            /* 11. If the entry for clone in the list of active formatting
+            elements is not the last entry in the list, return to step 7. */
+            if(end($this->a_formatting) !== $clone) {
+                $step_seven = true;
+            } else {
+                break;
+            }
+        }
+    }
+
+    private function clearTheActiveFormattingElementsUpToTheLastMarker() {
+        /* When the steps below require the UA to clear the list of active
+        formatting elements up to the last marker, the UA must perform the
+        following steps: */
+
+        while(true) {
+            /* 1. Let entry be the last (most recently added) entry in the list
+            of active formatting elements. */
+            $entry = end($this->a_formatting);
+
+            /* 2. Remove entry from the list of active formatting elements. */
+            array_pop($this->a_formatting);
+
+            /* 3. If entry was a marker, then stop the algorithm at this point.
+            The list has been cleared up to the last marker. */
+            if($entry === self::MARKER) {
+                break;
+            }
+        }
+    }
+
+    private function generateImpliedEndTags(array $exclude = array()) {
+        /* When the steps below require the UA to generate implied end tags,
+        then, if the current node is a dd element, a dt element, an li element,
+        a p element, a td element, a th  element, or a tr element, the UA must
+        act as if an end tag with the respective tag name had been seen and
+        then generate implied end tags again. */
+        $node = end($this->stack);
+        $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
+
+        while(in_array(end($this->stack)->nodeName, $elements)) {
+            array_pop($this->stack);
+        }
+    }
+
+    private function getElementCategory($name) {
+        if(in_array($name, $this->special))
+            return self::SPECIAL;
+
+        elseif(in_array($name, $this->scoping))
+            return self::SCOPING;
+
+        elseif(in_array($name, $this->formatting))
+            return self::FORMATTING;
+
+        else
+            return self::PHRASING;
+    }
+
+    private function clearStackToTableContext($elements) {
+        /* When the steps above require the UA to clear the stack back to a
+        table context, it means that the UA must, while the current node is not
+        a table element or an html element, pop elements from the stack of open
+        elements. If this causes any elements to be popped from the stack, then
+        this is a parse error. */
+        while(true) {
+            $node = end($this->stack)->nodeName;
+
+            if(in_array($node, $elements)) {
+                break;
+            } else {
+                array_pop($this->stack);
+            }
+        }
+    }
+
+    private function resetInsertionMode() {
+        /* 1. Let last be false. */
+        $last = false;
+        $leng = count($this->stack);
+
+        for($n = $leng - 1; $n >= 0; $n--) {
+            /* 2. Let node be the last node in the stack of open elements. */
+            $node = $this->stack[$n];
+
+            /* 3. If node is the first node in the stack of open elements, then
+            set last to true. If the element whose innerHTML  attribute is being
+            set is neither a td  element nor a th element, then set node to the
+            element whose innerHTML  attribute is being set. (innerHTML  case) */
+            if($this->stack[0]->isSameNode($node)) {
+                $last = true;
+            }
+
+            /* 4. If node is a select element, then switch the insertion mode to
+            "in select" and abort these steps. (innerHTML case) */
+            if($node->nodeName === 'select') {
+                $this->mode = self::IN_SELECT;
+                break;
+
+            /* 5. If node is a td or th element, then switch the insertion mode
+            to "in cell" and abort these steps. */
+            } elseif($node->nodeName === 'td' || $node->nodeName === 'th') {
+                $this->mode = self::IN_CELL;
+                break;
+
+            /* 6. If node is a tr element, then switch the insertion mode to
+            "in    row" and abort these steps. */
+            } elseif($node->nodeName === 'tr') {
+                $this->mode = self::IN_ROW;
+                break;
+
+            /* 7. If node is a tbody, thead, or tfoot element, then switch the
+            insertion mode to "in table body" and abort these steps. */
+            } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) {
+                $this->mode = self::IN_TBODY;
+                break;
+
+            /* 8. If node is a caption element, then switch the insertion mode
+            to "in caption" and abort these steps. */
+            } elseif($node->nodeName === 'caption') {
+                $this->mode = self::IN_CAPTION;
+                break;
+
+            /* 9. If node is a colgroup element, then switch the insertion mode
+            to "in column group" and abort these steps. (innerHTML case) */
+            } elseif($node->nodeName === 'colgroup') {
+                $this->mode = self::IN_CGROUP;
+                break;
+
+            /* 10. If node is a table element, then switch the insertion mode
+            to "in table" and abort these steps. */
+            } elseif($node->nodeName === 'table') {
+                $this->mode = self::IN_TABLE;
+                break;
+
+            /* 11. If node is a head element, then switch the insertion mode
+            to "in body" ("in body"! not "in head"!) and abort these steps.
+            (innerHTML case) */
+            } elseif($node->nodeName === 'head') {
+                $this->mode = self::IN_BODY;
+                break;
+
+            /* 12. If node is a body element, then switch the insertion mode to
+            "in body" and abort these steps. */
+            } elseif($node->nodeName === 'body') {
+                $this->mode = self::IN_BODY;
+                break;
+
+            /* 13. If node is a frameset element, then switch the insertion
+            mode to "in frameset" and abort these steps. (innerHTML case) */
+            } elseif($node->nodeName === 'frameset') {
+                $this->mode = self::IN_FRAME;
+                break;
+
+            /* 14. If node is an html element, then: if the head element
+            pointer is null, switch the insertion mode to "before head",
+            otherwise, switch the insertion mode to "after head". In either
+            case, abort these steps. (innerHTML case) */
+            } elseif($node->nodeName === 'html') {
+                $this->mode = ($this->head_pointer === null)
+                    ? self::BEFOR_HEAD
+                    : self::AFTER_HEAD;
+
+                break;
+
+            /* 15. If last is true, then set the insertion mode to "in body"
+            and    abort these steps. (innerHTML case) */
+            } elseif($last) {
+                $this->mode = self::IN_BODY;
+                break;
+            }
+        }
+    }
+
+    private function closeCell() {
+        /* If the stack of open elements has a td or th element in table scope,
+        then act as if an end tag token with that tag name had been seen. */
+        foreach(array('td', 'th') as $cell) {
+            if($this->elementInScope($cell, true)) {
+                $this->inCell(array(
+                    'name' => $cell,
+                    'type' => HTML5::ENDTAG
+                ));
+
+                break;
+            }
+        }
+    }
+
+    public function save() {
+        return $this->dom;
+    }
+}
+?>
diff --git a/maintenance/flush-definition-cache.php b/maintenance/flush-definition-cache.php
index 6d51ab06..93af3b75 100755
--- a/maintenance/flush-definition-cache.php
+++ b/maintenance/flush-definition-cache.php
@@ -32,5 +32,5 @@ foreach ($names as $name) {
     $cache->flush($config);
 }
 
-echo 'Cache flushed successfully.';
+echo "Cache flushed successfully.\n";
 
diff --git a/maintenance/generate-ph5p-patch.php b/maintenance/generate-ph5p-patch.php
new file mode 100644
index 00000000..ecd9fa3f
--- /dev/null
+++ b/maintenance/generate-ph5p-patch.php
@@ -0,0 +1,13 @@
+<?php
+
+$orig = realpath(dirname(__FILE__) . '/PH5P.php');
+$new  = realpath(dirname(__FILE__) . '/../library/HTMLPurifier/Lexer/PH5P.php');
+$newt = dirname(__FILE__) . '/PH5P.new.php'; // temporary file
+
+// minor text-processing of new file to get into same format as original
+$new_src = file_get_contents($new);
+$new_src = '<?php' . PHP_EOL . substr($new_src, strpos($new_src, 'class HTML5 {'));
+
+file_put_contents($newt, $new_src);
+shell_exec("diff -u \"$orig\" \"$newt\" > PH5P.patch");
+unlink($newt);
diff --git a/plugins/phorum/htmlpurifier.php b/plugins/phorum/htmlpurifier.php
index 4654c65d..ae2c276c 100644
--- a/plugins/phorum/htmlpurifier.php
+++ b/plugins/phorum/htmlpurifier.php
@@ -261,12 +261,42 @@ function phorum_htmlpurifier_editor_after_subject() {
     // don't show this message if it's a WYSIWYG editor, since it will
     // then be handled automatically
     if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) return;
-    ?><tr><td colspan="2" style="padding:1em 0.3em;">
-  HTML input is <strong>on</strong>. Make sure you escape all HTML and
-  angled-brackets with &amp;lt; and &amp;gt; (you can also use CDATA
-  tags, simply wrap the suspect text with
-&lt;![CDATA[<em>text</em>]]&gt;. Paragraphs will only be applied to 
-double-spaces; single-spaces will not generate <tt>&lt;br&gt;</tt> tags.
+    ?><tr><td colspan="2" style="padding:1em 0.3em;" class="htmlpurifier-help">
+    <p>
+        <strong>HTML input</strong> is enabled. Make sure you escape all HTML and
+        angled brackets with <code>&amp;lt;</code> and <code>&amp;gt;</code>.
+    </p><?php
+            $purifier =& HTMLPurifier::getInstance();
+            $config = $purifier->config;
+            if ($config->get('AutoFormat', 'AutoParagraph')) {
+                ?><p>
+                    <strong>Auto-paragraphing</strong> is enabled. Double
+                    newlines will be converted to paragraphs; for single
+                    newlines, use the <code>pre</code> tag.
+                </p><?php
+            }
+            $html_definition = $config->getDefinition('HTML');
+            $allowed = array();
+            foreach ($html_definition->info as $name => $x) $allowed[] = "<code>$name</code>";
+            sort($allowed);
+            $allowed_text = implode(', ', $allowed);
+            ?><p><strong>Allowed tags:</strong> <?php
+            echo $allowed_text;
+            ?>.</p><?php
+        ?>
+    </p>
+    <p>
+        For inputting literal code such as HTML and PHP for display, use
+        CDATA tags to auto-escape your angled brackets, and <code>pre</code>
+        to preserve newlines:
+    </p>
+    <pre>&lt;pre&gt;&lt;![CDATA[
+<em>Place code here</em>
+]]&gt;&lt;/pre&gt;</pre>
+    <p>
+        Power users, you can hide this notice with:
+        <pre>.htmlpurifier-help {display:none;}</pre>
+    </p>
     </td></tr><?php
 }
 
diff --git a/plugins/phorum/settings/migrate-sigs.php b/plugins/phorum/settings/migrate-sigs.php
index 7896be36..fd990be7 100644
--- a/plugins/phorum/settings/migrate-sigs.php
+++ b/plugins/phorum/settings/migrate-sigs.php
@@ -20,8 +20,10 @@ function phorum_htmlpurifier_migrate_sigs_check() {
 function phorum_htmlpurifier_migrate_sigs($offset) {
     global $PHORUM;
     
-    if(!$offset) return; // bail out quick of $offset == 0
+    if(!$offset) return; // bail out quick if $offset == 0
     
+    // theoretically, we could get rid of this multi-request
+    // doo-hickery if safe mode is off
     @set_time_limit(0); // attempt to let this run
     $increment = $PHORUM['mod_htmlpurifier']['migrate-sigs-increment'];
     
@@ -52,21 +54,19 @@ function phorum_htmlpurifier_migrate_sigs($offset) {
     
     // query for highest ID in database
     $type = $PHORUM['DBCONFIG']['type'];
+    $sql = "select MAX(user_id) from {$PHORUM['user_table']}";
     if ($type == 'mysql') {
         $conn = phorum_db_mysql_connect();
-        $sql = "select MAX(user_id) from {$PHORUM['user_table']}";
         $res = mysql_query($sql, $conn);
         $row = mysql_fetch_row($res);
-        $top_id = (int) $row[0];
     } elseif ($type == 'mysqli') {
         $conn = phorum_db_mysqli_connect();
-        $sql = "select MAX(user_id) from {$PHORUM['user_table']}";
         $res = mysqli_query($conn, $sql);
         $row = mysqli_fetch_row($res);
-        $top_id = (int) $row[0];
     } else {
         exit('Unrecognized database!');
     }
+    $top_id = (int) $row[0];
     
     $offset += $increment;
     if ($offset > $top_id) { // test for end condition
diff --git a/tests/HTMLPurifier/ChildDef/OptionalTest.php b/tests/HTMLPurifier/ChildDef/OptionalTest.php
index 154353df..40dc17ee 100644
--- a/tests/HTMLPurifier/ChildDef/OptionalTest.php
+++ b/tests/HTMLPurifier/ChildDef/OptionalTest.php
@@ -19,5 +19,9 @@ class HTMLPurifier_ChildDef_OptionalTest extends HTMLPurifier_ChildDefHarness
         $this->assertResult('Not allowed text', '');
     }
     
+    function testEmpty() {
+        $this->assertResult('');
+    }
+    
 }
 
diff --git a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
index 256d3a34..e55f96e5 100644
--- a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
+++ b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php
@@ -74,10 +74,11 @@ extends HTMLPurifier_ChildDefHarness
     }
     
     function testError() {
-        $this->expectError('Cannot use non-block element as block wrapper');
+        // $this->expectError('Cannot use non-block element as block wrapper');
         $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p');
         $this->config->set('HTML', 'BlockWrapper', 'dav');
         $this->assertResult('Needs wrap', '<p>Needs wrap</p>');
+        $this->swallowErrors();
     }
     
 }
diff --git a/tests/HTMLPurifier/ErrorCollectorEMock.php b/tests/HTMLPurifier/ErrorCollectorEMock.php
index c0f577d5..d3461331 100644
--- a/tests/HTMLPurifier/ErrorCollectorEMock.php
+++ b/tests/HTMLPurifier/ErrorCollectorEMock.php
@@ -27,11 +27,20 @@ class HTMLPurifier_ErrorCollectorEMock extends HTMLPurifier_ErrorCollectorMock
     
     function send($severity, $msg) {
         // test for context
-        $test = &$this->_getCurrentTestCase();
+        $context =& SimpleTest::getContext();
+        $test =& $context->getTest();
+        
+        // compat
+        if (empty($this->_mock)) {
+            $mock =& $this;
+        } else {
+            $mock =& $this->_mock;
+        }
+        
         foreach ($this->_expected_context as $key => $value) {
             $test->assertEqual($value, $this->_context->get($key));
         }
-        $step = $this->getCallCount('send');
+        $step = $mock->getCallCount('send');
         if (isset($this->_expected_context_at[$step])) {
             foreach ($this->_expected_context_at[$step] as $key => $value) {
                 $test->assertEqual($value, $this->_context->get($key));
@@ -39,7 +48,7 @@ class HTMLPurifier_ErrorCollectorEMock extends HTMLPurifier_ErrorCollectorMock
         }
         // boilerplate mock code, does not have return value or references
         $args = func_get_args();
-        $this->_invoke('send', $args);
+        $mock->_invoke('send', $args);
     }
     
 }
diff --git a/tests/HTMLPurifier/ErrorsHarness.php b/tests/HTMLPurifier/ErrorsHarness.php
index 67f7c6b3..9ab204e7 100644
--- a/tests/HTMLPurifier/ErrorsHarness.php
+++ b/tests/HTMLPurifier/ErrorsHarness.php
@@ -3,11 +3,15 @@
 require_once 'HTMLPurifier/ErrorCollectorEMock.php';
 require_once 'HTMLPurifier/Lexer/DirectLex.php';
 
+/**
+ * @todo Make the callCount variable actually work, so we can precisely
+ *       specify what errors we want: no more, no less
+ */
 class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness
 {
     
     var $config, $context;
-    var $collector, $generator;
+    var $collector, $generator, $callCount;
     
     function setup() {
         $this->config = HTMLPurifier_Config::create(array('Core.CollectErrors' => true));
@@ -16,6 +20,11 @@ class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness
         $this->collector = new HTMLPurifier_ErrorCollectorEMock();
         $this->collector->prepare($this->context);
         $this->context->register('ErrorCollector', $this->collector);
+        $this->callCount = 0;
+    }
+    
+    function expectNoErrorCollection() {
+        $this->collector->expectNever('send');
     }
     
     function expectErrorCollection() {
diff --git a/tests/HTMLPurifier/IDAccumulatorTest.php b/tests/HTMLPurifier/IDAccumulatorTest.php
index 006d689c..c6249eca 100644
--- a/tests/HTMLPurifier/IDAccumulatorTest.php
+++ b/tests/HTMLPurifier/IDAccumulatorTest.php
@@ -30,5 +30,11 @@ class HTMLPurifier_IDAccumulatorTest extends HTMLPurifier_Harness
         
     }
     
+    function testBuild() {
+        $this->config->set('Attr', 'IDBlacklist', array('foo'));
+        $accumulator = HTMLPurifier_IDAccumulator::build($this->config, $this->context);
+        $this->assertTrue( isset($accumulator->ids['foo']) );
+    }
+    
 }
 
diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
index 23743dff..5c726a11 100644
--- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php
+++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php
@@ -194,10 +194,7 @@ Bar</p></div>',
     }
     
     function testNoParagraphSingleInlineNodeInBlockNode() {
-        $this->assertResult(
-'<div><b>Foo</b></div>',
-            '<div><b>Foo</b></div>'
-        );
+        $this->assertResult( '<div><b>Foo</b></div>' );
     }
     
     function testParagraphInBlockquote() {
@@ -277,9 +274,7 @@ Par1
     function testBlockNodeTextDelimeterWithoutDoublespaceInBlockNode() {
         $this->assertResult(
 '<div>Par1
-<div>Par2</div></div>',
-'<div><p>Par1
-</p><div>Par2</div></div>'
+<div>Par2</div></div>'
         );
     }
     
@@ -351,6 +346,30 @@ Par2'
         );
     }
     
+    function testInlineAndBlockTagInDivNoParagraph() {
+        $this->assertResult(
+            '<div><code>bar</code> mmm <pre>asdf</pre></div>'
+        );
+    }
+    
+    function testInlineAndBlockTagInDivNeedingParagraph() {
+        $this->assertResult(
+'<div><code>bar</code> mmm
+
+<pre>asdf</pre></div>',
+'<div><p><code>bar</code> mmm</p><pre>asdf</pre></div>'
+        );
+    }
+    
+    function testTextInlineNodeTextThenDoubleNewlineNeedsParagraph() {
+        $this->assertResult(
+'<div>asdf <code>bar</code> mmm
+
+<pre>asdf</pre></div>',
+'<div><p>asdf <code>bar</code> mmm</p><pre>asdf</pre></div>'
+        );
+    }
+    
     function testErrorNeeded() {
         $this->config->set('HTML', 'Allowed', 'b');
         $this->expectError('Cannot enable AutoParagraph injector because p is not allowed');
diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php
index e67a3e44..bf4c374d 100644
--- a/tests/HTMLPurifier/Strategy/FixNestingTest.php
+++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php
@@ -109,8 +109,9 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
    function testInvalidParentError() {
         // test fallback to div
         $this->config->set('HTML', 'Parent', 'obviously-impossible');
-        $this->expectError('Cannot use unrecognized element as parent');
+        // $this->expectError('Cannot use unrecognized element as parent');
         $this->assertResult('<div>Accept</div>');
+        $this->swallowErrors();
     }
     
     function testCascadingRemovalOfNodesMissingRequiredChildren() {
@@ -129,5 +130,10 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
         $this->assertResult('<table></table><table></table>', '');
     }
     
+    function testStrictBlockquoteInHTML401() {
+        $this->config->set('HTML', 'Doctype', 'HTML 4.01 Strict');
+        $this->assertResult('<blockquote>text</blockquote>', '<blockquote><p>text</p></blockquote>');
+    }
+    
 }
 
diff --git a/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php b/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php
index 4ba30f36..db5b989f 100644
--- a/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php
+++ b/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php
@@ -28,6 +28,11 @@ class HTMLPurifier_Strategy_FixNesting_ErrorsTest extends HTMLPurifier_Strategy_
         $this->invoke("<span>Valid<div>Invalid</div></span>");
     }
     
+    function testNoNodeReorganizedForEmptyNode() {
+        $this->expectNoErrorCollection();
+        $this->invoke("<span></span>");
+    }
+    
     function testNodeContentsRemoved() {
         $this->expectErrorCollection(E_ERROR, 'Strategy_FixNesting: Node contents removed');
         $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('span', array(), 1));
diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
index e8e6c797..0249d5ef 100644
--- a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
+++ b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php
@@ -11,6 +11,19 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
         $this->obj = new HTMLPurifier_Strategy_MakeWellFormed();
         $this->config->set('AutoFormat', 'AutoParagraph', true);
         $this->config->set('AutoFormat', 'Linkify', true);
+        generate_mock_once('HTMLPurifier_Injector');
+    }
+    
+    function testEndNotification() {
+        $mock = new HTMLPurifier_InjectorMock();
+        $mock->skip = false;
+        $mock->expectAt(0, 'notifyEnd', array(new HTMLPurifier_Token_End('b')));
+        $mock->expectAt(1, 'notifyEnd', array(new HTMLPurifier_Token_End('i')));
+        $mock->expectCallCount('notifyEnd', 2);
+        $this->config->set('AutoFormat', 'AutoParagraph', false);
+        $this->config->set('AutoFormat', 'Linkify',       false);
+        $this->config->set('AutoFormat', 'Custom', array($mock));
+        $this->assertResult('<i><b>asdf</b>', '<i><b>asdf</b></i>');
     }
     
     function testOnlyAutoParagraph() {
@@ -62,4 +75,11 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
         );
     }
     
+    function testParagraphAfterLinkifiedURL() {
+        $this->assertResult(
+            "http://google.com\n\n<b>b</b>",
+            "<p><a href=\"http://google.com\">http://google.com</a></p><p><b>b</b></p>"
+        );
+    }
+    
 }
diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
index 19a37b24..4b397b09 100644
--- a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
+++ b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php
@@ -82,5 +82,14 @@ alert(&lt;b&gt;bold&lt;/b&gt;);
         );
     }
     
+    function testRequiredAttributesTestNotPerformedOnEndTag() {
+        $this->config->set('HTML', 'DefinitionID',
+            'HTMLPurifier_Strategy_RemoveForeignElementsTest'.
+            '->testRequiredAttributesTestNotPerformedOnEndTag');
+        $def =& $this->config->getHTMLDefinition(true);
+        $def->addElement('f', 'Block', 'Optional: #PCDATA', false, array('req*' => 'Text'));
+        $this->assertResult('<f req="text">Foo</f> Bar');
+    }
+    
 }
 
diff --git a/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
index d509a6a1..51f47358 100644
--- a/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
+++ b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php
@@ -111,6 +111,12 @@ class HTMLPurifier_URIFilter_MakeAbsoluteTest extends HTMLPurifier_URIFilterHarn
         $this->assertFiltering('.', '../');
     }
     
+    function testRemoveJavaScriptWithEmbeddedLink() {
+        // credits: NykO18
+        $this->setBase('http://www.example.com/');
+        $this->assertFiltering('javascript: window.location = \'http://www.example.com\';', false);
+    }
+    
     // error case
     
     function testErrorNoBase() {
diff --git a/tests/HTMLPurifierTest.php b/tests/HTMLPurifierTest.php
index 3ad307bb..6a221b24 100644
--- a/tests/HTMLPurifierTest.php
+++ b/tests/HTMLPurifierTest.php
@@ -94,6 +94,7 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
         
         $this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true));
         $this->assertPurification('<span id="moon">foobar</span>');
+        $this->assertPurification('<img id="folly" src="folly.png" alt="Omigosh!" />');
         
     }
     
diff --git a/tests/index.php b/tests/index.php
index a98e20e6..5063d7ed 100755
--- a/tests/index.php
+++ b/tests/index.php
@@ -3,7 +3,9 @@
 // call one file using /?f=FileTest.php , see $test_files array for
 // valid values
 
-error_reporting(E_ALL | E_STRICT);
+if (version_compare(PHP_VERSION, '5.1', '>=')) error_reporting(E_ALL | E_STRICT);
+else error_reporting(E_ALL);
+
 define('HTMLPurifierTest', 1);
 define('HTMLPURIFIER_SCHEMA_STRICT', true); // validate schemas
 
@@ -17,6 +19,7 @@ $GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") &
 $simpletest_location = 'simpletest/'; // reasonable guess
 
 // load SimpleTest
+if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php';
 if (file_exists('../test-settings.php')) include '../test-settings.php';
 require_once $simpletest_location . 'unit_tester.php';
 require_once $simpletest_location . 'reporter.php';
@@ -79,7 +82,7 @@ if ($test_file = $GLOBALS['HTMLPurifierTest']['File']) {
     
 } else {
     
-    $test = new GroupTest('All Tests');
+    $test = new GroupTest('All tests on PHP ' . PHP_VERSION);
     foreach ($test_files as $test_file) {
         require_once $test_file;
         $test->addTestClass(path2class($test_file));
@@ -91,5 +94,3 @@ if (SimpleReporter::inCli()) $reporter = new TextReporter();
 else $reporter = new HTMLPurifier_SimpleTest_Reporter('UTF-8');
 
 $test->run($reporter);
-
-
diff --git a/tests/multitest.php b/tests/multitest.php
new file mode 100644
index 00000000..1490bc7b
--- /dev/null
+++ b/tests/multitest.php
@@ -0,0 +1,33 @@
+<?php
+
+$versions_to_test = array(
+    'FLUSH',
+    '5.0.4',
+    '5.0.5',
+    '5.1.4',
+    '5.1.6',
+    '5.2.0',
+    '5.2.1',
+    '5.2.2',
+    '5.2.3',
+    '5.2.4',
+    '5.2.5RC2-dev',
+    '5.3.0-dev',
+    // '6.0.0-dev',
+);
+
+echo str_repeat('-', 70) . "\n";
+echo "HTML Purifier\n";
+echo "Multiple PHP Versions Test\n\n";
+
+passthru("php ../maintenance/merge-library.php");
+
+foreach ($versions_to_test as $version) {
+    if ($version === 'FLUSH') {
+        shell_exec('php ../maintenance/flush-definition-cache.php');
+        continue;
+    }
+    passthru("phpv $version index.php");
+    passthru("phpv $version index.php standalone");
+    echo "\n\n";
+}