diff --git a/Doxyfile b/Doxyfile index 044217be..5bd7625b 100644 --- a/Doxyfile +++ b/Doxyfile @@ -1,19 +1,93 @@ -# Doxyfile 1.4.7 +# Doxyfile 1.5.3 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- -PROJECT_NAME = HTML Purifier -PROJECT_NUMBER = 2.1.2 -OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen" + +# This tag specifies the encoding used for all characters in the config file that +# follow. The default is UTF-8 which is also the encoding used for all text before +# the first occurrence of this tag. Doxygen uses libiconv (or the iconv built into +# libc) for the transcoding. See http://www.gnu.org/software/libiconv for the list of +# possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = HTMLPurifier + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = 2.1.3 + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = "docs/doxygen " + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian, +# Italian, Japanese, Japanese-en (Japanese with English messages), Korean, +# Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, +# Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian. + OUTPUT_LANGUAGE = English -USE_WINDOWS_ENCODING = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + REPEAT_BRIEF = YES -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = "The $name class " \ + "The $name widget " \ + "The $name file " \ is \ provides \ specifies \ @@ -22,71 +96,440 @@ ABBREVIATE_BRIEF = "The $name class" \ a \ an \ the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + FULL_PATH_NAMES = YES -STRIP_FROM_PATH = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier" + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = "C:/Users/Edward/Webs/htmlpurifier " \ + "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier " + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for Java. +# For instance, namespaces will be presented as packages, qualified scopes +# will look different, etc. + OPTIMIZE_OUTPUT_JAVA = NO + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to +# include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + SUBGROUPING = YES + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + EXTRACT_PRIVATE = YES + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be extracted +# and appear in the documentation as a namespace called 'anonymous_namespace{file}', +# where file will be replaced with the base name of the file that contains the anonymous +# namespace. By default anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + SORT_BRIEF_DOCS = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + SHOW_DIRECTORIES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from the +# version control system). Doxygen will invoke the program by executing (via +# popen()) the command , where is the value of +# the FILE_VERSION_FILTER tag, and is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + FILE_VERSION_FILTER = + #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be abled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + WARN_NO_PARAMDOC = NO -WARN_FORMAT = "$file:$line: $text" + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text " + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + WARN_LOGFILE = + #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- -INPUT = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier" + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = ". " + +# This tag can be used to specify the character encoding of the source files that +# doxygen parses. Internally doxygen uses the UTF-8 encoding, which is also the default +# input encoding. Doxygen uses libiconv (or the iconv built into libc) for the transcoding. +# See http://www.gnu.org/software/libiconv for the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py + FILE_PATTERNS = *.php + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + EXCLUDE_PATTERNS = */tests/* \ */benchmarks/* \ */docs/* \ @@ -94,149 +537,778 @@ EXCLUDE_PATTERNS = */tests/* \ */configdoc/* \ */test-settings.php \ */maintenance/* \ - */smoketests/* + */smoketests/* \ + */library/standalone/* \ + */.svn* + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the output. +# The symbol name can be a fully qualified name, a word, or if the wildcard * is used, +# a substring. Examples: ANamespace, AClass, AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command , where +# is the value of the INPUT_FILTER tag, and is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + FILTER_SOURCE_FILES = NO + #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. If you have enabled CALL_GRAPH or CALLER_GRAPH +# then you must also enable this option. If you don't then doxygen will produce +# a warning and turn it on anyway + SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. Otherwise they will link to the documentstion. + REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + VERBATIM_HEADERS = YES + #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + IGNORE_PREFIX = + #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + GENERATE_HTMLHELP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. + GENERATE_TREEVIEW = YES + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + TREEVIEW_WIDTH = 250 + #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + LATEX_HIDE_INDICES = NO + #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + RTF_EXTENSIONS_FILE = + #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + MAN_LINKS = NO + #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + XML_PROGRAMLISTING = YES + #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + GENERATE_AUTOGEN_DEF = NO + #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + PERLMOD_MAKEVAR_PREFIX = + #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be search if a #include is found. + SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + SKIP_FUNCTION_MACROS = YES + #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + PERL_PATH = /usr/bin/perl + #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see http://www.mcternan.me.uk/mscgen/) to +# produce the chart and insert it in the documentation. The MSCGEN_PATH tag allows you to +# specify the directory where the mscgen tool resides. If left empty the tool is assumed to +# be found in the default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will +# generate a call dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. + CALL_GRAPH = NO + +# If the CALLER_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will +# generate a caller dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable caller graphs for selected +# functions only using the \callergraph command. + CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + DOTFILE_DIRS = -MAX_DOT_GRAPH_WIDTH = 1024 -MAX_DOT_GRAPH_HEIGHT = 1024 + +# The MAX_DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the number +# of direct children of the root node in a graph is already larger than +# MAX_DOT_GRAPH_NOTES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + MAX_DOT_GRAPH_DEPTH = 1000 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, which results in a white background. +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). + DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + DOT_CLEANUP = YES + #--------------------------------------------------------------------------- # Configuration::additions related to the search engine #--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + SEARCHENGINE = NO diff --git a/INSTALL b/INSTALL index 1d68c0c9..317c89bb 100644 --- a/INSTALL +++ b/INSTALL @@ -1,64 +1,56 @@ + Install How to install HTML Purifier -HTML Purifier is designed to run out of the box, so actually using the library -is extremely easy. (Although, if you were looking for a step-by-step -installation GUI, you've come to the wrong place!) The impatient can scroll -down to the bottom of this INSTALL document to see the code, but you really -should make sure a few things are properly done. - - +HTML Purifier is designed to run out of the box, so actually using the +library is extremely easy. (Although... if you were looking for a +step-by-step installation GUI, you've downloaded the wrong software!) + +While the impatient can get going immediately with some of the sample +code at the bottom of this library, it's well worth performing some +basic sanity checks to get the most out of this library. +--------------------------------------------------------------------------- 1. Compatibility -HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.2 and up. It has no -core dependencies with other libraries. +HTML Purifier works in both PHP 4 and PHP 5, and is actively tested from +PHP 4.3.7 and up (see tests/multitest.php for specific versions). It has +no core dependencies with other libraries. PHP 4 support will be +deprecated on December 31, 2007, at which time only essential security +fixes will be issued for the PHP 4 version until August 8, 2008. -Optional extensions are iconv (usually installed) and tidy (also common). -If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with -not having either of these extensions. +These optional extensions can enhance the capabilities of HTML Purifier: + + * iconv : Converts text to and from non-UTF-8 encodings + * tidy : Used for pretty-printing HTML +--------------------------------------------------------------------------- +2. Reconnaissance -2. Including the library +A big plus of HTML Purifier is its inerrant support of standards, so +your web-pages should be standards-compliant. (They should also use +semantic markup, but that's another issue altogether, one HTML Purifier +cannot fix without reading your mind.) -Simply use: - - require_once '/path/to/library/HTMLPurifier.auto.php'; - -...and you're good to go. Since HTML Purifier's codebase is fairly -large, I recommend only including HTML Purifier when you need it. - -If you don't like your include_path to be fiddled around with, simply set -HTML Purifier's library/ directory to the include path yourself and then: - - require_once 'HTMLPurifier.php'; - -Only the contents in the library/ folder are necessary, so you can remove -everything else when using HTML Purifier in a production environment. - - - -3. Preparing the proper output environment - -HTML Purifier is all about web-standards, so accordingly your webpages should -be standards compliant. HTML Purifier can deal with these doctypes: +HTML Purifier can process these doctypes: * XHTML 1.0 Transitional (default) * XHTML 1.0 Strict * HTML 4.01 Transitional * HTML 4.01 Strict -* XHTML 1.1 (sans Ruby) +* XHTML 1.1 ...and these character encodings: * UTF-8 (default) -* Any encoding iconv supports (support is crippled for i18n though) +* Any encoding iconv supports (with crippled internationalization support) -The defaults are there for a reason: they are best-practice choices that -should not be changed lightly. For those of you in the dark, you can determine -the doctype from this code in your HTML documents: +These defaults reflect what my choices where be if I were authoring an +HTML document, however, what you choose depends on the nature of your +codebase. If you don't know what doctype you are using, you can determine +the doctype from this identifier at the top of your source code: @@ -67,18 +59,34 @@ the doctype from this code in your HTML documents: -For legacy codebases these declarations may be missing. If that is the case, -STOP, and read docs/enduser-utf8.html +If the character encoding declaration is missing, STOP NOW, and +read 'docs/enduser-utf8.html' (web accessible at +http://htmlpurifier.org/docs/enduser-utf8.html). In fact, even if it is +present, read this document anyway, as most websites specify character +encoding incorrectly. +--------------------------------------------------------------------------- +3. Including the library + +The procedure is quite simple: + + require_once '/path/to/library/HTMLPurifier.auto.php'; + +I recommend only including HTML Purifier when you need it, because that +call represents the inclusion of a lot of PHP files which constitute +the bulk of HTML Purifier's memory usage. + +If you don't like your include_path to be fiddled around with, simply set +HTML Purifier's library/ directory to the include path yourself and then: + + require_once 'HTMLPurifier.php'; + +Only the contents in the library/ folder are necessary, so you can remove +everything else when using HTML Purifier in a production environment. - -You may currently be vulnerable to XSS and other security threats, and HTML -Purifier won't be able to fix that. - - - +--------------------------------------------------------------------------- 4. Configuration HTML Purifier is designed to run out-of-the-box, but occasionally HTML @@ -95,7 +103,6 @@ object and read on: $config = HTMLPurifier_Config::createDefault(); - 4.1. Setting a different character encoding You really shouldn't use any other encoding except UTF-8, especially if you @@ -122,10 +129,6 @@ but please be cognizant of the issues the "solution" creates (for this reason, I do not include the solution in this document). - - - - 4.2. Setting a different doctype For those of you using HTML 4.01 Transitional, you can disable @@ -135,7 +138,6 @@ XHTML output like this: Other supported doctypes include: - * HTML 4.01 Strict * HTML 4.01 Transitional * XHTML 1.0 Strict @@ -143,7 +145,6 @@ Other supported doctypes include: * XHTML 1.1 - 4.3. Other settings There are more configuration directives which can be read about @@ -153,55 +154,24 @@ your code. Some of the more interesting ones are configurable at the demo and are well worth looking into for your own system. +For example, you can fine tune allowed elements and attributes, convert +relative URLs to absolute ones, and even autoparagraph input text! These +are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and +%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention +translates to: + + $config->set('Namespace', 'Directive', $value); + +E.g. + + $config->set('HTML', 'Allowed', 'p,b,a[href],i'); + $config->set('URI', 'Base', 'http://www.example.com'); + $config->set('URI', 'MakeAbsolute', true); + $config->set('AutoFormat', 'AutoParagraph', true); -5. Using the code - -The interface is mind-numbingly simple: - - $purifier = new HTMLPurifier(); - $clean_html = $purifier->purify( $dirty_html ); - -...or, if you're using the configuration object: - - $purifier = new HTMLPurifier($config); - $clean_html = $purifier->purify( $dirty_html ); - -That's it! For more examples, check out docs/examples/ (they aren't very -different though). Also, docs/enduser-slow.html gives advice on what to -do if HTML Purifier is slowing down your application. - - - -6. Quick install - -First, make sure library/HTMLPurifier/DefinitionCache/Serializer is -writable by the webserver (see Section 7: Caching below for details). -If your website is in UTF-8 and XHTML Transitional, use this code: - -purify($dirty_html); -?> - -If your website is in a different encoding or doctype, use this code: - -set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding - $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype - $purifier = new HTMLPurifier($config); - - $clean_html = $purifier->purify($dirty_html); -?> - - - -7. Caching +--------------------------------------------------------------------------- +5. Caching HTML Purifier generates some cache files (generally one or two) to speed up its execution. For maximum performance, make sure that @@ -236,3 +206,50 @@ hit): Or move the cache directory somewhere else (no trailing slash): $config->set('Cache', 'SerializerPath', '/home/user/absolute/path'); + + +--------------------------------------------------------------------------- +6. Using the code + +The interface is mind-numbingly simple: + + $purifier = new HTMLPurifier(); + $clean_html = $purifier->purify( $dirty_html ); + +...or, if you're using the configuration object: + + $purifier = new HTMLPurifier($config); + $clean_html = $purifier->purify( $dirty_html ); + +That's it! For more examples, check out docs/examples/ (they aren't very +different though). Also, docs/enduser-slow.html gives advice on what to +do if HTML Purifier is slowing down your application. + + +--------------------------------------------------------------------------- +7. Quick install + +First, make sure library/HTMLPurifier/DefinitionCache/Serializer is +writable by the webserver (see Section 5: Caching above for details). +If your website is in UTF-8 and XHTML Transitional, use this code: + +purify($dirty_html); +?> + +If your website is in a different encoding or doctype, use this code: + +set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding + $config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype + $purifier = new HTMLPurifier($config); + + $clean_html = $purifier->purify($dirty_html); +?> + diff --git a/NEWS b/NEWS index 212edc94..2f97331f 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,55 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Internal change ========================== +2.1.3, released 2007-11-05 +! tests/multitest.php allows you to test multiple versions by running + tests/index.php through multiple interpreters using `phpv` shell + script (you must provide this script!) +- Fixed poor include ordering for Email URI AttrDefs, causes fatal errors + on some systems. +- Injector algorithm further refined: off-by-one error regarding skip + counts for dormant injectors fixed +- Corrective blockquote definition now enabled for HTML 4.01 Strict +- Fatal error when tag (or any other element with required attributes) + has 'id' attribute fixed, thanks NykO18 for reporting +- Fix warning emitted when a non-supported URI scheme is passed to the + MakeAbsolute URIFilter, thanks NykO18 (again) +- Further refine AutoParagraph injector. Behavior inside of elements + allowing paragraph tags clarified: only inline content delimeted by + double newlines (not block elements) are paragraphed. +- Buggy treatment of end tags of elements that have required attributes + fixed (does not manifest on default tag-set) +- Spurious internal content reorganization error suppressed +- HTMLDefinition->addElement now returns a reference to the created + element object, as implied by the documentation +- Phorum mod's HTML Purifier help message expanded (unreleased elsewhere) +- Fix a theoretical class of infinite loops from DirectLex reported + by Nate Abele +- Work around unnecessary DOMElement type-cast in PH5P that caused errors + in PHP 5.1 +- Work around PHP 4 SimpleTest lack-of-error complaining for one-time-only + HTMLDefinition errors, this may indicate problems with error-collecting + facilities in PHP 5 +- Make ErrorCollectorEMock work in both PHP 4 and PHP 5 +- Make PH5P work with PHP 5.0 by removing unnecessary array parameter typedef +. %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment + to better communicate its purpose +. Error unit tests can now specify the expectation of no errors. Future + iterations of the harness will be extremely strict about what errors + are allowed +. Extend Injector hooks to allow for more powerful injector routines +. HTMLDefinition->addBlankElement created, as according to the HTMLModule + method +. Doxygen configuration file updated, with minor improvements +. Test runner now checks for similarly named files in conf/ directory too. +. Minor cosmetic change to flush-definition-cache.php: trailing newline is + outputted +. Maintenance script for generating PH5P patch added, original PH5P source + file also added under version control +. Full unit test runner script title made more descriptive with PHP version +. Updated INSTALL file to state that 4.3.7 is the earliest version we + are actively testing + 2.1.2, released 2007-09-03 ! Implemented Object module for trusted users ! Implemented experimental HTML5 parsing mode using PH5P. To use, add diff --git a/VERSION b/VERSION index 8f9174b4..abae0d9a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.2 \ No newline at end of file +2.1.3 \ No newline at end of file diff --git a/WHATSNEW b/WHATSNEW index e9a40184..d3511d5d 100644 --- a/WHATSNEW +++ b/WHATSNEW @@ -1,8 +1,6 @@ -Version 2.1.2 is a mix of experimental features and stability updates. -Among new features: an Object module for trusted users, support for the -CSS property 'border-spacing', and HTML 5 style parsing using PH5P. -Bug fixes ihave resolved a few obscure issues including border-collapse:seperate, -a DirectLex parsing error, broken HTML in printDefinition.php, and problems -with the experimental standalone distribution. Also, there were large -amounts of behind-the-scenes refactoring and the removal of URIScheme -inclusion reflection. +Stability release 2.1.3 fixes a slew of minor bugs found in HTML Purifier, +and also includes some internal code enhancements and refactorings. +Notably, tests/multitest.php automates testing in multiple versions, +fatal AttrDef_URI_Email error fixed, blockquote contents are more lenient +in HTML 4.01 Strict and fatal errors involving ID tags in img tags were +fixed. diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php index 43fe616b..d5878d30 100644 --- a/library/HTMLPurifier.php +++ b/library/HTMLPurifier.php @@ -22,8 +22,8 @@ */ /* - HTML Purifier 2.1.2 - Standards Compliant HTML Filtering - Copyright (C) 2006 Edward Z. Yang + HTML Purifier 2.1.3 - Standards Compliant HTML Filtering + Copyright (C) 2006-2007 Edward Z. Yang This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -43,9 +43,8 @@ // constants are slow, but we'll make one exception define('HTMLPURIFIER_PREFIX', dirname(__FILE__)); -// almost every class has an undocumented dependency to these, so make sure -// they get included -require_once 'HTMLPurifier/ConfigSchema.php'; // important +// every class has an undocumented dependency to these, must be included! +require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included require_once 'HTMLPurifier/Config.php'; require_once 'HTMLPurifier/Context.php'; @@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php'; HTMLPurifier_ConfigSchema::define( 'Core', 'CollectErrors', false, 'bool', ' Whether or not to collect errors found while filtering the document. This -is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED. -This directive has been available since 2.0.0. +is a useful way to give feedback to your users. Warning: +Currently this feature is very patchy and experimental, with lots of +possible error messages not yet implemented. It will not cause any problems, +but it may not help your users either. This directive has been available +since 2.0.0. '); /** - * Main library execution class. + * Facade that coordinates HTML Purifier's subsystems in order to purify HTML. * - * Facade that performs calls to the HTMLPurifier_Lexer, - * HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to - * purify HTML. + * @note There are several points in which configuration can be specified + * for HTML Purifier. The precedence of these (from lowest to + * highest) is as follows: + * -# Instance: new HTMLPurifier($config) + * -# Invocation: purify($html, $config) + * These configurations are entirely independent of each other and + * are *not* merged. * * @todo We need an easier way to inject strategies, it'll probably end * up getting done through config though. @@ -77,15 +83,16 @@ This directive has been available since 2.0.0. class HTMLPurifier { - var $version = '2.1.2'; + var $version = '2.1.3'; var $config; - var $filters; + var $filters = array(); var $strategy, $generator; /** - * Final HTMLPurifier_Context of last run purification. Might be an array. + * Resultant HTMLPurifier_Context of last run purification. Is an array + * of contexts if the last called method was purifyArray(). * @public */ var $context; @@ -150,6 +157,11 @@ class HTMLPurifier $context->register('ErrorCollector', $error_collector); } + // setup id_accumulator context, necessary due to the fact that + // AttrValidator can be called from many places + $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); + $context->register('IDAccumulator', $id_accumulator); + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); for ($i = 0, $size = count($this->filters); $i < $size; $i++) { @@ -198,6 +210,8 @@ class HTMLPurifier /** * Singleton for enforcing just one HTML Purifier in your system + * @param $prototype Optional prototype HTMLPurifier instance to + * overload singleton with. */ static function &getInstance($prototype = null) { static $htmlpurifier; diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 365748c0..0e9a5f47 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -102,7 +102,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $result = $uri->validate($config, $context); if (!$result) break; - // chained validation + // chained filtering $uri_def =& $config->getDefinition('URI'); $result = $uri_def->filter($uri, $config, $context); if (!$result) break; diff --git a/library/HTMLPurifier/AttrDef/URI/Email.php b/library/HTMLPurifier/AttrDef/URI/Email.php index aaec099a..ababd9ea 100644 --- a/library/HTMLPurifier/AttrDef/URI/Email.php +++ b/library/HTMLPurifier/AttrDef/URI/Email.php @@ -1,7 +1,6 @@ getHTMLDefinition(); $e =& $context->get('ErrorCollector', true); + // initialize IDAccumulator if necessary + $ok =& $context->get('IDAccumulator', true); + if (!$ok) { + $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); + $context->register('IDAccumulator', $id_accumulator); + } + // initialize CurrentToken if necessary $current_token =& $context->get('CurrentToken', true); if (!$current_token) $context->register('CurrentToken', $token); diff --git a/library/HTMLPurifier/ChildDef/Optional.php b/library/HTMLPurifier/ChildDef/Optional.php index 779a7f06..e9f14edf 100644 --- a/library/HTMLPurifier/ChildDef/Optional.php +++ b/library/HTMLPurifier/ChildDef/Optional.php @@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required var $type = 'optional'; function validateChildren($tokens_of_children, $config, &$context) { $result = parent::validateChildren($tokens_of_children, $config, $context); - if ($result === false) return array(); + if ($result === false) { + if (empty($tokens_of_children)) return true; + else return array(); + } return $result; } } diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 7e330e47..0d75b609 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -42,7 +42,7 @@ class HTMLPurifier_Config /** * HTML Purifier's version */ - var $version = '2.1.2'; + var $version = '2.1.3'; /** * Two-level associative array of configuration directives diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index fe6bd141..e13e0c62 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -236,13 +236,26 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition /** * Adds a custom element to your HTML definition * @note See HTMLPurifier_HTMLModule::addElement for detailed - * parameter descriptions. + * parameter and return value descriptions. */ - function addElement($element_name, $type, $contents, $attr_collections, $attributes) { + function &addElement($element_name, $type, $contents, $attr_collections, $attributes) { $module =& $this->getAnonymousModule(); // assume that if the user is calling this, the element // is safe. This may not be a good idea - $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes); + $element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes); + return $element; + } + + /** + * Adds a blank element to your HTML definition, for overriding + * existing behavior + * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed + * parameter and return value descriptions. + */ + function &addBlankElement($element_name) { + $module =& $this->getAnonymousModule(); + $element =& $module->addBlankElement($element_name); + return $element; } /** diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php index 386cf365..dcf306a0 100644 --- a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php +++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php @@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php'; require_once 'HTMLPurifier/AttrTransform/ImgSpace.php'; require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php'; +require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php'; + class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy { @@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends { var $name = 'Tidy_Strict'; var $defaultLevel = 'light'; + + function makeFixes() { + $r = parent::makeFixes(); + $r['blockquote#content_model_type'] = 'strictblockquote'; + return $r; + } + + var $defines_child_def = true; + function getChildDef($def) { + if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def); + return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); + } } diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php deleted file mode 100644 index b701491e..00000000 --- a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php +++ /dev/null @@ -1,26 +0,0 @@ -content_model_type != 'strictblockquote') return false; - return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); - } - -} - diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php index 74a233ff..3fc86160 100644 --- a/library/HTMLPurifier/HTMLModuleManager.php +++ b/library/HTMLPurifier/HTMLModuleManager.php @@ -35,7 +35,6 @@ require_once 'HTMLPurifier/HTMLModule/Object.php'; require_once 'HTMLPurifier/HTMLModule/Tidy.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php'; -require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php'; HTMLPurifier_ConfigSchema::define( @@ -209,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager $this->doctypes->register( 'XHTML 1.0 Strict', true, array_merge($common, $xml, $non_xml), - array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'), + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'), array(), '-//W3C//DTD XHTML 1.0 Strict//EN', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' @@ -218,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager $this->doctypes->register( 'XHTML 1.1', true, array_merge($common, $xml, array('Ruby')), - array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1 + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1 array(), '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' diff --git a/library/HTMLPurifier/IDAccumulator.php b/library/HTMLPurifier/IDAccumulator.php index 525c9aa0..c9f58835 100644 --- a/library/HTMLPurifier/IDAccumulator.php +++ b/library/HTMLPurifier/IDAccumulator.php @@ -1,11 +1,15 @@ load($config->get('Attr', 'IDBlacklist')); + return $id_accumulator; + } + /** * Add an ID to the lookup table. * @param $id ID to be added. diff --git a/library/HTMLPurifier/Injector.php b/library/HTMLPurifier/Injector.php index 59017163..3b847097 100644 --- a/library/HTMLPurifier/Injector.php +++ b/library/HTMLPurifier/Injector.php @@ -4,6 +4,9 @@ * Injects tokens into the document while parsing for well-formedness. * This enables "formatter-like" functionality such as auto-paragraphing, * smiley-ification and linkification to take place. + * + * @todo Allow injectors to request a re-run on their output. This + * would help if an operation is recursive. */ class HTMLPurifier_Injector { @@ -107,5 +110,12 @@ class HTMLPurifier_Injector */ function handleElement(&$token) {} + /** + * Notifier that is called when an end token is processed + * @note This differs from handlers in that the token is read-only + */ + function notifyEnd($token) {} + + } diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php index 6e0a6a3e..56a6a268 100644 --- a/library/HTMLPurifier/Injector/AutoParagraph.php +++ b/library/HTMLPurifier/Injector/AutoParagraph.php @@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define( 'AutoFormat', 'AutoParagraph', false, 'bool', '

This directive turns on auto-paragraphing, where double newlines are - converted in to paragraphs whenever possible. Auto-paragraphing - applies when: + converted in to paragraphs whenever possible. Auto-paragraphing:

    -
  • There are inline elements or text in the root node
  • -
  • There are inline elements or text with double newlines or - block elements in nodes that allow paragraph tags
  • -
  • There are double newlines in paragraph tags
  • +
  • Always applies to inline elements or text in the root node,
  • +
  • Applies to inline elements or text with double newlines in nodes + that allow paragraph tags,
  • +
  • Applies to double newlines in paragraph tags

p tags must be allowed for this directive to take effect. We do not use br tags for paragraphing, as that is semantically incorrect.

+

+ To prevent auto-paragraphing as a content-producer, refrain from using + double-newlines except to specify a new paragraph or in contexts where + it has special meaning (whitespace usually has no meaning except in + tags like pre, so this should not be difficult.) To prevent + the paragraphing of inline text adjacent to block elements, wrap them + in div tags (the behavior is slightly different outside of + the root node.) +

This directive has been available since 2.0.1.

@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector $ok = false; // test if up-coming tokens are either block or have // a double newline in them + $nesting = 0; for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { if ($this->inputTokens[$i]->type == 'start'){ if (!$this->_isInline($this->inputTokens[$i])) { - $ok = true; + // we haven't found a double-newline, and + // we've hit a block element, so don't paragraph + $ok = false; + break; } - break; + $nesting++; + } + if ($this->inputTokens[$i]->type == 'end') { + if ($nesting <= 0) break; + $nesting--; } - if ($this->inputTokens[$i]->type == 'end') break; if ($this->inputTokens[$i]->type == 'text') { + // found it! if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { $ok = true; + break; } - if (!$this->inputTokens[$i]->is_whitespace) break; } } if ($ok) { diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index f52579ab..3819ad22 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) { } HTMLPurifier_ConfigSchema::define( - 'Core', 'AcceptFullDocuments', true, 'bool', - 'This parameter determines whether or not the filter should accept full '. - 'HTML documents, not just HTML fragments. When on, it will '. - 'drop all sections except the content between body.' -); + 'Core', 'ConvertDocumentToFragment', true, 'bool', ' +This parameter determines whether or not the filter should convert +input that is a full document with html and body tags to a fragment +of just the contents of a body tag. This parameter is simply something +HTML Purifier can do during an edge-case: for most inputs, this +processing is not necessary. +'); +HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment'); HTMLPurifier_ConfigSchema::define( 'Core', 'LexerImpl', null, 'mixed/null', ' @@ -316,7 +319,7 @@ class HTMLPurifier_Lexer function normalize($html, $config, &$context) { // extract body from document if applicable - if ($config->get('Core', 'AcceptFullDocuments')) { + if ($config->get('Core', 'ConvertDocumentToFragment')) { $html = $this->extractBody($html); } diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php index 6f8c8ff6..cdcf2aa1 100644 --- a/library/HTMLPurifier/Lexer/DirectLex.php +++ b/library/HTMLPurifier/Lexer/DirectLex.php @@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $segment = substr($html, $cursor, $strlen_segment); + if ($segment === false) { + // somehow, we attempted to access beyond the end of + // the string, defense-in-depth, reported by Nate Abele + break; + } + // Check if it's a comment if ( - substr($segment, 0, 3) == '!--' + substr($segment, 0, 3) === '!--' ) { // re-determine segment length, looking for --> $position_comment_end = strpos($html, '-->', $cursor); @@ -237,7 +243,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // trailing slash. Remember, we could have a tag like
, so // any later token processing scripts must convert improperly // classified EmptyTags from StartTags. - $is_self_closing= (strrpos($segment,'/') === $strlen_segment-1); + $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); if ($is_self_closing) { $strlen_segment--; $segment = substr($segment, 0, $strlen_segment); diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php index 5720c33a..b6762379 100644 --- a/library/HTMLPurifier/Lexer/PH5P.php +++ b/library/HTMLPurifier/Lexer/PH5P.php @@ -26,8 +26,6 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex { } -// begin PHP5P source code here - /* Copyright 2007 Jeroen van der Meer @@ -3722,7 +3720,7 @@ class HTML5TreeConstructer { } } - private function generateImpliedEndTags(array $exclude = array()) { + private function generateImpliedEndTags($exclude = array()) { /* When the steps below require the UA to generate implied end tags, then, if the current node is a dd element, a dt element, an li element, a p element, a td element, a th element, or a tr element, the UA must @@ -3736,7 +3734,8 @@ class HTML5TreeConstructer { } } - private function getElementCategory($name) { + private function getElementCategory($node) { + $name = $node->tagName; if(in_array($name, $this->special)) return self::SPECIAL; @@ -3884,3 +3883,4 @@ class HTML5TreeConstructer { return $this->dom; } } +?> diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php index 51a14a78..25e9f8ac 100644 --- a/library/HTMLPurifier/Strategy/FixNesting.php +++ b/library/HTMLPurifier/Strategy/FixNesting.php @@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy //################################################################// // Process result by interpreting $result - if ($result === true) { + if ($result === true || $child_tokens === $result) { // leave the node as is // register start token as a parental node start diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index b3e8aa74..4b6f498f 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -36,28 +36,23 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $definition = $config->getHTMLDefinition(); - // CurrentNesting - $this->currentNesting = array(); - $context->register('CurrentNesting', $this->currentNesting); - - // InputIndex - $this->inputIndex = false; - $context->register('InputIndex', $this->inputIndex); - - // InputTokens - $context->register('InputTokens', $tokens); - $this->inputTokens =& $tokens; - - // OutputTokens + // local variables $result = array(); - $this->outputTokens =& $result; - - // %Core.EscapeInvalidTags - $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); $generator = new HTMLPurifier_Generator(); - + $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); $e =& $context->get('ErrorCollector', true); + // member variables + $this->currentNesting = array(); + $this->inputIndex = false; + $this->inputTokens =& $tokens; + $this->outputTokens =& $result; + + // context variables + $context->register('CurrentNesting', $this->currentNesting); + $context->register('InputIndex', $this->inputIndex); + $context->register('InputTokens', $tokens); + // -- begin INJECTOR -- $this->injectors = array(); @@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING); } + // warning: most foreach loops follow the convention $i => $x. + // be sure, for PHP4 compatibility, to only perform write operations + // directly referencing the object using $i: $x is only safe for reads + // -- end INJECTOR -- $token = false; @@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // if all goes well, this token will be passed through unharmed $token = $tokens[$this->inputIndex]; + //printTokens($tokens, $this->inputIndex); + foreach ($this->injectors as $i => $x) { if ($x->skip > 0) $this->injectors[$i]->skip--; } @@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if ($token->type === 'text') { // injector handler code; duplicated for performance reasons foreach ($this->injectors as $i => $x) { - if (!$x->skip) $x->handleText($token); + if (!$x->skip) $this->injectors[$i]->handleText($token); if (is_array($token)) { $this->currentInjector = $i; break; @@ -172,7 +173,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // injector handler code; duplicated for performance reasons if ($ok) { foreach ($this->injectors as $i => $x) { - if (!$x->skip) $x->handleElement($token); + if (!$x->skip) $this->injectors[$i]->handleElement($token); if (is_array($token)) { $this->currentInjector = $i; break; @@ -202,6 +203,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $current_parent = array_pop($this->currentNesting); if ($current_parent->name == $token->name) { $result[] = $token; + foreach ($this->injectors as $i => $x) { + $this->injectors[$i]->notifyEnd($token); + } continue; } @@ -238,16 +242,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // okay, we found it, close all the skipped tags // note that skipped tags contains the element we need closed - $size = count($skipped_tags); - for ($i = $size - 1; $i > 0; $i--) { - if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) { + for ($i = count($skipped_tags) - 1; $i >= 0; $i--) { + if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]); } - $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); + $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name); + foreach ($this->injectors as $j => $x) { // $j, not $i!!! + $this->injectors[$j]->notifyEnd($new_token); + } } - $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); - } $context->destroy('CurrentNesting'); @@ -255,17 +259,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $context->destroy('InputIndex'); $context->destroy('CurrentToken'); - // we're at the end now, fix all still unclosed tags - // not using processToken() because at this point we don't - // care about current nesting + // we're at the end now, fix all still unclosed tags (this is + // duplicated from the end of the loop with some slight modifications) + // not using $skipped_tags since it would invariably be all of them if (!empty($this->currentNesting)) { - $size = count($this->currentNesting); - for ($i = $size - 1; $i >= 0; $i--) { + for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) { if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]); } - $result[] = - new HTMLPurifier_Token_End($this->currentNesting[$i]->name); + $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name); + foreach ($this->injectors as $j => $x) { // $j, not $i!!! + $this->injectors[$j]->notifyEnd($new_token); + } } } @@ -286,8 +291,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // adjust the injector skips based on the array substitution if ($this->injectors) { - $offset = count($token) + 1; + $offset = count($token); for ($i = 0; $i <= $this->currentInjector; $i++) { + // because of the skip back, we need to add one more + // for uninitialized injectors. I'm not exactly + // sure why this is the case, but I think it has to + // do with the fact that we're decrementing skips + // before re-checking text + if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++; $this->injectors[$i]->skip += $offset; } } diff --git a/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/library/HTMLPurifier/Strategy/RemoveForeignElements.php index 2c280b23..5d26e4f5 100644 --- a/library/HTMLPurifier/Strategy/RemoveForeignElements.php +++ b/library/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy // mostly everything's good, but // we need to make sure required attributes are in order if ( + ($token->type === 'start' || $token->type === 'empty') && $definition->info[$token->name]->required_attr && ($token->name != 'img' || $remove_invalid_img) // ensure config option still works ) { @@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy $token->armor['ValidateAttributes'] = true; } - // CAN BE GENERICIZED if (isset($hidden_elements[$token->name]) && $token->type == 'start') { $textify_comments = $token->name; } elseif ($token->name === $textify_comments && $token->type == 'end') { diff --git a/library/HTMLPurifier/Strategy/ValidateAttributes.php b/library/HTMLPurifier/Strategy/ValidateAttributes.php index 869f3fab..6debcc33 100644 --- a/library/HTMLPurifier/Strategy/ValidateAttributes.php +++ b/library/HTMLPurifier/Strategy/ValidateAttributes.php @@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php'; require_once 'HTMLPurifier/AttrValidator.php'; -HTMLPurifier_ConfigSchema::define( - 'Attr', 'IDBlacklist', array(), 'list', - 'Array of IDs not allowed in the document.'); - /** * Validate all attributes in the tokens. */ @@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy function execute($tokens, $config, &$context) { - // setup id_accumulator context - $id_accumulator = new HTMLPurifier_IDAccumulator(); - $id_accumulator->load($config->get('Attr', 'IDBlacklist')); - $context->register('IDAccumulator', $id_accumulator); - // setup validator $validator = new HTMLPurifier_AttrValidator(); @@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy $tokens[$key] = $token; // for PHP 4 } - - $context->destroy('IDAccumulator'); $context->destroy('CurrentToken'); return $tokens; diff --git a/library/HTMLPurifier/URIFilter.php b/library/HTMLPurifier/URIFilter.php index e0066f3b..ca000ea5 100644 --- a/library/HTMLPurifier/URIFilter.php +++ b/library/HTMLPurifier/URIFilter.php @@ -1,10 +1,22 @@ host)) return true; $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) { + // scheme not recognized + return false; + } if (!$scheme_obj->hierarchical) { // non-hierarchal URI with explicit scheme, don't change return true; diff --git a/maintenance/PH5P.patch b/maintenance/PH5P.patch index 37e4dbf1..9365cffe 100644 --- a/maintenance/PH5P.patch +++ b/maintenance/PH5P.patch @@ -1,5 +1,5 @@ ---- old.php 2007-08-19 14:42:33.640625000 -0400 -+++ new.php 2007-08-19 14:41:51.609375000 -0400 +--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-04 23:41:49.074543700 -0500 ++++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2007-11-05 00:23:52.839543700 -0500 @@ -211,7 +211,10 @@ // If nothing is returned, emit a U+0026 AMPERSAND character token. // Otherwise, emit the character token that was returned. @@ -43,3 +43,22 @@ $entity = $id; break; } +@@ -3659,7 +3668,7 @@ + } + } + +- private function generateImpliedEndTags(array $exclude = array()) { ++ private function generateImpliedEndTags($exclude = array()) { + /* When the steps below require the UA to generate implied end tags, + then, if the current node is a dd element, a dt element, an li element, + a p element, a td element, a th element, or a tr element, the UA must +@@ -3673,7 +3682,8 @@ + } + } + +- private function getElementCategory($name) { ++ private function getElementCategory($node) { ++ $name = $node->tagName; + if(in_array($name, $this->special)) + return self::SPECIAL; + diff --git a/maintenance/PH5P.php b/maintenance/PH5P.php new file mode 100644 index 00000000..96d0d13f --- /dev/null +++ b/maintenance/PH5P.php @@ -0,0 +1,3824 @@ +data = $data; + $this->char = -1; + $this->EOF = strlen($data); + $this->tree = new HTML5TreeConstructer; + $this->content_model = self::PCDATA; + + $this->state = 'data'; + + while($this->state !== null) { + $this->{$this->state.'State'}(); + } + } + + public function save() { + return $this->tree->save(); + } + + private function char() { + return ($this->char < $this->EOF) + ? $this->data[$this->char] + : false; + } + + private function character($s, $l = 0) { + if($s + $l < $this->EOF) { + if($l === 0) { + return $this->data[$s]; + } else { + return substr($this->data, $s, $l); + } + } + } + + private function characters($char_class, $start) { + return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); + } + + private function dataState() { + // Consume the next input character + $this->char++; + $char = $this->char(); + + if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { + /* U+0026 AMPERSAND (&) + When the content model flag is set to one of the PCDATA or RCDATA + states: switch to the entity data state. Otherwise: treat it as per + the "anything else" entry below. */ + $this->state = 'entityData'; + + } elseif($char === '-') { + /* If the content model flag is set to either the RCDATA state or + the CDATA state, and the escape flag is false, and there are at + least three characters before this one in the input stream, and the + last four characters in the input stream, including this one, are + U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, + and U+002D HYPHEN-MINUS (""), + set the escape flag to false. */ + if(($this->content_model === self::RCDATA || + $this->content_model === self::CDATA) && $this->escape === true && + $this->character($this->char, 3) === '-->') { + $this->escape = false; + } + + /* In any case, emit the input character as a character token. + Stay in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + } elseif($this->char === $this->EOF) { + /* EOF + Emit an end-of-file token. */ + $this->EOF(); + + } elseif($this->content_model === self::PLAINTEXT) { + /* When the content model flag is set to the PLAINTEXT state + THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of + the text and emit it as a character token. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => substr($this->data, $this->char) + )); + + $this->EOF(); + + } else { + /* Anything else + THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that + otherwise would also be treated as a character token and emit it + as a single character token. Stay in the data state. */ + $len = strcspn($this->data, '<&', $this->char); + $char = substr($this->data, $this->char, $len); + $this->char += $len - 1; + + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + $this->state = 'data'; + } + } + + private function entityDataState() { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, emit a U+0026 AMPERSAND character token. + // Otherwise, emit the character token that was returned. + $char = (!$entity) ? '&' : $entity; + $this->emitToken($char); + + // Finally, switch to the data state. + $this->state = 'data'; + } + + private function tagOpenState() { + switch($this->content_model) { + case self::RCDATA: + case self::CDATA: + /* If the next input character is a U+002F SOLIDUS (/) character, + consume it and switch to the close tag open state. If the next + input character is not a U+002F SOLIDUS (/) character, emit a + U+003C LESS-THAN SIGN character token and switch to the data + state to process the next input character. */ + if($this->character($this->char + 1) === '/') { + $this->char++; + $this->state = 'closeTagOpen'; + + } else { + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<' + )); + + $this->state = 'data'; + } + break; + + case self::PCDATA: + // If the content model flag is set to the PCDATA state + // Consume the next input character: + $this->char++; + $char = $this->char(); + + if($char === '!') { + /* U+0021 EXCLAMATION MARK (!) + Switch to the markup declaration open state. */ + $this->state = 'markupDeclarationOpen'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Switch to the close tag open state. */ + $this->state = 'closeTagOpen'; + + } elseif(preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new start tag token, set its tag name to the lowercase + version of the input character (add 0x0020 to the character's code + point), then switch to the tag name state. (Don't emit the token + yet; further details will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::STARTTAG, + 'attr' => array() + ); + + $this->state = 'tagName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Emit a U+003C LESS-THAN SIGN character token and a + U+003E GREATER-THAN SIGN character token. Switch to the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<>' + )); + + $this->state = 'data'; + + } elseif($char === '?') { + /* U+003F QUESTION MARK (?) + Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + + } else { + /* Anything else + Parse error. Emit a U+003C LESS-THAN SIGN character token and + reconsume the current input character in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<' + )); + + $this->char--; + $this->state = 'data'; + } + break; + } + } + + private function closeTagOpenState() { + $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); + $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; + + if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && + (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', + $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { + /* If the content model flag is set to the RCDATA or CDATA states then + examine the next few characters. If they do not match the tag name of + the last start tag token emitted (case insensitively), or if they do but + they are not immediately followed by one of the following characters: + * U+0009 CHARACTER TABULATION + * U+000A LINE FEED (LF) + * U+000B LINE TABULATION + * U+000C FORM FEED (FF) + * U+0020 SPACE + * U+003E GREATER-THAN SIGN (>) + * U+002F SOLIDUS (/) + * EOF + ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character + token, a U+002F SOLIDUS character token, and switch to the data state + to process the next input character. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => 'state = 'data'; + + } else { + /* Otherwise, if the content model flag is set to the PCDATA state, + or if the next few characters do match that tag name, consume the + next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new end tag token, set its tag name to the lowercase version + of the input character (add 0x0020 to the character's code point), then + switch to the tag name state. (Don't emit the token yet; further details + will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::ENDTAG + ); + + $this->state = 'tagName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Switch to the data state. */ + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F + SOLIDUS character token. Reconsume the EOF character in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => 'char--; + $this->state = 'data'; + + } else { + /* Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + } + } + } + + private function tagNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } else { + /* Anything else + Append the current input character to the current tag token's tag name. + Stay in the tag name state. */ + $this->token['name'] .= strtolower($char); + $this->state = 'tagName'; + } + } + + private function beforeAttributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Stay in the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function attributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's name. + Stay in the attribute name state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['name'] .= strtolower($char); + + $this->state = 'attributeName'; + } + } + + private function afterAttributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the + before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function beforeAttributeValueState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the attribute value (double-quoted) state. */ + $this->state = 'attributeValueDoubleQuoted'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the attribute value (unquoted) state and reconsume + this input character. */ + $this->char--; + $this->state = 'attributeValueUnquoted'; + + } elseif($char === '\'') { + /* U+0027 APOSTROPHE (') + Switch to the attribute value (single-quoted) state. */ + $this->state = 'attributeValueSingleQuoted'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Switch to the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function attributeValueDoubleQuotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('double'); + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (double-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueDoubleQuoted'; + } + } + + private function attributeValueSingleQuotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if($char === '\'') { + /* U+0022 QUOTATION MARK (') + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('single'); + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (single-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueSingleQuoted'; + } + } + + private function attributeValueUnquotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('non'); + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function entityInAttributeValueState() { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, append a U+0026 AMPERSAND character to the + // current attribute's value. Otherwise, emit the character token that + // was returned. + $char = (!$entity) + ? '&' + : $entity; + + $this->emitToken($char); + } + + private function bogusCommentState() { + /* Consume every character up to the first U+003E GREATER-THAN SIGN + character (>) or the end of the file (EOF), whichever comes first. Emit + a comment token whose data is the concatenation of all the characters + starting from and including the character that caused the state machine + to switch into the bogus comment state, up to and including the last + consumed character before the U+003E character, if any, or up to the + end of the file otherwise. (If the comment was started by the end of + the file (EOF), the token is empty.) */ + $data = $this->characters('^>', $this->char); + $this->emitToken(array( + 'data' => $data, + 'type' => self::COMMENT + )); + + $this->char += strlen($data); + + /* Switch to the data state. */ + $this->state = 'data'; + + /* If the end of the file was reached, reconsume the EOF character. */ + if($this->char === $this->EOF) { + $this->char = $this->EOF - 1; + } + } + + private function markupDeclarationOpenState() { + /* If the next two characters are both U+002D HYPHEN-MINUS (-) + characters, consume those two characters, create a comment token whose + data is the empty string, and switch to the comment state. */ + if($this->character($this->char + 1, 2) === '--') { + $this->char += 2; + $this->state = 'comment'; + $this->token = array( + 'data' => null, + 'type' => self::COMMENT + ); + + /* Otherwise if the next seven chacacters are a case-insensitive match + for the word "DOCTYPE", then consume those characters and switch to the + DOCTYPE state. */ + } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { + $this->char += 7; + $this->state = 'doctype'; + + /* Otherwise, is is a parse error. Switch to the bogus comment state. + The next character that is consumed, if any, is the first character + that will be in the comment. */ + } else { + $this->char++; + $this->state = 'bogusComment'; + } + } + + private function commentState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if($char === '-') { + /* Switch to the comment dash state */ + $this->state = 'commentDash'; + + /* EOF */ + } elseif($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append the input character to the comment token's data. Stay in + the comment state. */ + $this->token['data'] .= $char; + } + } + + private function commentDashState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if($char === '-') { + /* Switch to the comment end state */ + $this->state = 'commentEnd'; + + /* EOF */ + } elseif($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append a U+002D HYPHEN-MINUS (-) character and the input + character to the comment token's data. Switch to the comment state. */ + $this->token['data'] .= '-'.$char; + $this->state = 'comment'; + } + } + + private function commentEndState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '-') { + $this->token['data'] .= '-'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['data'] .= '--'.$char; + $this->state = 'comment'; + } + } + + private function doctypeState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'beforeDoctypeName'; + + } else { + $this->char--; + $this->state = 'beforeDoctypeName'; + } + } + + private function beforeDoctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the before DOCTYPE name state. + + } elseif(preg_match('/^[a-z]$/', $char)) { + $this->token = array( + 'name' => strtoupper($char), + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + + } elseif($char === '>') { + $this->emitToken(array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + )); + + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken(array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + )); + + $this->char--; + $this->state = 'data'; + + } else { + $this->token = array( + 'name' => $char, + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + } + } + + private function doctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'AfterDoctypeName'; + + } elseif($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif(preg_match('/^[a-z]$/', $char)) { + $this->token['name'] .= strtoupper($char); + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['name'] .= $char; + } + + $this->token['error'] = ($this->token['name'] === 'HTML') + ? false + : true; + } + + private function afterDoctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the DOCTYPE name state. + + } elseif($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['error'] = true; + $this->state = 'bogusDoctype'; + } + } + + private function bogusDoctypeState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + // Stay in the bogus DOCTYPE state. + } + } + + private function entity() { + $start = $this->char; + + // This section defines how to consume an entity. This definition is + // used when parsing entities in text and in attributes. + + // The behaviour depends on the identity of the next character (the + // one immediately after the U+0026 AMPERSAND character): + + switch($this->character($this->char + 1)) { + // U+0023 NUMBER SIGN (#) + case '#': + + // The behaviour further depends on the character after the + // U+0023 NUMBER SIGN: + switch($this->character($this->char + 1)) { + // U+0078 LATIN SMALL LETTER X + // U+0058 LATIN CAPITAL LETTER X + case 'x': + case 'X': + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 + // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER + // A, through to U+0046 LATIN CAPITAL LETTER F (in other + // words, 0-9, A-F, a-f). + $char = 1; + $char_class = '0-9A-Fa-f'; + break; + + // Anything else + default: + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE (i.e. just 0-9). + $char = 0; + $char_class = '0-9'; + break; + } + + // Consume as many characters as match the range of characters + // given above. + $this->char++; + $e_name = $this->characters($char_class, $this->char + $char + 1); + $entity = $this->character($start, $this->char); + $cond = strlen($e_name) > 0; + + // The rest of the parsing happens bellow. + break; + + // Anything else + default: + // Consume the maximum number of characters possible, with the + // consumed characters case-sensitively matching one of the + // identifiers in the first column of the entities table. + $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); + $len = strlen($e_name); + + for($c = 1; $c <= $len; $c++) { + $id = substr($e_name, 0, $c); + $this->char++; + + if(in_array($id, $this->entities)) { + $entity = $id; + break; + } + } + + $cond = isset($entity); + // The rest of the parsing happens bellow. + break; + } + + if(!$cond) { + // If no match can be made, then this is a parse error. No + // characters are consumed, and nothing is returned. + $this->char = $start; + return false; + } + + // Return a character token for the character corresponding to the + // entity name (as given by the second column of the entities table). + return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8'); + } + + private function emitToken($token) { + $emit = $this->tree->emitToken($token); + + if(is_int($emit)) { + $this->content_model = $emit; + + } elseif($token['type'] === self::ENDTAG) { + $this->content_model = self::PCDATA; + } + } + + private function EOF() { + $this->state = null; + $this->tree->emitToken(array( + 'type' => self::EOF + )); + } +} + +class HTML5TreeConstructer { + public $stack = array(); + + private $phase; + private $mode; + private $dom; + private $foster_parent = null; + private $a_formatting = array(); + + private $head_pointer = null; + private $form_pointer = null; + + private $scoping = array('button','caption','html','marquee','object','table','td','th'); + private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); + private $special = array('address','area','base','basefont','bgsound', + 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', + 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', + 'h6','head','hr','iframe','image','img','input','isindex','li','link', + 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', + 'option','p','param','plaintext','pre','script','select','spacer','style', + 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); + + // The different phases. + const INIT_PHASE = 0; + const ROOT_PHASE = 1; + const MAIN_PHASE = 2; + const END_PHASE = 3; + + // The different insertion modes for the main phase. + const BEFOR_HEAD = 0; + const IN_HEAD = 1; + const AFTER_HEAD = 2; + const IN_BODY = 3; + const IN_TABLE = 4; + const IN_CAPTION = 5; + const IN_CGROUP = 6; + const IN_TBODY = 7; + const IN_ROW = 8; + const IN_CELL = 9; + const IN_SELECT = 10; + const AFTER_BODY = 11; + const IN_FRAME = 12; + const AFTR_FRAME = 13; + + // The different types of elements. + const SPECIAL = 0; + const SCOPING = 1; + const FORMATTING = 2; + const PHRASING = 3; + + const MARKER = 0; + + public function __construct() { + $this->phase = self::INIT_PHASE; + $this->mode = self::BEFOR_HEAD; + $this->dom = new DOMDocument; + + $this->dom->encoding = 'UTF-8'; + $this->dom->preserveWhiteSpace = true; + $this->dom->substituteEntities = true; + $this->dom->strictErrorChecking = false; + } + + // Process tag tokens + public function emitToken($token) { + switch($this->phase) { + case self::INIT_PHASE: return $this->initPhase($token); break; + case self::ROOT_PHASE: return $this->rootElementPhase($token); break; + case self::MAIN_PHASE: return $this->mainPhase($token); break; + case self::END_PHASE : return $this->trailingEndPhase($token); break; + } + } + + private function initPhase($token) { + /* Initially, the tree construction stage must handle each token + emitted from the tokenisation stage as follows: */ + + /* A DOCTYPE token that is marked as being in error + A comment token + A start tag token + An end tag token + A character token that is not one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE + An end-of-file token */ + if((isset($token['error']) && $token['error']) || + $token['type'] === HTML5::COMMENT || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF || + ($token['type'] === HTML5::CHARACTR && isset($token['data']) && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) { + /* This specification does not define how to handle this case. In + particular, user agents may ignore the entirety of this specification + altogether for such documents, and instead invoke special parse modes + with a greater emphasis on backwards compatibility. */ + + $this->phase = self::ROOT_PHASE; + return $this->rootElementPhase($token); + + /* A DOCTYPE token marked as being correct */ + } elseif(isset($token['error']) && !$token['error']) { + /* Append a DocumentType node to the Document node, with the name + attribute set to the name given in the DOCTYPE token (which will be + "HTML"), and the other attributes specific to DocumentType objects + set to null, empty lists, or the empty string as appropriate. */ + $doctype = new DOMDocumentType(null, null, 'HTML'); + + /* Then, switch to the root element phase of the tree construction + stage. */ + $this->phase = self::ROOT_PHASE; + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', + $token['data'])) { + /* Append that character to the Document node. */ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + } + } + + private function rootElementPhase($token) { + /* After the initial phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append that character to the Document node. */ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED + (FF), or U+0020 SPACE + A start tag token + An end tag token + An end-of-file token */ + } elseif(($token['type'] === HTML5::CHARACTR && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF) { + /* Create an HTMLElement node with the tag name html, in the HTML + namespace. Append it to the Document object. Switch to the main + phase and reprocess the current token. */ + $html = $this->dom->createElement('html'); + $this->dom->appendChild($html); + $this->stack[] = $html; + + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + } + } + + private function mainPhase($token) { + /* Tokens in the main phase must be handled as follows: */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A start tag token with the tag name "html" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { + /* If this start tag token was not the first start tag token, then + it is a parse error. */ + + /* For each attribute on the token, check to see if the attribute + is already present on the top element of the stack of open elements. + If it is not, add the attribute and its corresponding value to that + element. */ + foreach($token['attr'] as $attr) { + if(!$this->stack[0]->hasAttribute($attr['name'])) { + $this->stack[0]->setAttribute($attr['name'], $attr['value']); + } + } + + /* An end-of-file token */ + } elseif($token['type'] === HTML5::EOF) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Anything else. */ + } else { + /* Depends on the insertion mode: */ + switch($this->mode) { + case self::BEFOR_HEAD: return $this->beforeHead($token); break; + case self::IN_HEAD: return $this->inHead($token); break; + case self::AFTER_HEAD: return $this->afterHead($token); break; + case self::IN_BODY: return $this->inBody($token); break; + case self::IN_TABLE: return $this->inTable($token); break; + case self::IN_CAPTION: return $this->inCaption($token); break; + case self::IN_CGROUP: return $this->inColumnGroup($token); break; + case self::IN_TBODY: return $this->inTableBody($token); break; + case self::IN_ROW: return $this->inRow($token); break; + case self::IN_CELL: return $this->inCell($token); break; + case self::IN_SELECT: return $this->inSelect($token); break; + case self::AFTER_BODY: return $this->afterBody($token); break; + case self::IN_FRAME: return $this->inFrameset($token); break; + case self::AFTR_FRAME: return $this->afterFrameset($token); break; + case self::END_PHASE: return $this->trailingEndPhase($token); break; + } + } + } + + private function beforeHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token with the tag name "head" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { + /* Create an element for the token, append the new element to the + current node and push it onto the stack of open elements. */ + $element = $this->insertElement($token); + + /* Set the head element pointer to this new element node. */ + $this->head_pointer = $element; + + /* Change the insertion mode to "in head". */ + $this->mode = self::IN_HEAD; + + /* A start tag token whose tag name is one of: "base", "link", "meta", + "script", "style", "title". Or an end tag with the tag name "html". + Or a character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. Or any other start tag token */ + } elseif($token['type'] === HTML5::STARTTAG || + ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || + ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', + $token['data']))) { + /* Act as if a start tag token with the tag name "head" and no + attributes had been seen, then reprocess the current token. */ + $this->beforeHead(array( + 'name' => 'head', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inHead($token); + + /* Any other end tag */ + } elseif($token['type'] === HTML5::ENDTAG) { + /* Parse error. Ignore the token. */ + } + } + + private function inHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. + + THIS DIFFERS FROM THE SPEC: If the current node is either a title, style + or script element, append the character to the current node regardless + of its content. */ + if(($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( + $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName, + array('title', 'style', 'script')))) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('title', 'style', 'script'))) { + array_pop($this->stack); + return HTML5::PCDATA; + + /* A start tag with the tag name "title" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + } else { + $element = $this->insertElement($token); + } + + /* Switch the tokeniser's content model flag to the RCDATA state. */ + return HTML5::RCDATA; + + /* A start tag with the tag name "style" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + } else { + $this->insertElement($token); + } + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + + /* A start tag with the tag name "script" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { + /* Create an element for the token. */ + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + + /* A start tag with the tag name "base", "link", or "meta" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('base', 'link', 'meta'))) { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + array_pop($this->stack); + + } else { + $this->insertElement($token); + } + + /* An end tag with the tag name "head" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { + /* If the current node is a head element, pop the current node off + the stack of open elements. */ + if($this->head_pointer->isSameNode(end($this->stack))) { + array_pop($this->stack); + + /* Otherwise, this is a parse error. */ + } else { + // k + } + + /* Change the insertion mode to "after head". */ + $this->mode = self::AFTER_HEAD; + + /* A start tag with the tag name "head" or an end tag except "html". */ + } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || + ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* If the current node is a head element, act as if an end tag + token with the tag name "head" had been seen. */ + if($this->head_pointer->isSameNode(end($this->stack))) { + $this->inHead(array( + 'name' => 'head', + 'type' => HTML5::ENDTAG + )); + + /* Otherwise, change the insertion mode to "after head". */ + } else { + $this->mode = self::AFTER_HEAD; + } + + /* Then, reprocess the current token. */ + return $this->afterHead($token); + } + } + + private function afterHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token with the tag name "body" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { + /* Insert a body element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in body". */ + $this->mode = self::IN_BODY; + + /* A start tag token with the tag name "frameset" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { + /* Insert a frameset element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in frameset". */ + $this->mode = self::IN_FRAME; + + /* A start tag token whose tag name is one of: "base", "link", "meta", + "script", "style", "title" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('base', 'link', 'meta', 'script', 'style', 'title'))) { + /* Parse error. Switch the insertion mode back to "in head" and + reprocess the token. */ + $this->mode = self::IN_HEAD; + return $this->inHead($token); + + /* Anything else */ + } else { + /* Act as if a start tag token with the tag name "body" and no + attributes had been seen, and then reprocess the current token. */ + $this->afterHead(array( + 'name' => 'body', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inBody($token); + } + } + + private function inBody($token) { + /* Handle the token as follows: */ + + switch($token['type']) { + /* A character token */ + case HTML5::CHARACTR: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Append the token's character to the current node. */ + $this->insertText($token['data']); + break; + + /* A comment token */ + case HTML5::COMMENT: + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + break; + + case HTML5::STARTTAG: + switch($token['name']) { + /* A start tag token whose tag name is one of: "script", + "style" */ + case 'script': case 'style': + /* Process the token as if the insertion mode had been "in + head". */ + return $this->inHead($token); + break; + + /* A start tag token whose tag name is one of: "base", "link", + "meta", "title" */ + case 'base': case 'link': case 'meta': case 'title': + /* Parse error. Process the token as if the insertion mode + had been "in head". */ + return $this->inHead($token); + break; + + /* A start tag token with the tag name "body" */ + case 'body': + /* Parse error. If the second element on the stack of open + elements is not a body element, or, if the stack of open + elements has only one node on it, then ignore the token. + (innerHTML case) */ + if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { + // Ignore + + /* Otherwise, for each attribute on the token, check to see + if the attribute is already present on the body element (the + second element) on the stack of open elements. If it is not, + add the attribute and its corresponding value to that + element. */ + } else { + foreach($token['attr'] as $attr) { + if(!$this->stack[1]->hasAttribute($attr['name'])) { + $this->stack[1]->setAttribute($attr['name'], $attr['value']); + } + } + } + break; + + /* A start tag whose tag name is one of: "address", + "blockquote", "center", "dir", "div", "dl", "fieldset", + "listing", "menu", "ol", "p", "ul" */ + case 'address': case 'blockquote': case 'center': case 'dir': + case 'div': case 'dl': case 'fieldset': case 'listing': + case 'menu': case 'ol': case 'p': case 'ul': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "form" */ + case 'form': + /* If the form element pointer is not null, ignore the + token with a parse error. */ + if($this->form_pointer !== null) { + // Ignore. + + /* Otherwise: */ + } else { + /* If the stack of open elements has a p element in + scope, then act as if an end tag with the tag name p + had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token, and set the + form element pointer to point to the element created. */ + $element = $this->insertElement($token); + $this->form_pointer = $element; + } + break; + + /* A start tag whose tag name is "li", "dd" or "dt" */ + case 'li': case 'dd': case 'dt': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + $stack_length = count($this->stack) - 1; + + for($n = $stack_length; 0 <= $n; $n--) { + /* 1. Initialise node to be the current node (the + bottommost node of the stack). */ + $stop = false; + $node = $this->stack[$n]; + $cat = $this->getElementCategory($node->tagName); + + /* 2. If node is an li, dd or dt element, then pop all + the nodes from the current node up to node, including + node, then stop this algorithm. */ + if($token['name'] === $node->tagName || ($token['name'] !== 'li' + && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { + for($x = $stack_length; $x >= $n ; $x--) { + array_pop($this->stack); + } + + break; + } + + /* 3. If node is not in the formatting category, and is + not in the phrasing category, and is not an address or + div element, then stop this algorithm. */ + if($cat !== self::FORMATTING && $cat !== self::PHRASING && + $node->tagName !== 'address' && $node->tagName !== 'div') { + break; + } + } + + /* Finally, insert an HTML element with the same tag + name as the token's. */ + $this->insertElement($token); + break; + + /* A start tag token whose tag name is "plaintext" */ + case 'plaintext': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + return HTML5::PLAINTEXT; + break; + + /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + this is a parse error; pop elements from the stack until an + element with one of those tag names has been popped from the + stack. */ + while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { + array_pop($this->stack); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "a" */ + case 'a': + /* If the list of active formatting elements contains + an element whose tag name is "a" between the end of the + list and the last marker on the list (or the start of + the list if there is no marker on the list), then this + is a parse error; act as if an end tag with the tag name + "a" had been seen, then remove that element from the list + of active formatting elements and the stack of open + elements if the end tag didn't already remove it (it + might not have if the element is not in table scope). */ + $leng = count($this->a_formatting); + + for($n = $leng - 1; $n >= 0; $n--) { + if($this->a_formatting[$n] === self::MARKER) { + break; + + } elseif($this->a_formatting[$n]->nodeName === 'a') { + $this->emitToken(array( + 'name' => 'a', + 'type' => HTML5::ENDTAG + )); + break; + } + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. */ + $this->a_formatting[] = $el; + break; + + /* A start tag whose tag name is one of: "b", "big", "em", "font", + "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'b': case 'big': case 'em': case 'font': case 'i': + case 'nobr': case 's': case 'small': case 'strike': + case 'strong': case 'tt': case 'u': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. */ + $this->a_formatting[] = $el; + break; + + /* A start tag token whose tag name is "button" */ + case 'button': + /* If the stack of open elements has a button element in scope, + then this is a parse error; act as if an end tag with the tag + name "button" had been seen, then reprocess the token. (We don't + do that. Unnecessary.) */ + if($this->elementInScope('button')) { + $this->inBody(array( + 'name' => 'button', + 'type' => HTML5::ENDTAG + )); + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is one of: "marquee", "object" */ + case 'marquee': case 'object': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is "xmp" */ + case 'xmp': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Switch the content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "table" */ + case 'table': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in table". */ + $this->mode = self::IN_TABLE; + break; + + /* A start tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ + case 'area': case 'basefont': case 'bgsound': case 'br': + case 'embed': case 'img': case 'param': case 'spacer': + case 'wbr': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "hr" */ + case 'hr': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "image" */ + case 'image': + /* Parse error. Change the token's tag name to "img" and + reprocess it. (Don't ask.) */ + $token['name'] = 'img'; + return $this->inBody($token); + break; + + /* A start tag whose tag name is "input" */ + case 'input': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an input element for the token. */ + $element = $this->insertElement($token, false); + + /* If the form element pointer is not null, then associate the + input element with the form element pointed to by the form + element pointer. */ + $this->form_pointer !== null + ? $this->form_pointer->appendChild($element) + : end($this->stack)->appendChild($element); + + /* Pop that input element off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "isindex" */ + case 'isindex': + /* Parse error. */ + // w/e + + /* If the form element pointer is not null, + then ignore the token. */ + if($this->form_pointer === null) { + /* Act as if a start tag token with the tag name "form" had + been seen. */ + $this->inBody(array( + 'name' => 'body', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody(array( + 'name' => 'hr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "p" had + been seen. */ + $this->inBody(array( + 'name' => 'p', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "label" + had been seen. */ + $this->inBody(array( + 'name' => 'label', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a stream of character tokens had been seen. */ + $this->insertText('This is a searchable index. '. + 'Insert your search keywords here: '); + + /* Act as if a start tag token with the tag name "input" + had been seen, with all the attributes from the "isindex" + token, except with the "name" attribute set to the value + "isindex" (ignoring any explicit "name" attribute). */ + $attr = $token['attr']; + $attr[] = array('name' => 'name', 'value' => 'isindex'); + + $this->inBody(array( + 'name' => 'input', + 'type' => HTML5::STARTTAG, + 'attr' => $attr + )); + + /* Act as if a stream of character tokens had been seen + (see below for what they should say). */ + $this->insertText('This is a searchable index. '. + 'Insert your search keywords here: '); + + /* Act as if an end tag token with the tag name "label" + had been seen. */ + $this->inBody(array( + 'name' => 'label', + 'type' => HTML5::ENDTAG + )); + + /* Act as if an end tag token with the tag name "p" had + been seen. */ + $this->inBody(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody(array( + 'name' => 'hr', + 'type' => HTML5::ENDTAG + )); + + /* Act as if an end tag token with the tag name "form" had + been seen. */ + $this->inBody(array( + 'name' => 'form', + 'type' => HTML5::ENDTAG + )); + } + break; + + /* A start tag whose tag name is "textarea" */ + case 'textarea': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the + RCDATA state. */ + return HTML5::RCDATA; + break; + + /* A start tag whose tag name is one of: "iframe", "noembed", + "noframes" */ + case 'iframe': case 'noembed': case 'noframes': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "select" */ + case 'select': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in select". */ + $this->mode = self::IN_SELECT; + break; + + /* A start or end tag whose tag name is one of: "caption", "col", + "colgroup", "frame", "frameset", "head", "option", "optgroup", + "tbody", "td", "tfoot", "th", "thead", "tr". */ + case 'caption': case 'col': case 'colgroup': case 'frame': + case 'frameset': case 'head': case 'option': case 'optgroup': + case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': + case 'tr': + // Parse error. Ignore the token. + break; + + /* A start or end tag whose tag name is one of: "event-source", + "section", "nav", "article", "aside", "header", "footer", + "datagrid", "command" */ + case 'event-source': case 'section': case 'nav': case 'article': + case 'aside': case 'header': case 'footer': case 'datagrid': + case 'command': + // Work in progress! + break; + + /* A start tag token not covered by the previous entries */ + default: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + $this->insertElement($token); + break; + } + break; + + case HTML5::ENDTAG: + switch($token['name']) { + /* An end tag with the tag name "body" */ + case 'body': + /* If the second element in the stack of open elements is + not a body element, this is a parse error. Ignore the token. + (innerHTML case) */ + if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { + // Ignore. + + /* If the current node is not the body element, then this + is a parse error. */ + } elseif(end($this->stack)->nodeName !== 'body') { + // Parse error. + } + + /* Change the insertion mode to "after body". */ + $this->mode = self::AFTER_BODY; + break; + + /* An end tag with the tag name "html" */ + case 'html': + /* Act as if an end tag with tag name "body" had been seen, + then, if that token wasn't ignored, reprocess the current + token. */ + $this->inBody(array( + 'name' => 'body', + 'type' => HTML5::ENDTAG + )); + + return $this->afterBody($token); + break; + + /* An end tag whose tag name is one of: "address", "blockquote", + "center", "dir", "div", "dl", "fieldset", "listing", "menu", + "ol", "pre", "ul" */ + case 'address': case 'blockquote': case 'center': case 'dir': + case 'div': case 'dl': case 'fieldset': case 'listing': + case 'menu': case 'ol': case 'pre': case 'ul': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with + the same tag name as that of the token, then this + is a parse error. */ + // w/e + + /* If the stack of open elements has an element in + scope with the same tag name as that of the token, + then pop elements from this stack until an element + with that tag name has been popped from the stack. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is "form" */ + case 'form': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + } + + if(end($this->stack)->nodeName !== $token['name']) { + /* Now, if the current node is not an element with the + same tag name as that of the token, then this is a parse + error. */ + // w/e + + } else { + /* Otherwise, if the current node is an element with + the same tag name as that of the token pop that element + from the stack. */ + array_pop($this->stack); + } + + /* In any case, set the form element pointer to null. */ + $this->form_pointer = null; + break; + + /* An end tag whose tag name is "p" */ + case 'p': + /* If the stack of open elements has a p element in scope, + then generate implied end tags, except for p elements. */ + if($this->elementInScope('p')) { + $this->generateImpliedEndTags(array('p')); + + /* If the current node is not a p element, then this is + a parse error. */ + // k + + /* If the stack of open elements has a p element in + scope, then pop elements from this stack until the stack + no longer has a p element in scope. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->elementInScope('p')) { + array_pop($this->stack); + + } else { + break; + } + } + } + break; + + /* An end tag whose tag name is "dd", "dt", or "li" */ + case 'dd': case 'dt': case 'li': + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + generate implied end tags, except for elements with the + same tag name as the token. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(array($token['name'])); + + /* If the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + pop elements from this stack until an element with that + tag name has been popped from the stack. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': + $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + generate implied end tags. */ + if($this->elementInScope($elements)) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as that of the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has in scope an element + whose tag name is one of "h1", "h2", "h3", "h4", "h5", or + "h6", then pop elements from the stack until an element + with one of those tag names has been popped from the stack. */ + while($this->elementInScope($elements)) { + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "a", "b", "big", "em", + "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'a': case 'b': case 'big': case 'em': case 'font': + case 'i': case 'nobr': case 's': case 'small': case 'strike': + case 'strong': case 'tt': case 'u': + /* 1. Let the formatting element be the last element in + the list of active formatting elements that: + * is between the end of the list and the last scope + marker in the list, if any, or the start of the list + otherwise, and + * has the same tag name as the token. + */ + while(true) { + for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { + if($this->a_formatting[$a] === self::MARKER) { + break; + + } elseif($this->a_formatting[$a]->tagName === $token['name']) { + $formatting_element = $this->a_formatting[$a]; + $in_stack = in_array($formatting_element, $this->stack, true); + $fe_af_pos = $a; + break; + } + } + + /* If there is no such node, or, if that node is + also in the stack of open elements but the element + is not in scope, then this is a parse error. Abort + these steps. The token is ignored. */ + if(!isset($formatting_element) || ($in_stack && + !$this->elementInScope($token['name']))) { + break; + + /* Otherwise, if there is such a node, but that node + is not in the stack of open elements, then this is a + parse error; remove the element from the list, and + abort these steps. */ + } elseif(isset($formatting_element) && !$in_stack) { + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 2. Let the furthest block be the topmost node in the + stack of open elements that is lower in the stack + than the formatting element, and is not an element in + the phrasing or formatting categories. There might + not be one. */ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $length = count($this->stack); + + for($s = $fe_s_pos + 1; $s < $length; $s++) { + $category = $this->getElementCategory($this->stack[$s]->nodeName); + + if($category !== self::PHRASING && $category !== self::FORMATTING) { + $furthest_block = $this->stack[$s]; + } + } + + /* 3. If there is no furthest block, then the UA must + skip the subsequent steps and instead just pop all + the nodes from the bottom of the stack of open + elements, from the current node up to the formatting + element, and remove the formatting element from the + list of active formatting elements. */ + if(!isset($furthest_block)) { + for($n = $length - 1; $n >= $fe_s_pos; $n--) { + array_pop($this->stack); + } + + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 4. Let the common ancestor be the element + immediately above the formatting element in the stack + of open elements. */ + $common_ancestor = $this->stack[$fe_s_pos - 1]; + + /* 5. If the furthest block has a parent node, then + remove the furthest block from its parent node. */ + if($furthest_block->parentNode !== null) { + $furthest_block->parentNode->removeChild($furthest_block); + } + + /* 6. Let a bookmark note the position of the + formatting element in the list of active formatting + elements relative to the elements on either side + of it in the list. */ + $bookmark = $fe_af_pos; + + /* 7. Let node and last node be the furthest block. + Follow these steps: */ + $node = $furthest_block; + $last_node = $furthest_block; + + while(true) { + for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { + /* 7.1 Let node be the element immediately + prior to node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 7.2 If node is not in the list of active + formatting elements, then remove node from + the stack of open elements and then go back + to step 1. */ + if(!in_array($node, $this->a_formatting, true)) { + unset($this->stack[$n]); + $this->stack = array_merge($this->stack); + + } else { + break; + } + } + + /* 7.3 Otherwise, if node is the formatting + element, then go to the next step in the overall + algorithm. */ + if($node === $formatting_element) { + break; + + /* 7.4 Otherwise, if last node is the furthest + block, then move the aforementioned bookmark to + be immediately after the node in the list of + active formatting elements. */ + } elseif($last_node === $furthest_block) { + $bookmark = array_search($node, $this->a_formatting, true) + 1; + } + + /* 7.5 If node has any children, perform a + shallow clone of node, replace the entry for + node in the list of active formatting elements + with an entry for the clone, replace the entry + for node in the stack of open elements with an + entry for the clone, and let node be the clone. */ + if($node->hasChildNodes()) { + $clone = $node->cloneNode(); + $s_pos = array_search($node, $this->stack, true); + $a_pos = array_search($node, $this->a_formatting, true); + + $this->stack[$s_pos] = $clone; + $this->a_formatting[$a_pos] = $clone; + $node = $clone; + } + + /* 7.6 Insert last node into node, first removing + it from its previous parent node if any. */ + if($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $node->appendChild($last_node); + + /* 7.7 Let last node be node. */ + $last_node = $node; + } + + /* 8. Insert whatever last node ended up being in + the previous step into the common ancestor node, + first removing it from its previous parent node if + any. */ + if($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $common_ancestor->appendChild($last_node); + + /* 9. Perform a shallow clone of the formatting + element. */ + $clone = $formatting_element->cloneNode(); + + /* 10. Take all of the child nodes of the furthest + block and append them to the clone created in the + last step. */ + while($furthest_block->hasChildNodes()) { + $child = $furthest_block->firstChild; + $furthest_block->removeChild($child); + $clone->appendChild($child); + } + + /* 11. Append that clone to the furthest block. */ + $furthest_block->appendChild($clone); + + /* 12. Remove the formatting element from the list + of active formatting elements, and insert the clone + into the list of active formatting elements at the + position of the aforementioned bookmark. */ + $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + + $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); + $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); + $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); + + /* 13. Remove the formatting element from the stack + of open elements, and insert the clone into the stack + of open elements immediately after (i.e. in a more + deeply nested position than) the position of the + furthest block in that stack. */ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $fb_s_pos = array_search($furthest_block, $this->stack, true); + unset($this->stack[$fe_s_pos]); + + $s_part1 = array_slice($this->stack, 0, $fb_s_pos); + $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); + $this->stack = array_merge($s_part1, array($clone), $s_part2); + + /* 14. Jump back to step 1 in this series of steps. */ + unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); + } + break; + + /* An end tag token whose tag name is one of: "button", + "marquee", "object" */ + case 'button': case 'marquee': case 'object': + /* If the stack of open elements has an element in scope whose + tag name matches the tag name of the token, then generate implied + tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // k + + /* Now, if the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then pop + elements from the stack until that element has been popped from + the stack, and clear the list of active formatting elements up + to the last marker. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + + $marker = end(array_keys($this->a_formatting, self::MARKER, true)); + + for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { + array_pop($this->a_formatting); + } + } + break; + + /* Or an end tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "hr", "iframe", "image", "img", + "input", "isindex", "noembed", "noframes", "param", "select", + "spacer", "table", "textarea", "wbr" */ + case 'area': case 'basefont': case 'bgsound': case 'br': + case 'embed': case 'hr': case 'iframe': case 'image': + case 'img': case 'input': case 'isindex': case 'noembed': + case 'noframes': case 'param': case 'select': case 'spacer': + case 'table': case 'textarea': case 'wbr': + // Parse error. Ignore the token. + break; + + /* An end tag token not covered by the previous entries */ + default: + for($n = count($this->stack) - 1; $n >= 0; $n--) { + /* Initialise node to be the current node (the bottommost + node of the stack). */ + $node = end($this->stack); + + /* If node has the same tag name as the end tag token, + then: */ + if($token['name'] === $node->nodeName) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* If the tag name of the end tag token does not + match the tag name of the current node, this is a + parse error. */ + // k + + /* Pop all the nodes from the current node up to + node, including node, then stop this algorithm. */ + for($x = count($this->stack) - $n; $x >= $n; $x--) { + array_pop($this->stack); + } + + } else { + $category = $this->getElementCategory($node); + + if($category !== self::SPECIAL && $category !== self::SCOPING) { + /* Otherwise, if node is in neither the formatting + category nor the phrasing category, then this is a + parse error. Stop this algorithm. The end tag token + is ignored. */ + return false; + } + } + } + break; + } + break; + } + } + + private function inTable($token) { + $clear = array('html', 'table'); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $text = $this->dom->createTextNode($token['data']); + end($this->stack)->appendChild($text); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + end($this->stack)->appendChild($comment); + + /* A start tag whose tag name is "caption" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'caption') { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + + /* Insert an HTML element for the token, then switch the + insertion mode to "in caption". */ + $this->insertElement($token); + $this->mode = self::IN_CAPTION; + + /* A start tag whose tag name is "colgroup" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'colgroup') { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the + insertion mode to "in column group". */ + $this->insertElement($token); + $this->mode = self::IN_CGROUP; + + /* A start tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'col') { + $this->inTable(array( + 'name' => 'colgroup', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + $this->inColumnGroup($token); + + /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('tbody', 'tfoot', 'thead'))) { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the insertion + mode to "in table body". */ + $this->insertElement($token); + $this->mode = self::IN_TBODY; + + /* A start tag whose tag name is one of: "td", "th", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && + in_array($token['name'], array('td', 'th', 'tr'))) { + /* Act as if a start tag token with the tag name "tbody" had been + seen, then reprocess the current token. */ + $this->inTable(array( + 'name' => 'tbody', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inTableBody($token); + + /* A start tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'table') { + /* Parse error. Act as if an end tag token with the tag name "table" + had been seen, then, if that token wasn't ignored, reprocess the + current token. */ + $this->inTable(array( + 'name' => 'table', + 'type' => HTML5::ENDTAG + )); + + return $this->mainPhase($token); + + /* An end tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'table') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + return false; + + /* Otherwise: */ + } else { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Now, if the current node is not a table element, then this + is a parse error. */ + // w/e + + /* Pop elements from this stack until a table element has been + popped from the stack. */ + while(true) { + $current = end($this->stack)->nodeName; + array_pop($this->stack); + + if($current === 'table') { + break; + } + } + + /* Reset the insertion mode appropriately. */ + $this->resetInsertionMode(); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', + 'tfoot', 'th', 'thead', 'tr'))) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* Parse error. Process the token as if the insertion mode was "in + body", with the following exception: */ + + /* If the current node is a table, tbody, tfoot, thead, or tr + element, then, whenever a node would be inserted into the current + node, it must instead be inserted into the foster parent element. */ + if(in_array(end($this->stack)->nodeName, + array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* The foster parent element is the parent element of the last + table element in the stack of open elements, if there is a + table element and it has such a parent element. If there is no + table element in the stack of open elements (innerHTML case), + then the foster parent element is the first element in the + stack of open elements (the html element). Otherwise, if there + is a table element in the stack of open elements, but the last + table element in the stack of open elements has no parent, or + its parent node is not an element, then the foster parent + element is the element before the last table element in the + stack of open elements. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === 'table') { + $table = $this->stack[$n]; + break; + } + } + + if(isset($table) && $table->parentNode !== null) { + $this->foster_parent = $table->parentNode; + + } elseif(!isset($table)) { + $this->foster_parent = $this->stack[0]; + + } elseif(isset($table) && ($table->parentNode === null || + $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { + $this->foster_parent = $this->stack[$n - 1]; + } + } + + $this->inBody($token); + } + } + + private function inCaption($token) { + /* An end tag whose tag name is "caption" */ + if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore + + /* Otherwise: */ + } else { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Now, if the current node is not a caption element, then this + is a parse error. */ + // w/e + + /* Pop elements from this stack until a caption element has + been popped from the stack. */ + while(true) { + $node = end($this->stack)->nodeName; + array_pop($this->stack); + + if($node === 'caption') { + break; + } + } + + /* Clear the list of active formatting elements up to the last + marker. */ + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); + + /* Switch the insertion mode to "in table". */ + $this->mode = self::IN_TABLE; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag + name is "table" */ + } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'table')) { + /* Parse error. Act as if an end tag with the tag name "caption" + had been seen, then, if that token wasn't ignored, reprocess the + current token. */ + $this->inCaption(array( + 'name' => 'caption', + 'type' => HTML5::ENDTAG + )); + + return $this->inTable($token); + + /* An end tag whose tag name is one of: "body", "col", "colgroup", + "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', + 'thead', 'tr'))) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in body". */ + $this->inBody($token); + } + } + + private function inColumnGroup($token) { + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $text = $this->dom->createTextNode($token['data']); + end($this->stack)->appendChild($text); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + end($this->stack)->appendChild($comment); + + /* A start tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { + /* Insert a col element for the token. Immediately pop the current + node off the stack of open elements. */ + $this->insertElement($token); + array_pop($this->stack); + + /* An end tag whose tag name is "colgroup" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'colgroup') { + /* If the current node is the root html element, then this is a + parse error, ignore the token. (innerHTML case) */ + if(end($this->stack)->nodeName === 'html') { + // Ignore + + /* Otherwise, pop the current node (which will be a colgroup + element) from the stack of open elements. Switch the insertion + mode to "in table". */ + } else { + array_pop($this->stack); + $this->mode = self::IN_TABLE; + } + + /* An end tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Act as if an end tag with the tag name "colgroup" had been seen, + and then, if that token wasn't ignored, reprocess the current token. */ + $this->inColumnGroup(array( + 'name' => 'colgroup', + 'type' => HTML5::ENDTAG + )); + + return $this->inTable($token); + } + } + + private function inTableBody($token) { + $clear = array('tbody', 'tfoot', 'thead', 'html'); + + /* A start tag whose tag name is "tr" */ + if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Insert a tr element for the token, then switch the insertion + mode to "in row". */ + $this->insertElement($token); + $this->mode = self::IN_ROW; + + /* A start tag whose tag name is one of: "th", "td" */ + } elseif($token['type'] === HTML5::STARTTAG && + ($token['name'] === 'th' || $token['name'] === 'td')) { + /* Parse error. Act as if a start tag with the tag name "tr" had + been seen, then reprocess the current token. */ + $this->inTableBody(array( + 'name' => 'tr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inRow($token); + + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore + + /* Otherwise: */ + } else { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Pop the current node from the stack of open elements. Switch + the insertion mode to "in table". */ + array_pop($this->stack); + $this->mode = self::IN_TABLE; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ + } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) || + ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) { + /* If the stack of open elements does not have a tbody, thead, or + tfoot element in table scope, this is a parse error. Ignore the + token. (innerHTML case) */ + if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Act as if an end tag with the same tag name as the current + node ("tbody", "tfoot", or "thead") had been seen, then + reprocess the current token. */ + $this->inTableBody(array( + 'name' => end($this->stack)->nodeName, + 'type' => HTML5::ENDTAG + )); + + return $this->mainPhase($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in table". */ + $this->inTable($token); + } + } + + private function inRow($token) { + $clear = array('tr', 'html'); + + /* A start tag whose tag name is one of: "th", "td" */ + if($token['type'] === HTML5::STARTTAG && + ($token['name'] === 'th' || $token['name'] === 'td')) { + /* Clear the stack back to a table row context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the insertion + mode to "in cell". */ + $this->insertElement($token); + $this->mode = self::IN_CELL; + + /* Insert a marker at the end of the list of active formatting + elements. */ + $this->a_formatting[] = self::MARKER; + + /* An end tag whose tag name is "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Clear the stack back to a table row context. */ + $this->clearStackToTableContext($clear); + + /* Pop the current node (which will be a tr element) from the + stack of open elements. Switch the insertion mode to "in table + body". */ + array_pop($this->stack); + $this->mode = self::IN_TBODY; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* Act as if an end tag with the tag name "tr" had been seen, then, + if that token wasn't ignored, reprocess the current token. */ + $this->inRow(array( + 'name' => 'tr', + 'type' => HTML5::ENDTAG + )); + + return $this->inCell($token); + + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Otherwise, act as if an end tag with the tag name "tr" had + been seen, then reprocess the current token. */ + $this->inRow(array( + 'name' => 'tr', + 'type' => HTML5::ENDTAG + )); + + return $this->inCell($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in table". */ + $this->inTable($token); + } + } + + private function inCell($token) { + /* An end tag whose tag name is one of: "td", "th" */ + if($token['type'] === HTML5::ENDTAG && + ($token['name'] === 'td' || $token['name'] === 'th')) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as that of the token, then this is a + parse error and the token must be ignored. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Generate implied end tags, except for elements with the same + tag name as the token. */ + $this->generateImpliedEndTags(array($token['name'])); + + /* Now, if the current node is not an element with the same tag + name as the token, then this is a parse error. */ + // k + + /* Pop elements from this stack until an element with the same + tag name as the token has been popped from the stack. */ + while(true) { + $node = end($this->stack)->nodeName; + array_pop($this->stack); + + if($node === $token['name']) { + break; + } + } + + /* Clear the list of active formatting elements up to the last + marker. */ + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); + + /* Switch the insertion mode to "in row". (The current node + will be a tr element at this point.) */ + $this->mode = self::IN_ROW; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) { + /* If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (innerHTML case) */ + if(!$this->elementInScope(array('td', 'th'), true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) { + /* If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (innerHTML case) */ + if(!$this->elementInScope(array('td', 'th'), true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html'))) { + /* Parse error. Ignore the token. */ + + /* An end tag whose tag name is one of: "table", "tbody", "tfoot", + "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as that of the token (which can only + happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), + then this is a parse error and the token must be ignored. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in body". */ + $this->inBody($token); + } + } + + private function inSelect($token) { + /* Handle the token as follows: */ + + /* A character token */ + if($token['type'] === HTML5::CHARACTR) { + /* Append the token's character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token whose tag name is "option" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'option') { + /* If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. */ + if(end($this->stack)->nodeName === 'option') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* A start tag token whose tag name is "optgroup" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'optgroup') { + /* If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. */ + if(end($this->stack)->nodeName === 'option') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* If the current node is an optgroup element, act as if an end tag + with the tag name "optgroup" had been seen. */ + if(end($this->stack)->nodeName === 'optgroup') { + $this->inSelect(array( + 'name' => 'optgroup', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* An end tag token whose tag name is "optgroup" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'optgroup') { + /* First, if the current node is an option element, and the node + immediately before it in the stack of open elements is an optgroup + element, then act as if an end tag with the tag name "option" had + been seen. */ + $elements_in_stack = count($this->stack); + + if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && + $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* If the current node is an optgroup element, then pop that node + from the stack of open elements. Otherwise, this is a parse error, + ignore the token. */ + if($this->stack[$elements_in_stack - 1] === 'optgroup') { + array_pop($this->stack); + } + + /* An end tag token whose tag name is "option" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'option') { + /* If the current node is an option element, then pop that node + from the stack of open elements. Otherwise, this is a parse error, + ignore the token. */ + if(end($this->stack)->nodeName === 'option') { + array_pop($this->stack); + } + + /* An end tag whose tag name is "select" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'select') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // w/e + + /* Otherwise: */ + } else { + /* Pop elements from the stack of open elements until a select + element has been popped from the stack. */ + while(true) { + $current = end($this->stack)->nodeName; + array_pop($this->stack); + + if($current === 'select') { + break; + } + } + + /* Reset the insertion mode appropriately. */ + $this->resetInsertionMode(); + } + + /* A start tag whose tag name is "select" */ + } elseif($token['name'] === 'select' && + $token['type'] === HTML5::STARTTAG) { + /* Parse error. Act as if the token had been an end tag with the + tag name "select" instead. */ + $this->inSelect(array( + 'name' => 'select', + 'type' => HTML5::ENDTAG + )); + + /* An end tag whose tag name is one of: "caption", "table", "tbody", + "tfoot", "thead", "tr", "td", "th" */ + } elseif(in_array($token['name'], array('caption', 'table', 'tbody', + 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { + /* Parse error. */ + // w/e + + /* If the stack of open elements has an element in table scope with + the same tag name as that of the token, then act as if an end tag + with the tag name "select" had been seen, and reprocess the token. + Otherwise, ignore the token. */ + if($this->elementInScope($token['name'], true)) { + $this->inSelect(array( + 'name' => 'select', + 'type' => HTML5::ENDTAG + )); + + $this->mainPhase($token); + } + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function afterBody($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Process the token as it would be processed if the insertion mode + was "in body". */ + $this->inBody($token); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the first element in the stack of open + elements (the html element), with the data attribute set to the + data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->stack[0]->appendChild($comment); + + /* An end tag with the tag name "html" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { + /* If the parser was originally created in order to handle the + setting of an element's innerHTML attribute, this is a parse error; + ignore the token. (The element will be an html element in this + case.) (innerHTML case) */ + + /* Otherwise, switch to the trailing end phase. */ + $this->phase = self::END_PHASE; + + /* Anything else */ + } else { + /* Parse error. Set the insertion mode to "in body" and reprocess + the token. */ + $this->mode = self::IN_BODY; + return $this->inBody($token); + } + } + + private function inFrameset($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag with the tag name "frameset" */ + } elseif($token['name'] === 'frameset' && + $token['type'] === HTML5::STARTTAG) { + $this->insertElement($token); + + /* An end tag with the tag name "frameset" */ + } elseif($token['name'] === 'frameset' && + $token['type'] === HTML5::ENDTAG) { + /* If the current node is the root html element, then this is a + parse error; ignore the token. (innerHTML case) */ + if(end($this->stack)->nodeName === 'html') { + // Ignore + + } else { + /* Otherwise, pop the current node from the stack of open + elements. */ + array_pop($this->stack); + + /* If the parser was not originally created in order to handle + the setting of an element's innerHTML attribute (innerHTML case), + and the current node is no longer a frameset element, then change + the insertion mode to "after frameset". */ + $this->mode = self::AFTR_FRAME; + } + + /* A start tag with the tag name "frame" */ + } elseif($token['name'] === 'frame' && + $token['type'] === HTML5::STARTTAG) { + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + + /* A start tag with the tag name "noframes" */ + } elseif($token['name'] === 'noframes' && + $token['type'] === HTML5::STARTTAG) { + /* Process the token as if the insertion mode had been "in body". */ + $this->inBody($token); + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function afterFrameset($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* An end tag with the tag name "html" */ + } elseif($token['name'] === 'html' && + $token['type'] === HTML5::ENDTAG) { + /* Switch to the trailing end phase. */ + $this->phase = self::END_PHASE; + + /* A start tag with the tag name "noframes" */ + } elseif($token['name'] === 'noframes' && + $token['type'] === HTML5::STARTTAG) { + /* Process the token as if the insertion mode had been "in body". */ + $this->inBody($token); + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function trailingEndPhase($token) { + /* After the main phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Process the token as it would be processed in the main phase. */ + $this->mainPhase($token); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. Or a start tag token. Or an end tag token. */ + } elseif(($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) { + /* Parse error. Switch back to the main phase and reprocess the + token. */ + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + + /* An end-of-file token */ + } elseif($token['type'] === HTML5::EOF) { + /* OMG DONE!! */ + } + } + + private function insertElement($token, $append = true) { + $el = $this->dom->createElement($token['name']); + + foreach($token['attr'] as $attr) { + if(!$el->hasAttribute($attr['name'])) { + $el->setAttribute($attr['name'], $attr['value']); + } + } + + $this->appendToRealParent($el); + $this->stack[] = $el; + + return $el; + } + + private function insertText($data) { + $text = $this->dom->createTextNode($data); + $this->appendToRealParent($text); + } + + private function insertComment($data) { + $comment = $this->dom->createComment($data); + $this->appendToRealParent($comment); + } + + private function appendToRealParent($node) { + if($this->foster_parent === null) { + end($this->stack)->appendChild($node); + + } elseif($this->foster_parent !== null) { + /* If the foster parent element is the parent element of the + last table element in the stack of open elements, then the new + node must be inserted immediately before the last table element + in the stack of open elements in the foster parent element; + otherwise, the new node must be appended to the foster parent + element. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === 'table' && + $this->stack[$n]->parentNode !== null) { + $table = $this->stack[$n]; + break; + } + } + + if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) + $this->foster_parent->insertBefore($node, $table); + else + $this->foster_parent->appendChild($node); + + $this->foster_parent = null; + } + } + + private function elementInScope($el, $table = false) { + if(is_array($el)) { + foreach($el as $element) { + if($this->elementInScope($element, $table)) { + return true; + } + } + + return false; + } + + $leng = count($this->stack); + + for($n = 0; $n < $leng; $n++) { + /* 1. Initialise node to be the current node (the bottommost node of + the stack). */ + $node = $this->stack[$leng - 1 - $n]; + + if($node->tagName === $el) { + /* 2. If node is the target node, terminate in a match state. */ + return true; + + } elseif($node->tagName === 'table') { + /* 3. Otherwise, if node is a table element, terminate in a failure + state. */ + return false; + + } elseif($table === true && in_array($node->tagName, array('caption', 'td', + 'th', 'button', 'marquee', 'object'))) { + /* 4. Otherwise, if the algorithm is the "has an element in scope" + variant (rather than the "has an element in table scope" variant), + and node is one of the following, terminate in a failure state. */ + return false; + + } elseif($node === $node->ownerDocument->documentElement) { + /* 5. Otherwise, if node is an html element (root element), terminate + in a failure state. (This can only happen if the node is the topmost + node of the stack of open elements, and prevents the next step from + being invoked if there are no more elements in the stack.) */ + return false; + } + + /* Otherwise, set node to the previous entry in the stack of open + elements and return to step 2. (This will never fail, since the loop + will always terminate in the previous step if the top of the stack + is reached.) */ + } + } + + private function reconstructActiveFormattingElements() { + /* 1. If there are no entries in the list of active formatting elements, + then there is nothing to reconstruct; stop this algorithm. */ + $formatting_elements = count($this->a_formatting); + + if($formatting_elements === 0) { + return false; + } + + /* 3. Let entry be the last (most recently added) element in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. If the last (most recently added) entry in the list of active + formatting elements is a marker, or if it is an element that is in the + stack of open elements, then there is nothing to reconstruct; stop this + algorithm. */ + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { + return false; + } + + for($a = $formatting_elements - 1; $a >= 0; true) { + /* 4. If there are no entries before entry in the list of active + formatting elements, then jump to step 8. */ + if($a === 0) { + $step_seven = false; + break; + } + + /* 5. Let entry be the entry one earlier than entry in the list of + active formatting elements. */ + $a--; + $entry = $this->a_formatting[$a]; + + /* 6. If entry is neither a marker nor an element that is also in + thetack of open elements, go to step 4. */ + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { + break; + } + } + + while(true) { + /* 7. Let entry be the element one later than entry in the list of + active formatting elements. */ + if(isset($step_seven) && $step_seven === true) { + $a++; + $entry = $this->a_formatting[$a]; + } + + /* 8. Perform a shallow clone of the element entry to obtain clone. */ + $clone = $entry->cloneNode(); + + /* 9. Append clone to the current node and push it onto the stack + of open elements so that it is the new current node. */ + end($this->stack)->appendChild($clone); + $this->stack[] = $clone; + + /* 10. Replace the entry for entry in the list with an entry for + clone. */ + $this->a_formatting[$a] = $clone; + + /* 11. If the entry for clone in the list of active formatting + elements is not the last entry in the list, return to step 7. */ + if(end($this->a_formatting) !== $clone) { + $step_seven = true; + } else { + break; + } + } + } + + private function clearTheActiveFormattingElementsUpToTheLastMarker() { + /* When the steps below require the UA to clear the list of active + formatting elements up to the last marker, the UA must perform the + following steps: */ + + while(true) { + /* 1. Let entry be the last (most recently added) entry in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. Remove entry from the list of active formatting elements. */ + array_pop($this->a_formatting); + + /* 3. If entry was a marker, then stop the algorithm at this point. + The list has been cleared up to the last marker. */ + if($entry === self::MARKER) { + break; + } + } + } + + private function generateImpliedEndTags(array $exclude = array()) { + /* When the steps below require the UA to generate implied end tags, + then, if the current node is a dd element, a dt element, an li element, + a p element, a td element, a th element, or a tr element, the UA must + act as if an end tag with the respective tag name had been seen and + then generate implied end tags again. */ + $node = end($this->stack); + $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); + + while(in_array(end($this->stack)->nodeName, $elements)) { + array_pop($this->stack); + } + } + + private function getElementCategory($name) { + if(in_array($name, $this->special)) + return self::SPECIAL; + + elseif(in_array($name, $this->scoping)) + return self::SCOPING; + + elseif(in_array($name, $this->formatting)) + return self::FORMATTING; + + else + return self::PHRASING; + } + + private function clearStackToTableContext($elements) { + /* When the steps above require the UA to clear the stack back to a + table context, it means that the UA must, while the current node is not + a table element or an html element, pop elements from the stack of open + elements. If this causes any elements to be popped from the stack, then + this is a parse error. */ + while(true) { + $node = end($this->stack)->nodeName; + + if(in_array($node, $elements)) { + break; + } else { + array_pop($this->stack); + } + } + } + + private function resetInsertionMode() { + /* 1. Let last be false. */ + $last = false; + $leng = count($this->stack); + + for($n = $leng - 1; $n >= 0; $n--) { + /* 2. Let node be the last node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 3. If node is the first node in the stack of open elements, then + set last to true. If the element whose innerHTML attribute is being + set is neither a td element nor a th element, then set node to the + element whose innerHTML attribute is being set. (innerHTML case) */ + if($this->stack[0]->isSameNode($node)) { + $last = true; + } + + /* 4. If node is a select element, then switch the insertion mode to + "in select" and abort these steps. (innerHTML case) */ + if($node->nodeName === 'select') { + $this->mode = self::IN_SELECT; + break; + + /* 5. If node is a td or th element, then switch the insertion mode + to "in cell" and abort these steps. */ + } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { + $this->mode = self::IN_CELL; + break; + + /* 6. If node is a tr element, then switch the insertion mode to + "in row" and abort these steps. */ + } elseif($node->nodeName === 'tr') { + $this->mode = self::IN_ROW; + break; + + /* 7. If node is a tbody, thead, or tfoot element, then switch the + insertion mode to "in table body" and abort these steps. */ + } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { + $this->mode = self::IN_TBODY; + break; + + /* 8. If node is a caption element, then switch the insertion mode + to "in caption" and abort these steps. */ + } elseif($node->nodeName === 'caption') { + $this->mode = self::IN_CAPTION; + break; + + /* 9. If node is a colgroup element, then switch the insertion mode + to "in column group" and abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'colgroup') { + $this->mode = self::IN_CGROUP; + break; + + /* 10. If node is a table element, then switch the insertion mode + to "in table" and abort these steps. */ + } elseif($node->nodeName === 'table') { + $this->mode = self::IN_TABLE; + break; + + /* 11. If node is a head element, then switch the insertion mode + to "in body" ("in body"! not "in head"!) and abort these steps. + (innerHTML case) */ + } elseif($node->nodeName === 'head') { + $this->mode = self::IN_BODY; + break; + + /* 12. If node is a body element, then switch the insertion mode to + "in body" and abort these steps. */ + } elseif($node->nodeName === 'body') { + $this->mode = self::IN_BODY; + break; + + /* 13. If node is a frameset element, then switch the insertion + mode to "in frameset" and abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'frameset') { + $this->mode = self::IN_FRAME; + break; + + /* 14. If node is an html element, then: if the head element + pointer is null, switch the insertion mode to "before head", + otherwise, switch the insertion mode to "after head". In either + case, abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'html') { + $this->mode = ($this->head_pointer === null) + ? self::BEFOR_HEAD + : self::AFTER_HEAD; + + break; + + /* 15. If last is true, then set the insertion mode to "in body" + and abort these steps. (innerHTML case) */ + } elseif($last) { + $this->mode = self::IN_BODY; + break; + } + } + } + + private function closeCell() { + /* If the stack of open elements has a td or th element in table scope, + then act as if an end tag token with that tag name had been seen. */ + foreach(array('td', 'th') as $cell) { + if($this->elementInScope($cell, true)) { + $this->inCell(array( + 'name' => $cell, + 'type' => HTML5::ENDTAG + )); + + break; + } + } + } + + public function save() { + return $this->dom; + } +} +?> diff --git a/maintenance/flush-definition-cache.php b/maintenance/flush-definition-cache.php index 6d51ab06..93af3b75 100755 --- a/maintenance/flush-definition-cache.php +++ b/maintenance/flush-definition-cache.php @@ -32,5 +32,5 @@ foreach ($names as $name) { $cache->flush($config); } -echo 'Cache flushed successfully.'; +echo "Cache flushed successfully.\n"; diff --git a/maintenance/generate-ph5p-patch.php b/maintenance/generate-ph5p-patch.php new file mode 100644 index 00000000..ecd9fa3f --- /dev/null +++ b/maintenance/generate-ph5p-patch.php @@ -0,0 +1,13 @@ + PH5P.patch"); +unlink($newt); diff --git a/plugins/phorum/htmlpurifier.php b/plugins/phorum/htmlpurifier.php index 4654c65d..ae2c276c 100644 --- a/plugins/phorum/htmlpurifier.php +++ b/plugins/phorum/htmlpurifier.php @@ -261,12 +261,42 @@ function phorum_htmlpurifier_editor_after_subject() { // don't show this message if it's a WYSIWYG editor, since it will // then be handled automatically if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) return; - ?> - HTML input is on. Make sure you escape all HTML and - angled-brackets with &lt; and &gt; (you can also use CDATA - tags, simply wrap the suspect text with -<![CDATA[text]]>. Paragraphs will only be applied to -double-spaces; single-spaces will not generate <br> tags. + ?> +

+ HTML input is enabled. Make sure you escape all HTML and + angled brackets with &lt; and &gt;. +

config; + if ($config->get('AutoFormat', 'AutoParagraph')) { + ?>

+ Auto-paragraphing is enabled. Double + newlines will be converted to paragraphs; for single + newlines, use the pre tag. +

getDefinition('HTML'); + $allowed = array(); + foreach ($html_definition->info as $name => $x) $allowed[] = "$name"; + sort($allowed); + $allowed_text = implode(', ', $allowed); + ?>

Allowed tags: .

+

+

+ For inputting literal code such as HTML and PHP for display, use + CDATA tags to auto-escape your angled brackets, and pre + to preserve newlines: +

+
<pre><![CDATA[
+Place code here
+]]></pre>
+

+ Power users, you can hide this notice with: +

.htmlpurifier-help {display:none;}
+

$top_id) { // test for end condition diff --git a/tests/HTMLPurifier/ChildDef/OptionalTest.php b/tests/HTMLPurifier/ChildDef/OptionalTest.php index 154353df..40dc17ee 100644 --- a/tests/HTMLPurifier/ChildDef/OptionalTest.php +++ b/tests/HTMLPurifier/ChildDef/OptionalTest.php @@ -19,5 +19,9 @@ class HTMLPurifier_ChildDef_OptionalTest extends HTMLPurifier_ChildDefHarness $this->assertResult('Not allowed text', ''); } + function testEmpty() { + $this->assertResult(''); + } + } diff --git a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php index 256d3a34..e55f96e5 100644 --- a/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php +++ b/tests/HTMLPurifier/ChildDef/StrictBlockquoteTest.php @@ -74,10 +74,11 @@ extends HTMLPurifier_ChildDefHarness } function testError() { - $this->expectError('Cannot use non-block element as block wrapper'); + // $this->expectError('Cannot use non-block element as block wrapper'); $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p'); $this->config->set('HTML', 'BlockWrapper', 'dav'); $this->assertResult('Needs wrap', '

Needs wrap

'); + $this->swallowErrors(); } } diff --git a/tests/HTMLPurifier/ErrorCollectorEMock.php b/tests/HTMLPurifier/ErrorCollectorEMock.php index c0f577d5..d3461331 100644 --- a/tests/HTMLPurifier/ErrorCollectorEMock.php +++ b/tests/HTMLPurifier/ErrorCollectorEMock.php @@ -27,11 +27,20 @@ class HTMLPurifier_ErrorCollectorEMock extends HTMLPurifier_ErrorCollectorMock function send($severity, $msg) { // test for context - $test = &$this->_getCurrentTestCase(); + $context =& SimpleTest::getContext(); + $test =& $context->getTest(); + + // compat + if (empty($this->_mock)) { + $mock =& $this; + } else { + $mock =& $this->_mock; + } + foreach ($this->_expected_context as $key => $value) { $test->assertEqual($value, $this->_context->get($key)); } - $step = $this->getCallCount('send'); + $step = $mock->getCallCount('send'); if (isset($this->_expected_context_at[$step])) { foreach ($this->_expected_context_at[$step] as $key => $value) { $test->assertEqual($value, $this->_context->get($key)); @@ -39,7 +48,7 @@ class HTMLPurifier_ErrorCollectorEMock extends HTMLPurifier_ErrorCollectorMock } // boilerplate mock code, does not have return value or references $args = func_get_args(); - $this->_invoke('send', $args); + $mock->_invoke('send', $args); } } diff --git a/tests/HTMLPurifier/ErrorsHarness.php b/tests/HTMLPurifier/ErrorsHarness.php index 67f7c6b3..9ab204e7 100644 --- a/tests/HTMLPurifier/ErrorsHarness.php +++ b/tests/HTMLPurifier/ErrorsHarness.php @@ -3,11 +3,15 @@ require_once 'HTMLPurifier/ErrorCollectorEMock.php'; require_once 'HTMLPurifier/Lexer/DirectLex.php'; +/** + * @todo Make the callCount variable actually work, so we can precisely + * specify what errors we want: no more, no less + */ class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness { var $config, $context; - var $collector, $generator; + var $collector, $generator, $callCount; function setup() { $this->config = HTMLPurifier_Config::create(array('Core.CollectErrors' => true)); @@ -16,6 +20,11 @@ class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness $this->collector = new HTMLPurifier_ErrorCollectorEMock(); $this->collector->prepare($this->context); $this->context->register('ErrorCollector', $this->collector); + $this->callCount = 0; + } + + function expectNoErrorCollection() { + $this->collector->expectNever('send'); } function expectErrorCollection() { diff --git a/tests/HTMLPurifier/IDAccumulatorTest.php b/tests/HTMLPurifier/IDAccumulatorTest.php index 006d689c..c6249eca 100644 --- a/tests/HTMLPurifier/IDAccumulatorTest.php +++ b/tests/HTMLPurifier/IDAccumulatorTest.php @@ -30,5 +30,11 @@ class HTMLPurifier_IDAccumulatorTest extends HTMLPurifier_Harness } + function testBuild() { + $this->config->set('Attr', 'IDBlacklist', array('foo')); + $accumulator = HTMLPurifier_IDAccumulator::build($this->config, $this->context); + $this->assertTrue( isset($accumulator->ids['foo']) ); + } + } diff --git a/tests/HTMLPurifier/Injector/AutoParagraphTest.php b/tests/HTMLPurifier/Injector/AutoParagraphTest.php index 23743dff..5c726a11 100644 --- a/tests/HTMLPurifier/Injector/AutoParagraphTest.php +++ b/tests/HTMLPurifier/Injector/AutoParagraphTest.php @@ -194,10 +194,7 @@ Bar

', } function testNoParagraphSingleInlineNodeInBlockNode() { - $this->assertResult( -'
Foo
', - '
Foo
' - ); + $this->assertResult( '
Foo
' ); } function testParagraphInBlockquote() { @@ -277,9 +274,7 @@ Par1 function testBlockNodeTextDelimeterWithoutDoublespaceInBlockNode() { $this->assertResult( '
Par1 -
Par2
', -'

Par1 -

Par2
' +
Par2
' ); } @@ -351,6 +346,30 @@ Par2' ); } + function testInlineAndBlockTagInDivNoParagraph() { + $this->assertResult( + '
bar mmm
asdf
' + ); + } + + function testInlineAndBlockTagInDivNeedingParagraph() { + $this->assertResult( +'
bar mmm + +
asdf
', +'

bar mmm

asdf
' + ); + } + + function testTextInlineNodeTextThenDoubleNewlineNeedsParagraph() { + $this->assertResult( +'
asdf bar mmm + +
asdf
', +'

asdf bar mmm

asdf
' + ); + } + function testErrorNeeded() { $this->config->set('HTML', 'Allowed', 'b'); $this->expectError('Cannot enable AutoParagraph injector because p is not allowed'); diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php index e67a3e44..bf4c374d 100644 --- a/tests/HTMLPurifier/Strategy/FixNestingTest.php +++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php @@ -109,8 +109,9 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness function testInvalidParentError() { // test fallback to div $this->config->set('HTML', 'Parent', 'obviously-impossible'); - $this->expectError('Cannot use unrecognized element as parent'); + // $this->expectError('Cannot use unrecognized element as parent'); $this->assertResult('
Accept
'); + $this->swallowErrors(); } function testCascadingRemovalOfNodesMissingRequiredChildren() { @@ -129,5 +130,10 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness $this->assertResult('
', ''); } + function testStrictBlockquoteInHTML401() { + $this->config->set('HTML', 'Doctype', 'HTML 4.01 Strict'); + $this->assertResult('
text
', '

text

'); + } + } diff --git a/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php b/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php index 4ba30f36..db5b989f 100644 --- a/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php +++ b/tests/HTMLPurifier/Strategy/FixNesting_ErrorsTest.php @@ -28,6 +28,11 @@ class HTMLPurifier_Strategy_FixNesting_ErrorsTest extends HTMLPurifier_Strategy_ $this->invoke("Valid
Invalid
"); } + function testNoNodeReorganizedForEmptyNode() { + $this->expectNoErrorCollection(); + $this->invoke(""); + } + function testNodeContentsRemoved() { $this->expectErrorCollection(E_ERROR, 'Strategy_FixNesting: Node contents removed'); $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('span', array(), 1)); diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php index e8e6c797..0249d5ef 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormed_InjectorTest.php @@ -11,6 +11,19 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str $this->obj = new HTMLPurifier_Strategy_MakeWellFormed(); $this->config->set('AutoFormat', 'AutoParagraph', true); $this->config->set('AutoFormat', 'Linkify', true); + generate_mock_once('HTMLPurifier_Injector'); + } + + function testEndNotification() { + $mock = new HTMLPurifier_InjectorMock(); + $mock->skip = false; + $mock->expectAt(0, 'notifyEnd', array(new HTMLPurifier_Token_End('b'))); + $mock->expectAt(1, 'notifyEnd', array(new HTMLPurifier_Token_End('i'))); + $mock->expectCallCount('notifyEnd', 2); + $this->config->set('AutoFormat', 'AutoParagraph', false); + $this->config->set('AutoFormat', 'Linkify', false); + $this->config->set('AutoFormat', 'Custom', array($mock)); + $this->assertResult('asdf', 'asdf'); } function testOnlyAutoParagraph() { @@ -62,4 +75,11 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str ); } + function testParagraphAfterLinkifiedURL() { + $this->assertResult( + "http://google.com\n\nb", + "

http://google.com

b

" + ); + } + } diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php index 19a37b24..4b397b09 100644 --- a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php +++ b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php @@ -82,5 +82,14 @@ alert(<b>bold</b>); ); } + function testRequiredAttributesTestNotPerformedOnEndTag() { + $this->config->set('HTML', 'DefinitionID', + 'HTMLPurifier_Strategy_RemoveForeignElementsTest'. + '->testRequiredAttributesTestNotPerformedOnEndTag'); + $def =& $this->config->getHTMLDefinition(true); + $def->addElement('f', 'Block', 'Optional: #PCDATA', false, array('req*' => 'Text')); + $this->assertResult('Foo Bar'); + } + } diff --git a/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php index d509a6a1..51f47358 100644 --- a/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php +++ b/tests/HTMLPurifier/URIFilter/MakeAbsoluteTest.php @@ -111,6 +111,12 @@ class HTMLPurifier_URIFilter_MakeAbsoluteTest extends HTMLPurifier_URIFilterHarn $this->assertFiltering('.', '../'); } + function testRemoveJavaScriptWithEmbeddedLink() { + // credits: NykO18 + $this->setBase('http://www.example.com/'); + $this->assertFiltering('javascript: window.location = \'http://www.example.com\';', false); + } + // error case function testErrorNoBase() { diff --git a/tests/HTMLPurifierTest.php b/tests/HTMLPurifierTest.php index 3ad307bb..6a221b24 100644 --- a/tests/HTMLPurifierTest.php +++ b/tests/HTMLPurifierTest.php @@ -94,6 +94,7 @@ class HTMLPurifierTest extends HTMLPurifier_Harness $this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true)); $this->assertPurification('foobar'); + $this->assertPurification('Omigosh!'); } diff --git a/tests/index.php b/tests/index.php index a98e20e6..5063d7ed 100755 --- a/tests/index.php +++ b/tests/index.php @@ -3,7 +3,9 @@ // call one file using /?f=FileTest.php , see $test_files array for // valid values -error_reporting(E_ALL | E_STRICT); +if (version_compare(PHP_VERSION, '5.1', '>=')) error_reporting(E_ALL | E_STRICT); +else error_reporting(E_ALL); + define('HTMLPurifierTest', 1); define('HTMLPURIFIER_SCHEMA_STRICT', true); // validate schemas @@ -17,6 +19,7 @@ $GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") & $simpletest_location = 'simpletest/'; // reasonable guess // load SimpleTest +if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php'; if (file_exists('../test-settings.php')) include '../test-settings.php'; require_once $simpletest_location . 'unit_tester.php'; require_once $simpletest_location . 'reporter.php'; @@ -79,7 +82,7 @@ if ($test_file = $GLOBALS['HTMLPurifierTest']['File']) { } else { - $test = new GroupTest('All Tests'); + $test = new GroupTest('All tests on PHP ' . PHP_VERSION); foreach ($test_files as $test_file) { require_once $test_file; $test->addTestClass(path2class($test_file)); @@ -91,5 +94,3 @@ if (SimpleReporter::inCli()) $reporter = new TextReporter(); else $reporter = new HTMLPurifier_SimpleTest_Reporter('UTF-8'); $test->run($reporter); - - diff --git a/tests/multitest.php b/tests/multitest.php new file mode 100644 index 00000000..1490bc7b --- /dev/null +++ b/tests/multitest.php @@ -0,0 +1,33 @@ +