diff --git a/INSTALL b/INSTALL index e03e2a29..87e61012 100644 --- a/INSTALL +++ b/INSTALL @@ -71,10 +71,6 @@ document's character encoding incorrectly. --------------------------------------------------------------------------- 3. Including the library -WARNING: Currently, the HTMLPurifier.auto.php file is broken due to our -configuration setup. Once ConfigSchema is migrated outside of PHP files, -this information will be correct. - The procedure is quite simple: require_once '/path/to/library/HTMLPurifier.auto.php'; @@ -85,6 +81,58 @@ when you use them. Only the contents in the library/ folder are necessary, so you can remove everything else when using HTML Purifier in a production environment. +Advanced users, read on; other users can skip to section 4. + +Autoload compatibility +---------------------- + + HTML Purifier attempts to be as smart as possible when registering an + autoloader, but there are some cases where you will need to change + your own code to accommodate HTML Purifier. These are those cases: + + PHP VERSION IS LESS THAN 5.1.2, AND YOU'VE DEFINED __autoload + Because spl_autoload_register() doesn't exist in early versions + of PHP 5, HTML Purifier has no way of adding itself to the autoload + stack. Modify your __autoload function to test + HTMLPurifier_Bootstrap::autoload($class) + + For example, suppose your autoload function looks like this: + + function __autoload($class) { + require str_replace('_', '/', $class) . '.php'; + return true; + } + + A modified version with HTML Purifier would look like this: + + function __autoload($class) { + if (HTMLPurifier_Bootstrap::autoload($class)) return true; + require str_replace('_', '/', $class) . '.php'; + return true; + } + + Note that there *is* some custom behavior in our autoloader; the + original autoloader in our example would work 99% of the time, + but would fail when including language files. 
+ + AN __autoload FUNCTION IS DECLARED AFTER OUR AUTOLOADER IS REGISTERED + spl_autoload_register() has the curious behavior of disabling + the existing __autoload() handler. Users need to explicitly + spl_autoload_register('__autoload'). Because we use SPL when it + is available, __autoload() will ALWAYS be disabled. If __autoload() + is declared before HTML Purifier is loaded, this is not a problem: + HTML Purifier will register the function for you. But if it is + declared afterwards, it will mysteriously not work. This + snippet of code (after your autoloader is defined) will fix it: + + spl_autoload_register('__autoload'); + + Users should also be on guard if they use a version of PHP previous + to 5.1.2 without an autoloader--HTML Purifier will define __autoload() + for you, which can collide with an autoloader that was added by *you* + later. + + For better performance ---------------------- @@ -95,13 +143,51 @@ For better performance // If /path/to/library isn't already in your include path, uncomment // the below line: - // set_include_path( '/path/to/library' . PATH_SEPARATOR . get_include_path() ); + // require '/path/to/library/HTMLPurifier.path.php'; require 'HTMLPurifier.includes.php'; Optional components still need to be included--you'll know if you try to use a feature and you get a class doesn't exists error! The autoloader - can be used in conjunction with this approach to catch + can be used in conjunction with this approach to catch classes that are + missing. Simply add this afterwards: + + require 'HTMLPurifier.autoload.php'; + +Standalone version +------------------ + + HTML Purifier has a standalone distribution; you can also generate + a standalone file from the full version by running the script + maintenance/merge-library.php . The standalone version has the + benefit of having most of its code in one file, so parsing is much + faster and the library is easier to manage. 
+ + If HTMLPurifier.standalone.php exists in the library directory, you + can use it like this: + + require '/path/to/HTMLPurifier.standalone.php'; + + This is equivalent to including HTMLPurifier.includes.php, but no + include path changes are necessary unless you want to use optional + classes. If you *do* want the optional classes, you need to add + HTML Purifier's source directory to your path. This will vary: + + * If you downloaded the htmlpurifier-x.y.z-standalone + distribution, you'll notice that the rest of the library is + missing; add standalone/ to your include path. + + * If you generated the standalone file yourself, the + standalone/ directory will also exist with the relevant + optional classes, but you can also set library/ to your path + and things will still work properly (in theory, a file in both + places should be equivalent). + + The autoloader can be added to the end to ensure the classes are + loaded when necessary; otherwise you can manually include them. + To use the autoloader, use this: + + require 'HTMLPurifier.autoload.php'; For advanced users ------------------ @@ -109,23 +195,24 @@ For advanced users HTMLPurifier.auto.php performs a number of operations that can be done individually. These are: - * Puts /path/to/library in the include path, - * Registers an autoload handler with HTMLPurifier.autoload.php - (depending on your version of PHP, this means using - spl_autoload_register or defining an __autoload function) + HTMLPurifier.path.php + Puts /path/to/library in the include path. For high performance, + this should be done in php.ini. + + HTMLPurifier.autoload.php + Registers our autoload handler HTMLPurifier_Bootstrap::autoload($class). You can do these operations by yourself--in fact, you must modify your own - autoload handler if you are using a version of PHP earlier than PHP 5.1.2. - HTML Purifier's autoload handler is HTMLPurifier_Bootstrap::autoload($class) - (so be sure to include HTMLPurifier/Bootstrap.php first.) 
+ autoload handler if you are using a version of PHP earlier than PHP 5.1.2 + (See "Autoload compatibility" above). --------------------------------------------------------------------------- 4. Configuration HTML Purifier is designed to run out-of-the-box, but occasionally HTML -Purifier needs to be told what to do. If you answered no to any of these -questions, read on, otherwise, you can skip to the next section (or, if you're +Purifier needs to be told what to do. If you answer no to any of these +questions, read on; otherwise, you can skip to the next section (or, if you're into configuring things just for the heck of it, skip to 4.3). * Am I using UTF-8? diff --git a/TODO b/TODO index 0dd5f29f..c43b7a13 100644 --- a/TODO +++ b/TODO @@ -20,25 +20,22 @@ IMPORTANT of the two. This is related to standalone in tests/index.php. This should use semi-automated smoketests using PHPT style files (probably should be part of SimpleTest framework). These tests can be further extended to work for - many of our other smoketests. + many of our other smoketests. Follow the documentation! 
- Release candidate, because of the major changes - Move utility classes for ConfigSchema into HTML Purifier itself: they're that important DOCUMENTATION - - Document new methods of including the library (probably can go in INSTALL) - Document new ConfigSchema setup and format; dev-includes.txt is a base but we need it in HTML - Update French translation of README - Document which scripts need to be called when a change is made - - Document that standalone doesn't load autoload by default, so you need - to include HTMLPurifier.autoload.php after it IMPORTANT FEATURES - Get everything into configuration objects (filters, I'm looking at you) - Factor generate-schema-cache.php into a class, so that the maintenance script is as small as possible - - Factor out command line parser into its own class + - Factor out command line parser into its own class, and unit test it - Optimize ConfigSchema by only caching things necessary for runtime CONFIGDOC @@ -57,7 +54,6 @@ IF IT AIN'T BROKE... - Create "super" script which performs all regeneration actions - Remove all includes from unit tests, and remove blanks/ folder and generation - Simplify merge library script by removing recursion? (or other things) - - Update unit tests for ConfigSchema - Perhaps replace types with integer identifiers in ConfigSchema? (would be smaller, but not by much). diff --git a/extras/HTMLPurifierExtras.autoload.php b/extras/HTMLPurifierExtras.autoload.php index 8ee9f37a..3994f75d 100644 --- a/extras/HTMLPurifierExtras.autoload.php +++ b/extras/HTMLPurifierExtras.autoload.php @@ -1,11 +1,18 @@ purify($html, $config); diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index b03cf153..d0c14b97 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -18,15 +18,13 @@ * library directory; this is not auto-set. 
*/ -// Treat this file specially, as it is detached from the rest of the library -require_once 'HTMLPurifier/Bootstrap.php'; - require 'HTMLPurifier.php'; require 'HTMLPurifier/AttrCollections.php'; require 'HTMLPurifier/AttrDef.php'; require 'HTMLPurifier/AttrTransform.php'; require 'HTMLPurifier/AttrTypes.php'; require 'HTMLPurifier/AttrValidator.php'; +require 'HTMLPurifier/Bootstrap.php'; require 'HTMLPurifier/Definition.php'; require 'HTMLPurifier/CSSDefinition.php'; require 'HTMLPurifier/ChildDef.php'; diff --git a/library/HTMLPurifier.path.php b/library/HTMLPurifier.path.php new file mode 100644 index 00000000..0368f1ae --- /dev/null +++ b/library/HTMLPurifier.path.php @@ -0,0 +1,9 @@ +config; // implementation is partially environment dependant, partially @@ -187,18 +193,17 @@ class HTMLPurifier * @param $prototype Optional prototype HTMLPurifier instance to * overload singleton with. */ - public static function &getInstance($prototype = null) { - static $htmlpurifier; - if (!$htmlpurifier || $prototype) { + public static function getInstance($prototype = null) { + if (!self::$instance || $prototype) { if ($prototype instanceof HTMLPurifier) { - $htmlpurifier = $prototype; + self::$instance = $prototype; } elseif ($prototype) { - $htmlpurifier = new HTMLPurifier($prototype); + self::$instance = new HTMLPurifier($prototype); } else { - $htmlpurifier = new HTMLPurifier(); + self::$instance = new HTMLPurifier(); } } - return $htmlpurifier; + return self::$instance; } } diff --git a/library/HTMLPurifier/Bootstrap.php b/library/HTMLPurifier/Bootstrap.php index 1d4e81d3..672ec5f4 100644 --- a/library/HTMLPurifier/Bootstrap.php +++ b/library/HTMLPurifier/Bootstrap.php @@ -20,9 +20,6 @@ if (!defined('PHP_EOL')) { } } -// :TODO: Might be slow -if (!class_exists('HTMLPurifier_Bootstrap', false)) { - /** * Bootstrap class that contains meta-functionality for HTML Purifier such as * the autoload function. 
@@ -59,6 +56,3 @@ class HTMLPurifier_Bootstrap } } - -} - diff --git a/maintenance/generate-includes.php b/maintenance/generate-includes.php index 2e0a2ae6..0fb4c17b 100644 --- a/maintenance/generate-includes.php +++ b/maintenance/generate-includes.php @@ -22,7 +22,6 @@ $exclude_dirs = array( $exclude_files = array( 'HTMLPurifier/Lexer/PEARSax3.php', 'HTMLPurifier/Lexer/PH5P.php', - 'HTMLPurifier/Bootstrap.php', ); // Determine what files need to be included: @@ -140,9 +139,6 @@ $php = "setOptions( 'packagedirectory' => realpath(dirname(__FILE__) . '/library'), 'filelistgenerator' => 'file', 'include' => array('*'), - 'dir_roles' => array('/' => 'php'), // hack to put .ser in the right place - 'ignore' => array('HTMLPurifier.auto.php', 'HTMLPurifier.standalone.php', 'standalone/'), + 'dir_roles' => array('/' => 'php'), // hack to put *.ser files in the right place + 'ignore' => array('HTMLPurifier.auto.php', 'HTMLPurifier.standalone.php', 'HTMLPurifier.path.php', 'standalone/'), ) ); diff --git a/test-settings.sample.php b/test-settings.sample.php index ed743e8c..a9d57008 100644 --- a/test-settings.sample.php +++ b/test-settings.sample.php @@ -9,7 +9,11 @@ set_time_limit(0); // Turning off output buffering will prevent mysterious errors from core dumps -@ob_end_flush(); +$data = @ob_get_clean(); +if ($data !== false && $data !== '') { + echo "Output buffer contains data [".urlencode($data)."]\n"; + exit; +} // Where is SimpleTest located? $simpletest_location = '/path/to/simpletest/'; @@ -20,6 +24,9 @@ $csstidy_location = '/path/to/csstidy/'; // For tests/multitest.php, which versions to test? $versions_to_test = array(); +// Stable PHP binary to use when invoking maintenance scripts. +$php = 'php'; + // For tests/multitest.php, what is the multi-version executable? 
It must // accept an extra parameter (version number) before all other arguments $phpv = 'phpv'; diff --git a/tests/HTMLPurifier/PHPT/loading/auto-includes.phpt b/tests/HTMLPurifier/PHPT/loading/auto-includes.phpt index 75a658b7..69327e31 100644 --- a/tests/HTMLPurifier/PHPT/loading/auto-includes.phpt +++ b/tests/HTMLPurifier/PHPT/loading/auto-includes.phpt @@ -2,7 +2,7 @@ HTMLPurifier.auto.php and HTMLPurifier.includes.php loading test --FILE-- purify('Salsa!'); +--EXPECT-- +Salsa! diff --git a/tests/HTMLPurifierTest.php b/tests/HTMLPurifierTest.php index 5462b67f..2fe565cd 100644 --- a/tests/HTMLPurifierTest.php +++ b/tests/HTMLPurifierTest.php @@ -134,8 +134,8 @@ alert(""); } function testGetInstance() { - $purifier =& HTMLPurifier::getInstance(); - $purifier2 =& HTMLPurifier::getInstance(); + $purifier = HTMLPurifier::getInstance(); + $purifier2 = HTMLPurifier::getInstance(); $this->assertReference($purifier, $purifier2); } diff --git a/tests/common.php b/tests/common.php index 95240974..6dbba861 100644 --- a/tests/common.php +++ b/tests/common.php @@ -9,10 +9,10 @@ if (!defined('HTMLPurifierTest')) { // is not allowed function __autoload($class) { if (!function_exists('spl_autoload_register')) { - if (HTMLPurifier_Bootstrap::autoload($class)) return true; - if (HTMLPurifierExtras::autoload($class)) return true; + if (class_exists('HTMLPurifier_Bootstrap', false) && HTMLPurifier_Bootstrap::autoload($class)) return true; + if (class_exists('HTMLPurifierExtras', false) && HTMLPurifierExtras::autoload($class)) return true; } - require_once str_replace('_', '/', $class) . '.php'; + require str_replace('_', '/', $class) . '.php'; return true; } if (function_exists('spl_autoload_register')) { @@ -36,16 +36,15 @@ if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php if (file_exists('../test-settings.php')) include '../test-settings.php'; // load SimpleTest -require_once $simpletest_location . 
'unit_tester.php'; -require_once $simpletest_location . 'reporter.php'; -require_once $simpletest_location . 'mock_objects.php'; -require_once $simpletest_location . 'xml.php'; -require_once $simpletest_location . 'remote.php'; +require $simpletest_location . 'unit_tester.php'; +require $simpletest_location . 'reporter.php'; +require $simpletest_location . 'mock_objects.php'; +require $simpletest_location . 'xml.php'; +require $simpletest_location . 'remote.php'; // load CSS Tidy if ($csstidy_location !== false) { - require_once $csstidy_location . 'class.csstidy.php'; - require_once $csstidy_location . 'class.csstidy_print.php'; + require $csstidy_location . 'class.csstidy.php'; } // load PEAR to include path @@ -58,16 +57,13 @@ if ( is_string($GLOBALS['HTMLPurifierTest']['PEAR']) ) { // after external libraries are loaded, turn on compile time errors error_reporting(E_ALL | E_STRICT); -// initialize HTML Purifier -require_once '../library/HTMLPurifier.auto.php'; - -// initialize alternative classes -require_once '../extras/HTMLPurifierExtras.auto.php'; +// initialize extra HTML Purifier libraries +require '../extras/HTMLPurifierExtras.auto.php'; // load SimpleTest addon functions -require_once 'generate_mock_once.func.php'; -require_once 'path2class.func.php'; -require_once 'tally_errors.func.php'; // compat +require 'generate_mock_once.func.php'; +require 'path2class.func.php'; +require 'tally_errors.func.php'; // compat /** * Arguments parser, is cli and web agnostic. diff --git a/tests/index.php b/tests/index.php index c2c136fa..67ddce8d 100755 --- a/tests/index.php +++ b/tests/index.php @@ -17,7 +17,7 @@ define('HTMLPurifierTest', 1); define('HTMLPURIFIER_SCHEMA_STRICT', true); // validate schemas chdir(dirname(__FILE__)); -require_once 'common.php'; +require 'common.php'; $AC = array(); // parameters $AC['flush'] = false; @@ -60,11 +60,13 @@ if ($AC['flush']) shell_exec($AC['php'] . 
' ../maintenance/flush-definition-cach if ($AC['standalone']) { // :TODO: This line is pretty important; please document! set_include_path(realpath('../library/standalone') . PATH_SEPARATOR . realpath('blanks') . PATH_SEPARATOR . get_include_path()); - require_once '../library/HTMLPurifier.standalone.php'; + require '../library/HTMLPurifier.standalone.php'; } else { - require_once 'HTMLPurifier.includes.php'; + require '../library/HTMLPurifier.path.php'; + require 'HTMLPurifier.includes.php'; + require '../library/HTMLPurifier.autoload.php'; } -require_once 'HTMLPurifier/Harness.php'; +require 'HTMLPurifier/Harness.php'; // setup special DefinitionCacheFactory decorator $factory =& HTMLPurifier_DefinitionCacheFactory::instance();