mirror of
https://github.com/renbaoshuo/S2OJ.git
synced 2024-12-25 14:31:53 +00:00
96d4a3ecf7
Due to historical reasons, the code is in subfolder "1". With SVN removal, we place the code back and remove the annoying "1" folder.
460 lines
15 KiB
PHP
460 lines
15 KiB
PHP
<?php
|
|
|
|
/**
 * Coordinates the HTML modules that make up a doctype: it registers the
 * built-in doctypes, resolves module names to instances, activates the
 * modules selected by the configuration and merges their element
 * definitions on request.
 */
class HTMLPurifier_HTMLModuleManager
{
    /**
     * Registry holding every doctype known to this manager.
     * @type HTMLPurifier_DoctypeRegistry
     */
    public $doctypes;

    /**
     * Doctype selected by setup(): the object returned by
     * HTMLPurifier_DoctypeRegistry::make(). Its "modules" and
     * "tidyModules" lists are read during setup(). Unset until setup()
     * has been called.
     * @type object
     */
    public $doctype;

    /**
     * @type HTMLPurifier_AttrTypes
     */
    public $attrTypes;

    /**
     * Active module instances for the selected doctype, indexed by the
     * name they were processed under.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $modules = array();

    /**
     * Recognized module instances, indexed by each module's $name
     * property. This array is usually lazy loaded, but a user can
     * overload a module by pre-emptively registering it.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $registeredModules = array();

    /**
     * Names of the extra modules that were added by the user through
     * addModule(). These get unconditionally merged into the current
     * doctype, whatever it may be.
     * @type string[]
     */
    public $userModules = array();

    /**
     * Associative array of element name to the list of names of the
     * modules that define that element; filled in by setup().
     * @type array
     */
    public $elementLookup = array();

    /**
     * Class-name prefixes tried, in order, when registerModule() is given
     * a short module name.
     * @type array
     */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /**
     * Built by setup() from the active modules.
     * @type HTMLPurifier_ContentSets
     */
    public $contentSets;

    /**
     * Built by setup() from the attribute types and the active modules.
     * @type HTMLPurifier_AttrCollections
     */
    public $attrCollections;

    /**
     * If set to true, modules flagged as unsafe are no longer skipped when
     * element definitions are assembled. Overwritten by setup() with the
     * HTML.Trusted configuration value.
     * @type bool
     */
    public $trusted = false;

    /**
     * Creates the editable helper objects and registers the built-in
     * doctypes together with their module and tidy-module lists.
     */
    public function __construct()
    {
        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional',
            false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict',
            false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional',
            true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // NOTE(review): 'Tidy_Strict' is listed twice here and again for
        // XHTML 1.1 below, so setup() processes that module twice. Left
        // untouched; confirm the second pass is redundant before removing.
        $this->doctypes->register(
            'XHTML 1.0 Strict',
            true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );
    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     *
     * @param mixed $module String module name, with or without a known
     *              class prefix, or an already constructed module object.
     * @param bool $overload Whether or not replacing a previously
     *              registered module of the same name is intended. If this
     *              is false and a module is replaced anyway, an
     *              E_USER_WARNING is raised; the replacement still happens.
     * @note This method never includes files: the module class must
     *       already be loadable. class_exists() is called with its default
     *       arguments, so registered autoloaders may be consulted.
     * @note If a string is passed as a module name, different variants
     *       will be tested in this order:
     *          - Each entry of $prefixes followed by the name, in the
     *            order the prefixes were added (the built-in
     *            HTMLPurifier_HTMLModule_ prefix comes first)
     *          - The literal name
     *          - Otherwise an E_USER_ERROR is triggered
     *       If your object name collides with an internal class, specify
     *       your module manually.
     * @note A module whose name property is empty is reported through
     *       trigger_error() (default level) and is not registered.
     */
    public function registerModule($module, $overload = false)
    {
        if (is_string($module)) {
            // attempt to resolve the name to a class
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                // no prefixed class matched: fall back to the literal name
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error(
                        $original_module . ' module does not exist',
                        E_USER_ERROR
                    );
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then recording its name so that setup() activates it.
     *
     * @param mixed $module Module name or module object, as accepted by
     *              registerModule().
     */
    public function addModule($module)
    {
        $this->registerModule($module);
        if (is_object($module)) {
            // only the name is remembered; setup() looks the instance up
            $module = $module->name;
        }
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class.
     *
     * @param string $prefix Prefix appended to the list of known prefixes.
     */
    public function addPrefix($prefix)
    {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements().
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // core modules stay enabled regardless of the allow-list
                if (isset($special_cases[$m])) {
                    continue;
                }
                if (!isset($lookup[$m])) {
                    unset($modules[$k]);
                }
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        if ($config->get('HTML.SafeScripting') !== array()) {
            $modules[] = 'SafeScripting';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors: replace short names by instances and
        // re-index the list by injector name
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     *
     * @param mixed $module Module name, or a module object. An object is
     *              always (re-)registered and is then tracked under its
     *              name property; a name is registered only when no
     *              module is known under it yet.
     */
    public function processModule($module)
    {
        if (is_object($module)) {
            // The object check has to come first: using an object as an
            // array offset is an error, so the instance is registered and
            // then referred to by its name.
            $this->registerModule($module);
            if (empty($module->name)) {
                // registerModule() already reported the missing name and
                // registered nothing, so there is nothing to activate.
                return;
            }
            $module = $module->name;
        } elseif (!isset($this->registeredModules[$module])) {
            $this->registerModule($module);
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Retrieves merged element definitions.
     *
     * @return array Element name => merged definition, for every element
     *               of the active modules that yields a definition.
     *               Modules flagged as unsafe are skipped unless trusted
     *               mode is on.
     */
    public function getElements()
    {
        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) {
                continue;
            }
            foreach ($module->info as $name => $v) {
                // already merged while visiting an earlier module
                if (isset($elements[$name])) {
                    continue;
                }
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) {
                unset($elements[$n]);
            }
        }

        return $elements;
    }

    /**
     * Retrieves a single merged element definition.
     *
     * @param string $name Name of element
     * @param bool $trusted Boolean trusted overriding parameter: set to true
     *                      if you want the full version of an element.
     *                      When null, the manager's own $trusted flag is
     *                      used.
     * @return HTMLPurifier_ElementDef|bool Merged definition, or false when
     *         no module defines the element or no usable standalone
     *         definition was found.
     * @note Modules are iterated over twice (once in getElements() and once
     *       here): getElements() walks the modules to discover element
     *       names, while this method walks the modules recorded in
     *       $elementLookup for one name so that definitions contributed by
     *       several modules can be merged.
     */
    public function getElement($name, $trusted = null)
    {
        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) {
            $trusted = $this->trusted;
        }

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {
            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place. In particular, this might
                // be the case for trusted elements. WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }
        return $def;
    }
}
|
|
|
|
// vim: et sw=4 sts=4
|