0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-12-22 08:21:52 +00:00

Add %Core.DisableExcludes directive

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang 2013-02-17 15:47:38 -08:00
parent 344e0640b6
commit 631021733b
6 changed files with 62 additions and 16 deletions

4
NEWS
View File

@ -20,6 +20,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Permit underscores in font families ! Permit underscores in font families
! Support for page-break-* CSS3 properties when proprietary properties ! Support for page-break-* CSS3 properties when proprietary properties
are enabled. are enabled.
! New directive %Core.DisableExcludes; can be set to 'true' to turn off
SGML excludes checking. If HTML Purifier is removing too much text
and you don't care about full standards compliance, try setting this to
'true'.
- Use prepend for SPL autoloading on PHP 5.3 and later. - Use prepend for SPL autoloading on PHP 5.3 and later.
- Fix bug with nofollow transform when pre-existing rel exists. - Fix bug with nofollow transform when pre-existing rel exists.
- Fix bug where background:url() always gets lower-cased - Fix bug where background:url() always gets lower-cased

View File

@ -24,32 +24,32 @@
</directive> </directive>
<directive id="CSS.Proprietary"> <directive id="CSS.Proprietary">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>214</line> <line>215</line>
</file> </file>
</directive> </directive>
<directive id="CSS.AllowTricky"> <directive id="CSS.AllowTricky">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>218</line> <line>219</line>
</file> </file>
</directive> </directive>
<directive id="CSS.Trusted"> <directive id="CSS.Trusted">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>222</line> <line>223</line>
</file> </file>
</directive> </directive>
<directive id="CSS.AllowImportant"> <directive id="CSS.AllowImportant">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>226</line> <line>227</line>
</file> </file>
</directive> </directive>
<directive id="CSS.AllowedProperties"> <directive id="CSS.AllowedProperties">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>296</line> <line>302</line>
</file> </file>
</directive> </directive>
<directive id="CSS.ForbiddenProperties"> <directive id="CSS.ForbiddenProperties">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>310</line> <line>316</line>
</file> </file>
</directive> </directive>
<directive id="Cache.DefinitionImpl"> <directive id="Cache.DefinitionImpl">
@ -80,18 +80,18 @@
<directive id="Core.Encoding"> <directive id="Core.Encoding">
<file name="HTMLPurifier/Encoder.php"> <file name="HTMLPurifier/Encoder.php">
<line>337</line> <line>337</line>
<line>367</line> <line>372</line>
</file> </file>
</directive> </directive>
<directive id="Test.ForceNoIconv"> <directive id="Test.ForceNoIconv">
<file name="HTMLPurifier/Encoder.php"> <file name="HTMLPurifier/Encoder.php">
<line>341</line> <line>341</line>
<line>374</line> <line>379</line>
</file> </file>
</directive> </directive>
<directive id="Core.EscapeNonASCIICharacters"> <directive id="Core.EscapeNonASCIICharacters">
<file name="HTMLPurifier/Encoder.php"> <file name="HTMLPurifier/Encoder.php">
<line>368</line> <line>373</line>
</file> </file>
</directive> </directive>
<directive id="Output.CommentScriptContents"> <directive id="Output.CommentScriptContents">
@ -419,17 +419,17 @@
</directive> </directive>
<directive id="Filter.ExtractStyleBlocks.TidyImpl"> <directive id="Filter.ExtractStyleBlocks.TidyImpl">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php"> <file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>54</line> <line>55</line>
</file> </file>
</directive> </directive>
<directive id="Filter.ExtractStyleBlocks.Scope"> <directive id="Filter.ExtractStyleBlocks.Scope">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php"> <file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>78</line> <line>79</line>
</file> </file>
</directive> </directive>
<directive id="Filter.ExtractStyleBlocks.Escaping"> <directive id="Filter.ExtractStyleBlocks.Escaping">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php"> <file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>276</line> <line>277</line>
</file> </file>
</directive> </directive>
<directive id="HTML.SafeIframe"> <directive id="HTML.SafeIframe">
@ -473,12 +473,12 @@
</directive> </directive>
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp"> <directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
<file name="HTMLPurifier/Injector/RemoveEmpty.php"> <file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>12</line> <line>15</line>
</file> </file>
</directive> </directive>
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions"> <directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
<file name="HTMLPurifier/Injector/RemoveEmpty.php"> <file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>13</line> <line>16</line>
</file> </file>
</directive> </directive>
<directive id="Core.AggressivelyFixLt"> <directive id="Core.AggressivelyFixLt">
@ -491,6 +491,11 @@
<line>70</line> <line>70</line>
</file> </file>
</directive> </directive>
<directive id="Core.DisableExcludes">
<file name="HTMLPurifier/Strategy/FixNesting.php">
<line>57</line>
</file>
</directive>
<directive id="Core.EscapeInvalidTags"> <directive id="Core.EscapeInvalidTags">
<file name="HTMLPurifier/Strategy/MakeWellFormed.php"> <file name="HTMLPurifier/Strategy/MakeWellFormed.php">
<line>53</line> <line>53</line>

View File

@ -0,0 +1,14 @@
Core.DisableExcludes
TYPE: bool
DEFAULT: false
VERSION: 4.5.0
--DESCRIPTION--
<p>
This directive disables SGML-style exclusions, e.g. the exclusion of
<code>&lt;object&gt;</code> in any descendant of a
<code>&lt;pre&gt;</code> tag. Disabling excludes will allow some
invalid documents to pass through HTML Purifier, but HTML Purifier
will also be less likely to accidentally remove large documents during
processing.
</p>
--# vim: et sw=4 sts=4

View File

@ -26,6 +26,22 @@
* translated into text depends on the child definitions. * translated into text depends on the child definitions.
* *
* @todo Enable nodes to be bubbled out of the structure. * @todo Enable nodes to be bubbled out of the structure.
*
* @warning This algorithm (though it may be hard to see) proceeds in
* a top-down fashion. Thus, parents are processed before
* children. This is easy to implement and has a nice efficiency
* benefit, in that if a node is removed, we never waste any
* time processing it, but it also means that if a child
* changes in a non-encapsulated way (e.g. it is removed), we
* need to go back and reprocess the parent to see if those
* changes resulted in problems for the parent. See
* [BACKTRACK] for an example of this. In the current
* implementation, this backtracking can only be triggered when
* a node is removed and if that node was the sole node, the
* parent would need to be removed. As such, it is easy to see
* that backtracking only incurs constant overhead. If more
* sophisticated backtracking is implemented, care must be
* taken to avoid nontermination or exponential blowup.
*/ */
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
@ -38,6 +54,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// get a copy of the HTML definition // get a copy of the HTML definition
$definition = $config->getHTMLDefinition(); $definition = $config->getHTMLDefinition();
$excludes_enabled = !$config->get('Core.DisableExcludes');
// insert implicit "parent" node, will be removed at end. // insert implicit "parent" node, will be removed at end.
// DEFINITION CALL // DEFINITION CALL
$parent_name = $definition->info_parent; $parent_name = $definition->info_parent;
@ -147,7 +165,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// parent exclusions. The array should not be very large, two // parent exclusions. The array should not be very large, two
// elements at most. // elements at most.
$excluded = false; $excluded = false;
if (!empty($exclude_stack)) { if (!empty($exclude_stack) && $excludes_enabled) {
foreach ($exclude_stack as $lookup) { foreach ($exclude_stack as $lookup) {
if (isset($lookup[$tokens[$i]->name])) { if (isset($lookup[$tokens[$i]->name])) {
$excluded = true; $excluded = true;
@ -235,7 +253,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// our current implementation claims that that case would // our current implementation claims that that case would
// not allow empty, even if it did // not allow empty, even if it did
if (!$parent_def->child->allow_empty) { if (!$parent_def->child->allow_empty) {
// we need to do a double-check // we need to do a double-check [BACKTRACK]
$i = $parent_index; $i = $parent_index;
array_pop($stack); array_pop($stack);
} }

View File

@ -139,6 +139,11 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
$this->assertResult('<blockquote>text</blockquote>', '<blockquote><p>text</p></blockquote>'); $this->assertResult('<blockquote>text</blockquote>', '<blockquote><p>text</p></blockquote>');
} }
// Exercises the %Core.DisableExcludes directive: with it set to true, a
// <pre> element containing nested <font> elements is run through the
// strategy under test.
// NOTE(review): assertResult() with a single argument presumably asserts
// that the output equals the input (i.e. nothing is removed) -- confirm
// against the strategy harness.
function testDisabledExcludes() {
$this->config->set('Core.DisableExcludes', true);
$this->assertResult('<pre><font><font></font></font></pre>');
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4