0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-08 23:11:52 +00:00

Implement %AutoFormat.RemoveEmpty.RemoveNbsp, by popular demand.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2009-04-09 00:53:19 -04:00
parent 398a02039e
commit e3c2063f69
8 changed files with 67 additions and 4 deletions

4
NEWS
View File

@ -20,6 +20,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on ! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on
the name attribute when set. Use with care. Thanks Ian Cook for the name attribute when set. Use with care. Thanks Ian Cook for
sponsoring. sponsoring.
! Implement %AutoFormat.RemoveEmpty.RemoveNbsp, which removes empty
tags that contain non-breaking spaces as well as other whitespace. You
can also modify which tags should have &nbsp; maintained with
%AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.
3.3.0, released 2009-02-16 3.3.0, released 2009-02-16
! Implement CSS property 'overflow' when %CSS.AllowTricky is true. ! Implement CSS property 'overflow' when %CSS.AllowTricky is true.

View File

@ -0,0 +1,11 @@
AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions
TYPE: lookup
VERSION: 4.0.0
DEFAULT: array('td' => true, 'th' => true)
--DESCRIPTION--
<p>
When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp
are enabled, this directive defines what HTML elements should not be
removed if they have only a non-breaking space in them.
</p>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,15 @@
AutoFormat.RemoveEmpty.RemoveNbsp
TYPE: bool
VERSION: 4.0.0
DEFAULT: false
--DESCRIPTION--
<p>
When enabled, HTML Purifier will treat any elements that contain only
non-breaking spaces as well as regular whitespace as empty, and remove
them when %AutoFormat.RemoveEmpty is enabled.
</p>
<p>
See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements
that don't have this behavior applied to them.
</p>
--# vim: et sw=4 sts=4

View File

@ -31,7 +31,8 @@ DEFAULT: false
</p> </p>
<p> <p>
Elements that contain only whitespace will be treated as empty. Non-breaking Elements that contain only whitespace will be treated as empty. Non-breaking
spaces, however, do not count as whitespace. spaces, however, do not count as whitespace. See
%AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.
</p> </p>
<p> <p>
This algorithm is not perfect; you may still notice some empty tags, This algorithm is not perfect; you may still notice some empty tags,
@ -39,7 +40,7 @@ DEFAULT: false
because they were not permitted in that context, or tags that, after because they were not permitted in that context, or tags that, after
being auto-closed by another tag, were empty. This is for safety reasons being auto-closed by another tag, were empty. This is for safety reasons
to prevent clever code from breaking validation. The general rule of thumb: to prevent clever code from breaking validation. The general rule of thumb:
if a tag looked empty on the way end, it will get removed; if HTML Purifier if a tag looked empty on the way in, it will get removed; if HTML Purifier
made it empty, it will stay. made it empty, it will stay.
</p> </p>
--# vim: et sw=4 sts=4 --# vim: et sw=4 sts=4

View File

@ -3,12 +3,14 @@
class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
{ {
private $context, $config; private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions;
public function prepare($config, $context) { public function prepare($config, $context) {
parent::prepare($config, $context); parent::prepare($config, $context);
$this->config = $config; $this->config = $config;
$this->context = $context; $this->context = $context;
$this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp');
$this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions');
$this->attrValidator = new HTMLPurifier_AttrValidator(); $this->attrValidator = new HTMLPurifier_AttrValidator();
} }
@ -17,7 +19,14 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
$next = false; $next = false;
for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) {
$next = $this->inputTokens[$i]; $next = $this->inputTokens[$i];
if ($next instanceof HTMLPurifier_Token_Text && $next->is_whitespace) continue; if ($next instanceof HTMLPurifier_Token_Text) {
if ($next->is_whitespace) continue;
if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) {
$plain = str_replace("\xC2\xA0", "", $next->data);
$isWsOrNbsp = $plain === '' || ctype_space($plain);
if ($isWsOrNbsp) continue;
}
}
break; break;
} }
if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {

View File

@ -72,6 +72,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$custom_injectors = $injectors['Custom']; $custom_injectors = $injectors['Custom'];
unset($injectors['Custom']); // special case unset($injectors['Custom']); // special case
foreach ($injectors as $injector => $b) { foreach ($injectors as $injector => $b) {
// XXX: Fix with a legitimate lookup table of enabled filters
if (strpos($injector, '.') !== false) continue;
$injector = "HTMLPurifier_Injector_$injector"; $injector = "HTMLPurifier_Injector_$injector";
if (!$b) continue; if (!$b) continue;
$this->injectors[] = new $injector; $this->injectors[] = new $injector;

View File

@ -54,6 +54,27 @@ class HTMLPurifier_Injector_RemoveEmptyTest extends HTMLPurifier_InjectorHarness
$this->assertResult('<b> <i> <u> </u> </i> </b>', ''); $this->assertResult('<b> <i> <u> </u> </i> </b>', '');
} }
function testRemoveNbsp() {
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true);
$this->assertResult('<b>&nbsp;</b>', '');
}
function testRemoveNbspMix() {
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true);
$this->assertResult('<b>&nbsp; &nbsp;</b>', '');
}
function testDontRemoveNbsp() {
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true);
$this->assertResult('<td>&nbsp;</b>', "<td>\xC2\xA0</td>");
}
function testRemoveNbspExceptionsSpecial() {
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true);
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions', 'b');
$this->assertResult('<b>&nbsp;</b>', "<b>\xC2\xA0</b>");
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4