0
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2024-09-18 18:25:18 +00:00

Implement %AutoFormat.RemoveEmpty.RemoveNbsp, by popular demand.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang 2009-04-09 00:53:19 -04:00
parent 398a02039e
commit e3c2063f69
8 changed files with 67 additions and 4 deletions

4
NEWS
View File

@ -20,6 +20,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on
the name attribute when set. Use with care. Thanks Ian Cook for
sponsoring.
! Implement %AutoFormat.RemoveEmpty.RemoveNbsp, which removes empty
tags that contain non-breaking spaces as well other whitespace. You
can also modify which tags should have &nbsp; maintained with
%AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.
3.3.0, released 2009-02-16
! Implement CSS property 'overflow' when %CSS.AllowTricky is true.

View File

@ -0,0 +1,11 @@
AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions
TYPE: lookup
VERSION: 4.0.0
DEFAULT: array('td' => true, 'th' => true)
--DESCRIPTION--
<p>
When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp
are enabled, this directive defines what HTML elements should not be
removede if they have only a non-breaking space in them.
</p>
--# vim: et sw=4 sts=4

View File

@ -0,0 +1,15 @@
AutoFormat.RemoveEmpty.RemoveNbsp
TYPE: bool
VERSION: 4.0.0
DEFAULT: false
--DESCRIPTION--
<p>
When enabled, HTML Purifier will treat any elements that contain only
non-breaking spaces as well as regular whitespace as empty, and remove
them when %AutoForamt.RemoveEmpty is enabled.
</p>
<p>
See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements
that don't have this behavior applied to them.
</p>
--# vim: et sw=4 sts=4

View File

@ -31,7 +31,8 @@ DEFAULT: false
</p>
<p>
Elements that contain only whitespace will be treated as empty. Non-breaking
spaces, however, do not count as whitespace.
spaces, however, do not count as whitespace. See
%AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.
</p>
<p>
This algorithm is not perfect; you may still notice some empty tags,
@ -39,7 +40,7 @@ DEFAULT: false
because they were not permitted in that context, or tags that, after
being auto-closed by another tag, where empty. This is for safety reasons
to prevent clever code from breaking validation. The general rule of thumb:
if a tag looked empty on the way end, it will get removed; if HTML Purifier
if a tag looked empty on the way in, it will get removed; if HTML Purifier
made it empty, it will stay.
</p>
--# vim: et sw=4 sts=4

View File

@ -3,12 +3,14 @@
class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
{
private $context, $config;
private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions;
public function prepare($config, $context) {
parent::prepare($config, $context);
$this->config = $config;
$this->context = $context;
$this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp');
$this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions');
$this->attrValidator = new HTMLPurifier_AttrValidator();
}
@ -17,7 +19,14 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
$next = false;
for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) {
$next = $this->inputTokens[$i];
if ($next instanceof HTMLPurifier_Token_Text && $next->is_whitespace) continue;
if ($next instanceof HTMLPurifier_Token_Text) {
if ($next->is_whitespace) continue;
if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) {
$plain = str_replace("\xC2\xA0", "", $next->data);
$isWsOrNbsp = $plain === '' || ctype_space($plain);
if ($isWsOrNbsp) continue;
}
}
break;
}
if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {

View File

@ -72,6 +72,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$custom_injectors = $injectors['Custom'];
unset($injectors['Custom']); // special case
foreach ($injectors as $injector => $b) {
// XXX: Fix with a legitimate lookup table of enabled filters
if (strpos($injector, '.') !== false) continue;
$injector = "HTMLPurifier_Injector_$injector";
if (!$b) continue;
$this->injectors[] = new $injector;

View File

@ -54,6 +54,27 @@ class HTMLPurifier_Injector_RemoveEmptyTest extends HTMLPurifier_InjectorHarness
$this->assertResult('<b> <i> <u> </u> </i> </b>', '');
}
function testRemoveNbsp() {
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true);
$this->assertResult('<b>&nbsp;</b>', '');
}
function testRemoveNbspMix() {
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true);
$this->assertResult('<b>&nbsp; &nbsp;</b>', '');
}
function testDontRemoveNbsp() {
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true);
$this->assertResult('<td>&nbsp;</b>', "<td>\xC2\xA0</td>");
}
function testRemoveNbspExceptionsSpecial() {
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true);
$this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions', 'b');
$this->assertResult('<b>&nbsp;</b>', "<b>\xC2\xA0</b>");
}
}
// vim: et sw=4 sts=4