From e3c2063f69ff156bd05431f8179306afef15ed31 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
+ When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp
+ are enabled, this directive defines what HTML elements should not be
+ removede if they have only a non-breaking space in them.
+
+ When enabled, HTML Purifier will treat any elements that contain only
+ non-breaking spaces as well as regular whitespace as empty, and remove
+ them when %AutoForamt.RemoveEmpty is enabled.
+
+ See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements
+ that don't have this behavior applied to them.
+
Elements that contain only whitespace will be treated as empty. Non-breaking - spaces, however, do not count as whitespace. + spaces, however, do not count as whitespace. See + %AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.
This algorithm is not perfect; you may still notice some empty tags, @@ -39,7 +40,7 @@ DEFAULT: false because they were not permitted in that context, or tags that, after being auto-closed by another tag, where empty. This is for safety reasons to prevent clever code from breaking validation. The general rule of thumb: - if a tag looked empty on the way end, it will get removed; if HTML Purifier + if a tag looked empty on the way in, it will get removed; if HTML Purifier made it empty, it will stay.
--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Injector/RemoveEmpty.php b/library/HTMLPurifier/Injector/RemoveEmpty.php index d85ca97d..638bfca0 100644 --- a/library/HTMLPurifier/Injector/RemoveEmpty.php +++ b/library/HTMLPurifier/Injector/RemoveEmpty.php @@ -3,12 +3,14 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector { - private $context, $config; + private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; public function prepare($config, $context) { parent::prepare($config, $context); $this->config = $config; $this->context = $context; + $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); + $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); $this->attrValidator = new HTMLPurifier_AttrValidator(); } @@ -17,7 +19,14 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector $next = false; for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { $next = $this->inputTokens[$i]; - if ($next instanceof HTMLPurifier_Token_Text && $next->is_whitespace) continue; + if ($next instanceof HTMLPurifier_Token_Text) { + if ($next->is_whitespace) continue; + if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { + $plain = str_replace("\xC2\xA0", "", $next->data); + $isWsOrNbsp = $plain === '' || ctype_space($plain); + if ($isWsOrNbsp) continue; + } + } break; } if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index 82642946..feb0c32b 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -72,6 +72,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $custom_injectors = $injectors['Custom']; unset($injectors['Custom']); // special case foreach ($injectors as $injector => $b) { + // XXX: Fix with a legitimate lookup table of enabled filters + if (strpos($injector, '.') !== false) continue; $injector = "HTMLPurifier_Injector_$injector"; if (!$b) continue; $this->injectors[] = new $injector; diff --git a/tests/HTMLPurifier/Injector/RemoveEmptyTest.php b/tests/HTMLPurifier/Injector/RemoveEmptyTest.php index f2152976..34dbc951 100644 --- a/tests/HTMLPurifier/Injector/RemoveEmptyTest.php +++ b/tests/HTMLPurifier/Injector/RemoveEmptyTest.php @@ -54,6 +54,27 @@ class HTMLPurifier_Injector_RemoveEmptyTest extends HTMLPurifier_InjectorHarness $this->assertResult(' ', ''); } + function testRemoveNbsp() { + $this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true); + $this->assertResult(' ', ''); + } + + function testRemoveNbspMix() { + $this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true); + $this->assertResult(' ', ''); + } + + function testDontRemoveNbsp() { + $this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true); + $this->assertResult('