From e3c2063f69ff156bd05431f8179306afef15ed31 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Thu, 9 Apr 2009 00:53:19 -0400 Subject: [PATCH] Implement %AutoFormat.RemoveEmpty.RemoveNbsp, by popular demand. Signed-off-by: Edward Z. Yang --- NEWS | 4 ++++ library/HTMLPurifier/ConfigSchema/schema.ser | Bin 11884 -> 12243 bytes ...rmat.RemoveEmpty.RemoveNbsp.Exceptions.txt | 11 +++++++++ .../AutoFormat.RemoveEmpty.RemoveNbsp.txt | 15 +++++++++++++ .../schema/AutoFormat.RemoveEmpty.txt | 5 +++-- library/HTMLPurifier/Injector/RemoveEmpty.php | 13 +++++++++-- .../HTMLPurifier/Strategy/MakeWellFormed.php | 2 ++ .../HTMLPurifier/Injector/RemoveEmptyTest.php | 21 ++++++++++++++++++ 8 files changed, 67 insertions(+), 4 deletions(-) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt create mode 100644 library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt diff --git a/NEWS b/NEWS index 655f76c1..14959a64 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on the name attribute when set. Use with care. Thanks Ian Cook for sponsoring. +! Implement %AutoFormat.RemoveEmpty.RemoveNbsp, which removes empty + tags that contain non-breaking spaces as well other whitespace. You + can also modify which tags should have   maintained with + %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions. 3.3.0, released 2009-02-16 ! Implement CSS property 'overflow' when %CSS.AllowTricky is true. diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser index b8e5f347463af1469f7d0edf5aaac2eabe05319c..1e324df664e5815c7d6996cf40307399cdba2443 100644 GIT binary patch delta 287 zcmaD8b2)y38Kdds^|DHv75G({m`qG2*E31j=mn+b=9i`VB^4LwxmF~n7L;V>=M^hi zCt4Xm(~fYX~O;!l^B`GBySoz>Q?UW*b3Ap2^$z+-Yx55f7uu=Jm2? 
mVg67+at4Y&GOa9tE{C{P07;ROb*7d1WJf{y%`+6P@B#qF`&x_u delta 52 xcmcZ{|0ZUF8Kd#!jdDsGBUP9-y9rtHOg86tM-j^6VKmyjQSL0$=EsU3cmd??5;_0? diff --git a/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt b/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt new file mode 100644 index 00000000..35c393b4 --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt @@ -0,0 +1,11 @@ +AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions +TYPE: lookup +VERSION: 4.0.0 +DEFAULT: array('td' => true, 'th' => true) +--DESCRIPTION-- +

+ When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp + are enabled, this directive defines what HTML elements should not be + removed if they have only a non-breaking space in them. +

+--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt b/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt new file mode 100644 index 00000000..ca17eb1d --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt @@ -0,0 +1,15 @@ +AutoFormat.RemoveEmpty.RemoveNbsp +TYPE: bool +VERSION: 4.0.0 +DEFAULT: false +--DESCRIPTION-- +

+ When enabled, HTML Purifier will treat any elements that contain only + non-breaking spaces as well as regular whitespace as empty, and remove + them when %AutoFormat.RemoveEmpty is enabled. +

+

+ See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements + that don't have this behavior applied to them. +

+--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt b/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt index aaede47d..34657ba4 100644 --- a/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt +++ b/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt @@ -31,7 +31,8 @@ DEFAULT: false

Elements that contain only whitespace will be treated as empty. Non-breaking - spaces, however, do not count as whitespace. + spaces, however, do not count as whitespace. See + %AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.

This algorithm is not perfect; you may still notice some empty tags, @@ -39,7 +40,7 @@ DEFAULT: false because they were not permitted in that context, or tags that, after being auto-closed by another tag, where empty. This is for safety reasons to prevent clever code from breaking validation. The general rule of thumb: - if a tag looked empty on the way end, it will get removed; if HTML Purifier + if a tag looked empty on the way in, it will get removed; if HTML Purifier made it empty, it will stay.

--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Injector/RemoveEmpty.php b/library/HTMLPurifier/Injector/RemoveEmpty.php index d85ca97d..638bfca0 100644 --- a/library/HTMLPurifier/Injector/RemoveEmpty.php +++ b/library/HTMLPurifier/Injector/RemoveEmpty.php @@ -3,12 +3,14 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector { - private $context, $config; + private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; public function prepare($config, $context) { parent::prepare($config, $context); $this->config = $config; $this->context = $context; + $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); + $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); $this->attrValidator = new HTMLPurifier_AttrValidator(); } @@ -17,7 +19,14 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector $next = false; for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { $next = $this->inputTokens[$i]; - if ($next instanceof HTMLPurifier_Token_Text && $next->is_whitespace) continue; + if ($next instanceof HTMLPurifier_Token_Text) { + if ($next->is_whitespace) continue; + if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { + $plain = str_replace("\xC2\xA0", "", $next->data); + $isWsOrNbsp = $plain === '' || ctype_space($plain); + if ($isWsOrNbsp) continue; + } + } break; } if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index 82642946..feb0c32b 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -72,6 +72,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $custom_injectors = $injectors['Custom']; unset($injectors['Custom']); // special case foreach ($injectors as 
$injector => $b) { + // XXX: Fix with a legitimate lookup table of enabled filters + if (strpos($injector, '.') !== false) continue; $injector = "HTMLPurifier_Injector_$injector"; if (!$b) continue; $this->injectors[] = new $injector; diff --git a/tests/HTMLPurifier/Injector/RemoveEmptyTest.php b/tests/HTMLPurifier/Injector/RemoveEmptyTest.php index f2152976..34dbc951 100644 --- a/tests/HTMLPurifier/Injector/RemoveEmptyTest.php +++ b/tests/HTMLPurifier/Injector/RemoveEmptyTest.php @@ -54,6 +54,27 @@ class HTMLPurifier_Injector_RemoveEmptyTest extends HTMLPurifier_InjectorHarness $this->assertResult(' ', ''); } + function testRemoveNbsp() { + $this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true); + $this->assertResult(' ', ''); + } + + function testRemoveNbspMix() { + $this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true); + $this->assertResult('   ', ''); + } + + function testDontRemoveNbsp() { + $this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true); + $this->assertResult(' ', "\xC2\xA0"); + } + + function testRemoveNbspExceptionsSpecial() { + $this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp', true); + $this->config->set('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions', 'b'); + $this->assertResult(' ', "\xC2\xA0"); + } + } // vim: et sw=4 sts=4