From a721ae0ae6106b49c3a33b08a8b6a24bdcbd8801 Mon Sep 17 00:00:00 2001 From: Xavier Ripoll Date: Wed, 17 Oct 2018 17:19:54 +0200 Subject: [PATCH] MOODLE-556: Normalization of whitespace in MathML token elements --- library/HTMLPurifier.includes.php | 1 + library/HTMLPurifier.safe-includes.php | 1 + library/HTMLPurifier/HTMLModule/Math.php | 3 ++ .../Injector/MathSpaceNormalize.php | 40 +++++++++++++++++++ 4 files changed, 45 insertions(+) create mode 100644 library/HTMLPurifier/Injector/MathSpaceNormalize.php diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index 75144193..28202e62 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -192,6 +192,7 @@ require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php'; require 'HTMLPurifier/Injector/AutoParagraph.php'; require 'HTMLPurifier/Injector/DisplayLinkURI.php'; require 'HTMLPurifier/Injector/Linkify.php'; +require 'HTMLPurifier/Injector/MathSpaceNormalize.php'; require 'HTMLPurifier/Injector/PurifierLinkify.php'; require 'HTMLPurifier/Injector/RemoveEmpty.php'; require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php'; diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php index 0da1e6ed..73251972 100644 --- a/library/HTMLPurifier.safe-includes.php +++ b/library/HTMLPurifier.safe-includes.php @@ -186,6 +186,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php'; require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php'; require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php'; require_once $__dir . '/HTMLPurifier/Injector/Linkify.php'; +require_once $__dir . '/HTMLPurifier/Injector/MathSpaceNormalize.php'; require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php'; require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php'; require_once $__dir . 
'/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
diff --git a/library/HTMLPurifier/HTMLModule/Math.php b/library/HTMLPurifier/HTMLModule/Math.php
index 827e6338..6f535ae2 100644
--- a/library/HTMLPurifier/HTMLModule/Math.php
+++ b/library/HTMLPurifier/HTMLModule/Math.php
@@ -19,6 +19,9 @@ class HTMLPurifier_HTMLModule_Math extends HTMLPurifier_HTMLModule
     public function setup($config)
     {
+        // Normalize whitespace inside text elements as per MathML spec 2.1.7
+        $this->info_injector[] = new HTMLPurifier_Injector_MathSpaceNormalize();
+
         /*****************************************************************
          * Meta variables
          * Used in this file to simplify code and help adapt the DTD
diff --git a/library/HTMLPurifier/Injector/MathSpaceNormalize.php b/library/HTMLPurifier/Injector/MathSpaceNormalize.php
new file mode 100644
index 00000000..1228f934
--- /dev/null
+++ b/library/HTMLPurifier/Injector/MathSpaceNormalize.php
@@ -0,0 +1,40 @@
+ and .
+     * @type array
+     */
+    protected $tags = array('mi', 'mn', 'mo', 'ms', 'mtext', 'ci', 'cn', 'csymbol', 'annotation');
+
+    /**
+     * Normalizes the whitespace of text that sits directly inside a MathML
+     * token element: leading and trailing whitespace is dropped and every
+     * internal run becomes a single space (MathML specification, 2.1.7).
+     *
+     * @param HTMLPurifier_Token $token Text token; its data is rewritten in place
+     */
+    public function handleText(&$token)
+    {
+        // No open element => there is no parent tag to inspect
+        if (empty($this->currentNesting)) {
+            return;
+        }
+
+        // Only act when the innermost open element is one of $tags
+        $parent_token = $this->currentNesting[count($this->currentNesting) - 1];
+        if ($parent_token === null || !in_array($parent_token->name, $this->tags, true)) {
+            return;
+        }
+
+        // Double quotes are required so \t, \n, \r are real control characters
+        $normalized = preg_replace('/[ \t\n\r]+/', ' ', trim($token->data, " \t\n\r"));
+        if ($normalized !== null) {
+            $token->data = $normalized; // null would mean a PCRE error: keep the text
+        }
+    }
+
+}