From ad34dd3841a2edad9fcd5dbd18839b73908cf4bc Mon Sep 17 00:00:00 2001 From: Xavier Ripoll Date: Thu, 15 Nov 2018 14:56:11 +0100 Subject: [PATCH] MOODLE-556: Added some types for MathML attributes to restrict use of CDATA --- .../HTMLPurifier/AttrDef/MathML/Character.php | 25 ++++ library/HTMLPurifier/AttrDef/MathML/Color.php | 44 ++++++ .../HTMLPurifier/AttrDef/MathML/Length.php | 36 +++++ library/HTMLPurifier/AttrTypes.php | 13 ++ library/HTMLPurifier/HTMLModule/MathML.php | 23 +-- library/HTMLPurifier/Length.php | 3 +- library/HTMLPurifier/MathMLLength.php | 136 ++++++++++++++++++ 7 files changed, 268 insertions(+), 12 deletions(-) create mode 100644 library/HTMLPurifier/AttrDef/MathML/Character.php create mode 100644 library/HTMLPurifier/AttrDef/MathML/Color.php create mode 100644 library/HTMLPurifier/AttrDef/MathML/Length.php create mode 100644 library/HTMLPurifier/MathMLLength.php diff --git a/library/HTMLPurifier/AttrDef/MathML/Character.php b/library/HTMLPurifier/AttrDef/MathML/Character.php new file mode 100644 index 00000000..94248c55 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/MathML/Character.php @@ -0,0 +1,25 @@ +parseCDATA($string); + + // Optimizations + if ($string === '') { + return false; + } + if ($string === '0') { + return '0'; + } + + $length = HTMLPurifier_MathMLLength::make($string); + if (!$length->isValid()) { + return false; + } + + return $length->toString(); + } +} \ No newline at end of file diff --git a/library/HTMLPurifier/AttrTypes.php b/library/HTMLPurifier/AttrTypes.php index 3b70520b..069137e5 100644 --- a/library/HTMLPurifier/AttrTypes.php +++ b/library/HTMLPurifier/AttrTypes.php @@ -50,10 +50,23 @@ class HTMLPurifier_AttrTypes // "proprietary" types $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class(); + $this->info['CSS'] = new HTMLPurifier_AttrDef_CSS(); // number is really a positive integer (one or more digits) // FIXME: ^^ not always, see start and value of list items $this->info['Number'] = new 
HTMLPurifier_AttrDef_Integer(false, false, true); + + // MathML types + $this->info['MathML_ID'] = new HTMLPurifier_AttrDef_MathML_ID(); + $this->info['MathML_Length'] = new HTMLPurifier_AttrDef_MathML_Length(); + $this->info['MathML_UnsignedInteger'] = new HTMLPurifier_AttrDef_Integer(false, true, true); + $this->info['MathML_PositiveInteger'] = new HTMLPurifier_AttrDef_Integer(false, false, true); + $this->info['MathML_Integer'] = new HTMLPurifier_AttrDef_Integer(true, true, true); + $this->info['MathML_UnsignedNumber'] = new HTMLPurifier_AttrDef_CSS_Number(true); + $this->info['MathML_Number'] = new HTMLPurifier_AttrDef_CSS_Number(false); + $this->info['MathML_Character'] = new HTMLPurifier_AttrDef_MathML_Character(); + $this->info['MathML_Color'] = new HTMLPurifier_AttrDef_MathML_Color(); + } private static function makeEnum($in) diff --git a/library/HTMLPurifier/HTMLModule/MathML.php b/library/HTMLPurifier/HTMLModule/MathML.php index c4e9cf18..170b3ba2 100644 --- a/library/HTMLPurifier/HTMLModule/MathML.php +++ b/library/HTMLPurifier/HTMLModule/MathML.php @@ -82,10 +82,10 @@ class HTMLPurifier_HTMLModule_MathML extends HTMLPurifier_HTMLModule $E['XLINK.prefix'] . 
':type' => 'CDATA', 'xml:lang' => 'CDATA', 'xml:space' => 'Enum#default,preserve', - 'id' => new HTMLPurifier_AttrDef_MathML_ID(), // MathML allows multiple elements with same ID - 'xref' => new HTMLPurifier_AttrDef_MathML_ID(), + 'id' => 'MathML_ID', // MathML allows multiple elements with same ID + 'xref' => 'MathML_ID', 'class' => 'Class', - 'style' => new HTMLPurifier_AttrDef_CSS(), + 'style' => 'CSS', 'href' => 'URI', 'other' => 'CDATA', ), @@ -1442,16 +1442,16 @@ class HTMLPurifier_HTMLModule_MathML extends HTMLPurifier_HTMLModule $E['mpadded-length'] = 'CDATA'; $E['linestyle'] = 'Enum#none,solid,dashed'; $E['columnalignstyle'] = 'Enum#left,center,right'; - $E['unsigned-integer'] = 'CDATA'; - $E['integer'] = 'CDATA'; - $E['number'] = 'CDATA'; - $E['character'] = 'CDATA'; - $E['color'] = 'CDATA'; - $E['positive-integer'] = 'CDATA'; + $E['unsigned-integer'] = 'MathML_UnsignedInteger'; + $E['integer'] = 'MathML_Integer'; + $E['number'] = 'MathML_Number'; + $E['character'] = 'MathML_Character'; + $E['color'] = 'MathML_Color'; + $E['positive-integer'] = 'MathML_PositiveInteger'; $E['token.content'] = '#PCDATA|mglyph|malignmark'; - $E['length'] = 'CDATA'; + $E['length'] = 'MathML_Length'; $E['DeprecatedTokenAtt'] = array( 'fontfamily' => 'CDATA', 'fontweight' => 'Enum#normal,bold', @@ -2167,7 +2167,8 @@ class HTMLPurifier_HTMLModule_MathML extends HTMLPurifier_HTMLModule array_merge( $CCPAtt, array( - 'actiontype*' => 'CDATA', + // Using 'actiontype*' removes maction element altogether + 'actiontype' => 'CDATA', 'selection' => $E['positive-integer'], ) ) diff --git a/library/HTMLPurifier/Length.php b/library/HTMLPurifier/Length.php index e70da55a..4b5e2562 100644 --- a/library/HTMLPurifier/Length.php +++ b/library/HTMLPurifier/Length.php @@ -81,7 +81,8 @@ class HTMLPurifier_Length if (!ctype_lower($this->unit)) { $this->unit = strtolower($this->unit); } - if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) { + if (!isset(static::$allowedUnits[$this->unit]) 
&& + !(isset(static::$allowedUnits['']) && $this->unit === false)) { return false; } // Hack: diff --git a/library/HTMLPurifier/MathMLLength.php b/library/HTMLPurifier/MathMLLength.php new file mode 100644 index 00000000..ecdef7fc --- /dev/null +++ b/library/HTMLPurifier/MathMLLength.php @@ -0,0 +1,136 @@ + true, + 'verythinmathspace' => true, + 'thinmathspace' => true, + 'mediummathspace' => true, + 'thickmathspace' => true, + 'verythickmathspace' => true, + 'veryverythickmathspace' => true, + 'negativeveryverythinmathspace' => true, + 'negativeverythinmathspace' => true, + 'negativethinmathspace' => true, + 'negativemediummathspace' => true, + 'negativethickmathspace' => true, + 'negativeverythickmathspace' => true, + 'negativeveryverythickmathspace' => true + ); + + /** + * Array Lookup array of units recognized by MathML. + * @note This is a restriction of HTMLPurifier_Length's allowed units. + * @type array + */ + protected static $allowedUnits = array( + 'em' => true, 'ex' => true, 'px' => true, 'in' => true, + 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true, + '%' => true, '' => true + ); + + /** + * @param string $n Magnitude + * @param bool|string $u Unit + */ + public function __construct($n = '0', $u = false, $namedspace = '') + { + if ($namedspace) { + $this->namedspace = strtolower($namedspace); + } else { + $this->n = (string) $n; + $this->unit = $u !== false ? (string) $u : false; + } + } + + /** + * @param string $s Unit string, like '2em' or '3.4in', or namedspace + * @return HTMLPurifier_MathMLLength + * @warning Does not perform validation. 
+     */
+    public static function make($s)
+    {
+        if ($s instanceof HTMLPurifier_MathMLLength) {
+            return $s;
+        }
+        // trim() is only meaningful for strings; any other input skips the
+        // namedspace lookup and goes straight to the generic parser below.
+        if (is_string($s)) {
+            // The value that is looked up and the value that is stored must
+            // be the same string: validate() checks the stored namedspace
+            // without trimming it, so storing the untrimmed input would let
+            // ' thinmathspace ' match here and then miss the lookup there.
+            $name = trim($s);
+            if (isset(self::$allowedNamedspaces[$name])) {
+                return new HTMLPurifier_MathMLLength('0', false, $name);
+            }
+        }
+        // Not a namedspace: delegate parsing to HTMLPurifier_Length::make()
+        // and re-wrap the resulting magnitude and unit in a MathML length.
+        $length = HTMLPurifier_Length::make($s);
+        return new HTMLPurifier_MathMLLength($length->n, $length->unit);
+    }
+
+    /**
+     * Validates the namedspace, or else the number and unit.
+     *
+     * A length whose namedspace is one of the allowed namedspaces is valid
+     * as is; every other length is handed to the parent's validate().
+     * @return bool
+     */
+    protected function validate()
+    {
+        if (isset(self::$allowedNamedspaces[$this->namedspace])) {
+            return true;
+        }
+        return parent::validate();
+    }
+
+    /**
+     * Returns the string representation of this length.
+     * @return string|bool false if the length is not valid; the namedspace
+     *         if one is set; otherwise the result of the parent's toString().
+     */
+    public function toString()
+    {
+        if (!$this->isValid()) {
+            return false;
+        }
+        if ($this->namedspace) {
+            return $this->namedspace;
+        }
+        return parent::toString();
+    }
+
+    /**
+     * Retrieves the namedspace.
+     * @return string The namedspace as stored by the constructor
+     *         (lower-cased); only set when the length was built from one.
+     */
+    public function getNamedspace()
+    {
+        return $this->namedspace;
+    }
+
+    /**
+     * Compares this length with another one.
+     *
+     * Namedspaces carry no magnitude, so as soon as either side is a
+     * namedspace the only possible answers are "same namedspace" (0) and
+     * "not comparable" (null). Lengths without a namedspace are compared by
+     * the parent's compareTo().
+     * @param HTMLPurifier_Length|bool $l Length to compare against, or false
+     * @return int|null|bool false if $l is false; 0 or null when a
+     *         namedspace is involved; otherwise the parent's result
+     * @warning If both values are too large or small, this calculation will
+     *          not work properly
+     */
+    public function compareTo($l)
+    {
+        if ($l === false) {
+            return false;
+        }
+        // Only MathML lengths have a namedspace. Reading the property from
+        // any other HTMLPurifier_Length would touch an undefined property,
+        // so such a length is treated as having none.
+        $other = $l instanceof HTMLPurifier_MathMLLength ? $l->namedspace : null;
+        if ($this->namedspace || $other) {
+            return $this->namedspace === $other ? 0 : null;
+        }
+        return parent::compareTo($l);
+    }
+}
\ No newline at end of file