From 12b1110bf6fe420e3f8f8db246dfc8160ad363ca Mon Sep 17 00:00:00 2001 From: Xavier Ripoll Date: Wed, 14 Nov 2018 17:15:30 +0100 Subject: [PATCH] MOODLE-556: Changed some CDATA generic attributes to more specific AttrDefs --- library/HTMLPurifier/AttrDef/MathML/ID.php | 77 ++++++++++++++++++++++ library/HTMLPurifier/HTMLModule/MathML.php | 14 ++-- 2 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 library/HTMLPurifier/AttrDef/MathML/ID.php diff --git a/library/HTMLPurifier/AttrDef/MathML/ID.php b/library/HTMLPurifier/AttrDef/MathML/ID.php new file mode 100644 index 00000000..2809252d --- /dev/null +++ b/library/HTMLPurifier/AttrDef/MathML/ID.php @@ -0,0 +1,77 @@ +get('Attr.IDPrefix'); + if ($prefix !== '') { + $prefix .= $config->get('Attr.IDPrefixLocal'); + // prevent re-appending the prefix + if (strpos($id, $prefix) !== 0) { + $id = $prefix . $id; + } + } elseif ($config->get('Attr.IDPrefixLocal') !== '') { + trigger_error( + '%Attr.IDPrefixLocal cannot be used unless ' . + '%Attr.IDPrefix is set', + E_USER_WARNING + ); + } + + // we purposely avoid using regex, hopefully this is faster + + if ($config->get('Attr.ID.HTML5') === true) { + if (preg_match('/[\t\n\x0b\x0c ]/', $id)) { + return false; + } + } else { + if (ctype_alpha($id)) { + // OK + } else { + if (!ctype_alpha(@$id[0])) { + return false; + } + // primitive style of regexps, I suppose + $trim = trim( + $id, + 'A..Za..z0..9:-._' + ); + if ($trim !== '') { + return false; + } + } + } + + $regexp = $config->get('Attr.IDBlacklistRegexp'); + if ($regexp && preg_match($regexp, $id)) { + return false; + } + + // if no change was made to the ID, return the result + // else, return the new id if stripping whitespace made it + // valid, or return false. + return $id; + } +} \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/MathML.php b/library/HTMLPurifier/HTMLModule/MathML.php index b35b9d4a..c4e9cf18 100644 --- a/library/HTMLPurifier/HTMLModule/MathML.php +++ b/library/HTMLPurifier/HTMLModule/MathML.php @@ -72,21 +72,21 @@ class HTMLPurifier_HTMLModule_MathML extends HTMLPurifier_HTMLModule $E['DefEncAtt'] = array( 'encoding' => 'CDATA', - 'definitionurl' => 'CDATA' + 'definitionurl' => 'URI' ); $E['CommonAtt'] = array_merge( array( 'xmlns' => 'Bool#http://www.w3.org/1998/Math/MathML', - $E['XLINK.prefix'] . ':href' => 'CDATA', + $E['XLINK.prefix'] . ':href' => 'URI', $E['XLINK.prefix'] . ':type' => 'CDATA', 'xml:lang' => 'CDATA', 'xml:space' => 'Enum#default,preserve', - 'id' => 'CDATA', // MathML allows multiple elements with same ID - 'xref' => 'CDATA', - 'class' => 'CDATA', - 'style' => 'CDATA', - 'href' => 'CDATA', + 'id' => new HTMLPurifier_AttrDef_MathML_ID(), // MathML allows multiple elements with same ID + 'xref' => new HTMLPurifier_AttrDef_MathML_ID(), + 'class' => 'Class', + 'style' => new HTMLPurifier_AttrDef_CSS(), + 'href' => 'URI', 'other' => 'CDATA', ), $proprietary_att_wrs,