2006-08-04 02:48:20 +00:00
|
|
|
<?php
|
|
|
|
|
2006-08-20 21:47:15 +00:00
|
|
|
/**
 * Validates contents based on NMTOKENS attribute type.
 *
 * The attribute value is treated as a whitespace-separated list of tokens.
 * Tokens with an accepted shape are kept, everything else is discarded, and
 * the survivors are joined back together with single spaces.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * Validates a whitespace-separated token list.
     *
     * @param string $string  Raw attribute value.
     * @param mixed  $config  Not inspected here; forwarded as-is to filter().
     * @param mixed  $context Not inspected here; forwarded as-is to filter().
     * @return string|false The accepted tokens joined by single spaces, or
     *                      false when the value is empty, is '0', or no token
     *                      survives splitting and filtering.
     */
    public function validate($string, $config, $context)
    {
        $trimmed = trim($string);

        // A string that converts to false is either '' or '0'; neither can
        // form a valid token list, so bail out before doing any regex work.
        if (!$trimmed) {
            return false;
        }

        // Extract well-formed tokens, then let the filter() hook veto some.
        $accepted = $this->filter($this->split($trimmed), $config, $context);

        return empty($accepted) ? false : implode(' ', $accepted);
    }

    /**
     * Breaks a whitespace-separated string into its well-formed tokens.
     *
     * A token is accepted when it is delimited by whitespace (or the string
     * boundaries), begins with "--" or with an optional "-" followed by an
     * ASCII letter or underscore, and continues with ASCII letters, digits,
     * underscores or hyphens. Anything not matching is silently skipped.
     *
     * Known limitation: code points U+00A1 and above, as well as escape
     * sequences, are not recognised. Supporting them would complicate the
     * pattern for input that is rarely seen in practice. The matching itself
     * is a candidate for later optimisation.
     *
     * @param string $string Token list to split.
     * @return array Matched tokens, in the order they appear in the input.
     */
    protected function split($string)
    {
        // Token must be preceded by whitespace or sit at the string start.
        $leadingBoundary = '/(?:(?<=\s)|\A)';
        // The token itself; this is the only capturing group (index 1).
        $tokenShape = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';
        // Token must be followed by whitespace or sit at the string end.
        $trailingBoundary = '(?:(?=\s)|\z)/';

        $pattern = $leadingBoundary . $tokenShape . $trailingBoundary;

        preg_match_all($pattern, $string, $captures);

        return $captures[1];
    }

    /**
     * Template method that lets subclasses discard tokens by their own rules.
     *
     * The base implementation keeps every token. An array_filter() call with
     * a callback would be the more functional design; an overridable method
     * is used instead.
     *
     * @param array $tokens  Tokens produced by split().
     * @param mixed $config  Unused here; available to overriding classes.
     * @param mixed $context Unused here; available to overriding classes.
     * @return array The tokens that remain allowed (here: all of them).
     */
    protected function filter($tokens, $config, $context)
    {
        return $tokens;
    }

}
|
|
|
|
|
2008-12-06 09:24:59 +00:00
|
|
|
// vim: et sw=4 sts=4
|