htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php

<?php

/**
 * Validates contents based on NMTOKENS attribute type.
 * @note The only current use for this is the class attribute in HTML
 * @note Could have some functionality factored out into Nmtoken class
 * @warning We cannot assume this class will be used only for 'class'
 *          attributes. Not sure how to hook in magic behavior, then.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    public function validate($string, $config, $context) {

        $string = trim($string);

        // early abort: '' and '0' (strings that convert to false) are invalid
        if (!$string) return false;

        // OPTIMIZABLE!
        // do the preg_match, capture all subpatterns for reformulation

        // we don't support U+00A1 and up codepoints or
        // escaping because I don't know how to do that with regexps
        // and plus it would complicate optimization efforts (you never
        // see that anyway).
        $matches = array();
        $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
                   '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
                   '(?:(?=\s)|\z)/'; // look ahead for space or string end
        preg_match_all($pattern, $string, $matches);

        if (empty($matches[1])) return false;

        // reconstruct string
        $new_string = '';
        foreach ($matches[1] as $token) {
            $new_string .= $token . ' ';
        }
        $new_string = rtrim($new_string);

        return $new_string;

    }

}

// vim: et sw=4 sts=4