diff options
author | Andrew Dolgov <[email protected]> | 2009-06-22 13:56:49 +0400 |
---|---|---|
committer | Andrew Dolgov <[email protected]> | 2009-06-22 13:56:49 +0400 |
commit | f45a286b8d62f710b519a98c7d4b75a0c34d5d10 (patch) | |
tree | 0c310b7b9d44e12fac1cd11e1563c4cef9b5eab2 /lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php | |
parent | 5c4461432c290ad4863fd7dc4107121db59b298c (diff) |
strip_tags_long: use htmlpurifier to properly reformat html content
Diffstat (limited to 'lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php')
-rwxr-xr-x | lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php | 48 |
1 files changed, 48 insertions, 0 deletions
<?php

/**
 * Attribute definition that validates values of the NMTOKENS attribute type.
 *
 * @note The only current use for this is the class attribute in HTML.
 * @note Some of this functionality could be factored out into an Nmtoken class.
 * @warning This class cannot be assumed to be used only for 'class'
 *          attributes, so it is unclear how attribute-specific ("magic")
 *          behavior could be hooked in.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * Filters a whitespace-separated token list down to its valid tokens.
     *
     * A token is kept when it is delimited by whitespace or the string
     * boundaries, starts with either "--" or an optional "-" followed by an
     * ASCII letter or underscore, and continues with ASCII letters, digits,
     * underscores or hyphens. Anything else is silently dropped.
     *
     * Codepoints from U+00A1 upwards and escape sequences are not supported.
     *
     * @param string $string  Raw attribute value.
     * @param mixed  $config  Not used by this method.
     * @param mixed  $context Not used by this method.
     * @return string|bool The surviving tokens joined by single spaces, or
     *                     false when the trimmed value is '' or '0' or when
     *                     no token is valid.
     */
    public function validate($string, $config, $context) {

        $trimmed = trim($string);

        // Strings that convert to false ('' and '0') are rejected up front.
        if (!$trimmed) {
            return false;
        }

        // Capture group 1 holds each well-formed token.
        $found = array();
        $regex = '/(?:(?<=\s)|\A)'. // preceded by whitespace or string start
                 '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
                 '(?:(?=\s)|\z)/';  // followed by whitespace or string end
        preg_match_all($regex, $trimmed, $found);

        if (empty($found[1])) {
            return false;
        }

        // Tokens contain no whitespace, so joining them with single spaces
        // yields the normalized attribute value.
        return implode(' ', $found[1]);

    }

}

// vim: et sw=4 sts=4