summaryrefslogtreecommitdiff
path: root/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2009-06-22 13:56:49 +0400
committerAndrew Dolgov <[email protected]>2009-06-22 13:56:49 +0400
commitf45a286b8d62f710b519a98c7d4b75a0c34d5d10 (patch)
tree0c310b7b9d44e12fac1cd11e1563c4cef9b5eab2 /lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
parent5c4461432c290ad4863fd7dc4107121db59b298c (diff)
strip_tags_long: use htmlpurifier to properly reformat html content
Diffstat (limited to 'lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php')
-rwxr-xr-xlib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php48
1 files changed, 48 insertions, 0 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
new file mode 100755
index 000000000..55035c4d0
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Validates contents based on NMTOKENS attribute type.
+ * @note The only current use for this is the class attribute in HTML
+ * @note Could have some functionality factored out into Nmtoken class
+ * @warning We cannot assume this class will be used only for 'class'
+ * attributes. Not sure how to hook in magic behavior, then.
+ */
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
+{
+
+ public function validate($string, $config, $context) {
+
+ $string = trim($string);
+
+ // early abort: '' and '0' (strings that convert to false) are invalid
+ if (!$string) return false;
+
+ // OPTIMIZABLE!
+ // do the preg_match, capture all subpatterns for reformulation
+
+ // we don't support U+00A1 and up codepoints or
+ // escaping because I don't know how to do that with regexps
+ // and plus it would complicate optimization efforts (you never
+ // see that anyway).
+ $matches = array();
+ $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
+ '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
+ '(?:(?=\s)|\z)/'; // look ahead for space or string end
+ preg_match_all($pattern, $string, $matches);
+
+ if (empty($matches[1])) return false;
+
+ // reconstruct string
+ $new_string = '';
+ foreach ($matches[1] as $token) {
+ $new_string .= $token . ' ';
+ }
+ $new_string = rtrim($new_string);
+
+ return $new_string;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4