strip_tags_long: use htmlpurifier to properly reformat html content

author: Andrew Dolgov <[email protected]> 2009-06-22 13:56:49 +0400
committer: Andrew Dolgov <[email protected]> 2009-06-22 13:56:49 +0400
commit: f45a286b8d62f710b519a98c7d4b75a0c34d5d10 (patch)
tree: 0c310b7b9d44e12fac1cd11e1563c4cef9b5eab2 /lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
parent: 5c4461432c290ad4863fd7dc4107121db59b298c (diff)
1 files changed, 48 insertions, 0 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
new file mode 100755
index 000000000..55035c4d0
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Validates contents based on NMTOKENS attribute type.
+ * @note The only current use for this is the class attribute in HTML
+ * @note Could have some functionality factored out into Nmtoken class
+ * @warning We cannot assume this class will be used only for 'class'
+ *          attributes. Not sure how to hook in magic behavior, then.
+ */
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
+{
+    // Keeps only the well-formed tokens of $string, space-joined; false if none.
+    public function validate($string, $config, $context) {
+        // $config and $context are not used by this implementation.
+        $string = trim($string);
+
+        // early abort: '' and '0' (strings that convert to false) are invalid
+        if (!$string) return false;
+
+        // OPTIMIZABLE!
+        // run preg_match_all and capture each valid token for reformulation
+
+        // Tokens are restricted to ASCII: codepoints U+00A1 and up and
+        // escapes are not supported, since they are hard to express
+        // in the regexp, would complicate optimization efforts, and are
+        // rarely seen in practice anyway.
+        $matches = array();
+        $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
+                   '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. // group 1: the token itself
+                   '(?:(?=\s)|\z)/'; // look ahead for space or string end
+        preg_match_all($pattern, $string, $matches);
+        // $matches[1] holds the captured tokens; none means nothing valid is left
+        if (empty($matches[1])) return false;
+
+        // reconstruct string: valid tokens joined by single spaces
+        $new_string = '';
+        foreach ($matches[1] as $token) {
+            $new_string .= $token . ' ';
+        }
+        $new_string = rtrim($new_string); // drop the trailing space added by the loop
+
+        return $new_string;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
author	Andrew Dolgov <[email protected]>	2009-06-22 13:56:49 +0400
committer	Andrew Dolgov <[email protected]>	2009-06-22 13:56:49 +0400
commit	f45a286b8d62f710b519a98c7d4b75a0c34d5d10 (patch)
tree	0c310b7b9d44e12fac1cd11e1563c4cef9b5eab2 /lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
parent	5c4461432c290ad4863fd7dc4107121db59b298c (diff)