/**
 * Turns bare http, https and ftp URLs found in a text token into links.
 *
 * When at least one URL is found, $token is replaced (by reference) with
 * an array of tokens: a text token for each non-empty stretch of
 * surrounding text and a start/text/end triple for each URL. In every
 * other case (the 'a' element is not allowed, no URL is present, or the
 * split fails) $token is left untouched.
 *
 * @param object $token Token whose ->data property holds the text to scan;
 *                      passed by reference so it can be replaced in place.
 * @return void
 */
public function handleText(&$token) {
    if (!$this->allowsElement('a')) return;

    if (strpos($token->data, '://') === false) {
        // our really quick heuristic failed, abort
        // this may not work so well if we want to match things like
        // "google.com", but then again, most people don't
        return;
    }

    // there is/are URL(s). Let's split the string:
    // Note: this regex is extremely permissive
    $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() returns false on a PCRE error; a single piece means the
    // "://" heuristic passed but nothing matched (e.g. "foo://bar"). Either
    // way there is nothing to linkify, so keep the original token.
    if ($bits === false || count($bits) < 2) return;

    // With PREG_SPLIT_DELIM_CAPTURE and one capture group the pieces
    // alternate: even indexes are plain text, odd indexes are matched URLs.
    $replacement = array();
    foreach ($bits as $i => $bit) {
        if ($i % 2 === 1) {
            $replacement[] = new HTMLPurifier_Token_Start('a', array('href' => $bit));
            $replacement[] = new HTMLPurifier_Token_Text($bit);
            $replacement[] = new HTMLPurifier_Token_End('a');
        } elseif ($bit !== '') {
            // empty text pieces appear when a URL starts or ends the string
            $replacement[] = new HTMLPurifier_Token_Text($bit);
        }
    }

    // assign only once the full replacement list is built
    $token = $replacement;
}