summaryrefslogtreecommitdiff
path: root/lib/htmlpurifier/library/HTMLPurifier/Filter
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2009-06-22 13:56:49 +0400
committer Andrew Dolgov <[email protected]> 2009-06-22 13:56:49 +0400
commit f45a286b8d62f710b519a98c7d4b75a0c34d5d10 (patch)
tree 0c310b7b9d44e12fac1cd11e1563c4cef9b5eab2 /lib/htmlpurifier/library/HTMLPurifier/Filter
parent 5c4461432c290ad4863fd7dc4107121db59b298c (diff)
strip_tags_long: use htmlpurifier to properly reformat html content
Diffstat (limited to 'lib/htmlpurifier/library/HTMLPurifier/Filter')
-rwxr-xr-x lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php 135
-rwxr-xr-x lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php 39
2 files changed, 174 insertions, 0 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
new file mode 100755
index 000000000..970f9e0c9
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
@@ -0,0 +1,135 @@
+<?php
+
+/**
+ * This filter extracts <style> blocks from input HTML, cleans them up
+ * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
+ * so they can be used elsewhere in the document.
+ *
+ * @note
+ * See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for
+ * sample usage.
+ *
+ * @note
+ * This filter can also be used on stylesheets not included in the
+ * document--something purists would probably prefer. Just directly
+ * call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS()
+ */
+class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
+{
+
+    /** Name of this filter. */
+    public $name = 'ExtractStyleBlocks';
+
+    /** Raw <style> contents collected by styleCallback() during one preFilter() run. */
+    private $_styleMatches = array();
+
+    /**
+     * CSS tidy implementation. A csstidy instance by default; preFilter()
+     * replaces it when the ExtractStyleBlocksTidyImpl parameter is not null.
+     */
+    private $_tidy;
+
+    public function __construct() {
+        $this->_tidy = new csstidy();
+    }
+
+    /**
+     * Save the contents of CSS blocks to style matches.
+     *
+     * Nothing is returned, so preg_replace_callback() substitutes an empty
+     * string and the matched <style> element disappears from the HTML.
+     * @param $matches preg_replace style $matches array
+     */
+    protected function styleCallback($matches) {
+        $this->_styleMatches[] = $matches[1];
+    }
+
+    /**
+     * Removes inline <style> tags from HTML, saves them for later use.
+     *
+     * The extracted blocks are registered in $context under 'StyleBlocks'
+     * and, when a tidy implementation is set, each one is passed through
+     * cleanCSS().
+     * @todo Extend to indicate non-text/css style blocks
+     * @param $html HTML to filter
+     * @param $config Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return HTML with the matched <style> elements removed
+     */
+    public function preFilter($html, $config, $context) {
+        $tidy = $config->get('FilterParam', 'ExtractStyleBlocksTidyImpl');
+        if ($tidy !== null) $this->_tidy = $tidy;
+        // ungreedy (U), case-insensitive, dot matches newlines; (.+) means
+        // a completely empty <style></style> pair is not matched
+        $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
+        $style_blocks = $this->_styleMatches;
+        $this->_styleMatches = array(); // reset
+        // NOTE(review): the blocks are cleaned *after* registration, which
+        // presumably relies on register() binding its argument by
+        // reference -- confirm against HTMLPurifier_Context.
+        $context->register('StyleBlocks', $style_blocks); // $context must not be reused
+        if ($this->_tidy) {
+            foreach ($style_blocks as &$style) {
+                $style = $this->cleanCSS($style, $config, $context);
+            }
+            unset($style); // do not leave a dangling reference to the last element
+        }
+        return $html;
+    }
+
+    /**
+     * Takes CSS (the stuff found in <style>) and cleans it.
+     *
+     * Steps: strip a surrounding HTML comment wrapper, parse with the tidy
+     * implementation, optionally prefix every selector with the configured
+     * scope(s), drop or normalise each property via the CSS definition,
+     * discard imports/charset/namespace, and optionally escape <, > and &.
+     * @warning Requires CSSTidy <http://csstidy.sourceforge.net/>
+     * @param $css     CSS styling to clean
+     * @param $config  Instance of HTMLPurifier_Config
+     * @param $context Instance of HTMLPurifier_Context
+     * @return Cleaned CSS
+     */
+    public function cleanCSS($css, $config, $context) {
+        // prepare scope
+        $scope = $config->get('FilterParam', 'ExtractStyleBlocksScope');
+        if ($scope !== null) {
+            $scopes = array_map('trim', explode(',', $scope));
+        } else {
+            $scopes = array();
+        }
+        // remove comments from CSS
+        $css = trim($css);
+        if (strncmp('<!--', $css, 4) === 0) {
+            $css = substr($css, 4);
+        }
+        if (strlen($css) > 3 && substr($css, -3) === '-->') {
+            $css = substr($css, 0, -3);
+        }
+        $css = trim($css);
+        $this->_tidy->parse($css);
+        $css_definition = $config->getDefinition('CSS');
+        foreach ($this->_tidy->css as $k => $decls) {
+            // $decls are all CSS declarations inside an @ selector
+            $new_decls = array();
+            foreach ($decls as $selector => $style) {
+                $selector = trim($selector);
+                if ($selector === '') continue; // should not happen
+                // selectors that begin with '+' are dropped entirely
+                if ($selector[0] === '+') continue;
+                if (!empty($scopes)) {
+                    $new_selector = array(); // because multiple ones are possible
+                    $selectors = array_map('trim', explode(',', $selector));
+                    // every scope is combined with every selector
+                    foreach ($scopes as $s1) {
+                        foreach ($selectors as $s2) {
+                            $new_selector[] = "$s1 $s2";
+                        }
+                    }
+                    $selector = implode(', ', $new_selector); // now it's a string
+                }
+                foreach ($style as $name => $value) {
+                    // properties unknown to the CSS definition are removed
+                    if (!isset($css_definition->info[$name])) {
+                        unset($style[$name]);
+                        continue;
+                    }
+                    $def = $css_definition->info[$name];
+                    $ret = $def->validate($value, $config, $context);
+                    // false means invalid; anything else replaces the value
+                    if ($ret === false) unset($style[$name]);
+                    else $style[$name] = $ret;
+                }
+                $new_decls[$selector] = $style;
+            }
+            $this->_tidy->css[$k] = $new_decls;
+        }
+        // remove stuff that shouldn't be used, could be reenabled
+        // after security risks are analyzed
+        $this->_tidy->import = array();
+        $this->_tidy->charset = null;
+        $this->_tidy->namespace = null;
+        $css = $this->_tidy->print->plain();
+        // we are going to escape any special characters <>& to ensure
+        // that no funny business occurs (i.e. </style> in a font-family prop).
+        if ($config->get('FilterParam', 'ExtractStyleBlocksEscaping')) {
+            $css = str_replace(
+                array('<', '>', '&'),
+                array('\3C ', '\3E ', '\26 '),
+                $css
+            );
+        }
+        return $css;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php
new file mode 100755
index 000000000..aca972f6c
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php
@@ -0,0 +1,39 @@
+<?php
+
+class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
+{
+
+    /** Name of this filter. */
+    public $name = 'YouTube';
+
+    /**
+     * Replaces every <object> element that references a
+     * http://www.youtube.com/v/ID URL with the placeholder
+     * <span class="youtube-embed">ID</span>.
+     * @param $html HTML to filter
+     * @param $config Not used by this filter
+     * @param $context Not used by this filter
+     * @return HTML with matching <object> elements replaced
+     */
+    public function preFilter($html, $config, $context) {
+        // dots in the host name are escaped so they only match a literal '.'
+        $pre_regex = '#<object[^>]+>.+?'.
+            'http://www\.youtube\.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
+        $pre_replace = '<span class="youtube-embed">\1</span>';
+        return preg_replace($pre_regex, $pre_replace, $html);
+    }
+
+    /**
+     * Expands each <span class="youtube-embed">ID</span> placeholder into
+     * the markup built by postFilterCallback().
+     * @param $html HTML to filter
+     * @param $config Not used by this filter
+     * @param $context Not used by this filter
+     * @return HTML with the placeholders expanded
+     */
+    public function postFilter($html, $config, $context) {
+        $post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
+        return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
+    }
+
+    /**
+     * Rewrites every '--' in the given string as '-&#45;'.
+     *
+     * Presumably needed because the value is also emitted inside the
+     * <!--[if IE]> conditional comment built by postFilterCallback().
+     * @param $url String to armor (the captured video ID)
+     * @return Armored string
+     */
+    protected function armorUrl($url) {
+        return str_replace('--', '-&#45;', $url);
+    }
+
+    /**
+     * Builds the <object> markup (with an <embed> fallback wrapped in an
+     * IE conditional comment) for one placeholder match.
+     * @param $matches preg_replace_callback style $matches array;
+     *                 $matches[1] is the captured video ID
+     * @return Replacement HTML
+     */
+    protected function postFilterCallback($matches) {
+        $url = $this->armorUrl($matches[1]);
+        // each attribute fragment ends with a space so that consecutive
+        // attributes of <embed> are separated by whitespace
+        return '<object width="425" height="350" type="application/x-shockwave-flash" '.
+            'data="http://www.youtube.com/v/'.$url.'">'.
+            '<param name="movie" value="http://www.youtube.com/v/'.$url.'"></param>'.
+            '<!--[if IE]>'.
+            '<embed src="http://www.youtube.com/v/'.$url.'" '.
+            'type="application/x-shockwave-flash" '.
+            'wmode="transparent" width="425" height="350" />'.
+            '<![endif]-->'.
+            '</object>';
+    }
+}
+
+// vim: et sw=4 sts=4