From 010efc9b814b433bc60353caec185d905688a32b Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 5 Jun 2012 21:52:21 +0400 Subject: Revert "remove htmlpurifier" This reverts commit c21a462d52bd32737c32c29b060da03b38f1c2e6. --- .../HTMLPurifier/URIFilter/DisableExternal.php | 23 +++++ .../URIFilter/DisableExternalResources.php | 12 +++ .../HTMLPurifier/URIFilter/DisableResources.php | 11 ++ .../HTMLPurifier/URIFilter/HostBlacklist.php | 25 +++++ .../HTMLPurifier/URIFilter/MakeAbsolute.php | 114 +++++++++++++++++++++ .../library/HTMLPurifier/URIFilter/Munge.php | 53 ++++++++++ .../library/HTMLPurifier/URIFilter/SafeIframe.php | 35 +++++++ 7 files changed, 273 insertions(+) create mode 100644 lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternal.php create mode 100644 lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php create mode 100644 lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php create mode 100644 lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php create mode 100644 lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php create mode 100644 lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php create mode 100644 lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php (limited to 'lib/htmlpurifier/library/HTMLPurifier/URIFilter') diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternal.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternal.php new file mode 100644 index 000000000..d8a39a501 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternal.php @@ -0,0 +1,23 @@ +getDefinition('URI')->host; + if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host)); + } + public function filter(&$uri, $config, $context) { + if (is_null($uri->host)) return true; + if ($this->ourHostParts === false) return false; + $host_parts = array_reverse(explode('.', $uri->host)); + foreach ($this->ourHostParts as $i => $x) { + if (!isset($host_parts[$i])) return false; + if ($host_parts[$i] != $this->ourHostParts[$i]) return false; + } + return true; + } +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php new file mode 100644 index 000000000..881abc43c --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php @@ -0,0 +1,12 @@ +get('EmbeddedURI', true)) return true; + return parent::filter($uri, $config, $context); + } +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php new file mode 100644 index 000000000..67538c7bb --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php @@ -0,0 +1,11 @@ +get('EmbeddedURI', true); + } +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php new file mode 100644 index 000000000..55fde3bf4 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php @@ -0,0 +1,25 @@ +blacklist = $config->get('URI.HostBlacklist'); + return true; + } + public function filter(&$uri, $config, $context) { + foreach($this->blacklist as $blacklisted_host_fragment) { + if (strpos($uri->host, $blacklisted_host_fragment) !== false) { + return false; + } + } + return true; + } +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php new file mode 100644 index 000000000..f46ab2630 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php @@ -0,0 +1,114 @@ +getDefinition('URI'); + $this->base = $def->base; + if (is_null($this->base)) { + trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_WARNING); + return false; + } + $this->base->fragment = null; // fragment is invalid for base URI + $stack = explode('/', $this->base->path); + array_pop($stack); // discard last segment + $stack = $this->_collapseStack($stack); // do pre-parsing + $this->basePathStack = $stack; + return true; + } + public function filter(&$uri, $config, $context) { + if (is_null($this->base)) return true; // abort early + if ( + $uri->path === '' && is_null($uri->scheme) && + is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment) + ) { + // reference to current document + $uri = clone $this->base; + return true; + } + if (!is_null($uri->scheme)) { + // absolute URI already: don't change + if (!is_null($uri->host)) return true; + $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) { + // scheme not recognized + return false; + } + if (!$scheme_obj->hierarchical) { + // non-hierarchal URI with explicit scheme, don't change + return true; + } + // special case: had a scheme but always is hierarchical and had no authority + } + if (!is_null($uri->host)) { + // network path, don't bother + return true; + } + if ($uri->path === '') { + $uri->path = $this->base->path; + } elseif ($uri->path[0] !== '/') { + // relative path, needs more complicated processing + $stack = explode('/', $uri->path); + $new_stack = array_merge($this->basePathStack, $stack); + if ($new_stack[0] !== '' && !is_null($this->base->host)) { + array_unshift($new_stack, ''); + } + $new_stack = $this->_collapseStack($new_stack); + $uri->path = implode('/', $new_stack); + } else { + // absolute path, but still we should collapse + $uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path))); + } + // re-combine + $uri->scheme = $this->base->scheme; + if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo; + if (is_null($uri->host)) $uri->host = $this->base->host; + if (is_null($uri->port)) $uri->port = $this->base->port; + return true; + } + + /** + * Resolve dots and double-dots in a path stack + */ + private function _collapseStack($stack) { + $result = array(); + $is_folder = false; + for ($i = 0; isset($stack[$i]); $i++) { + $is_folder = false; + // absorb an internally duplicated slash + if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue; + if ($stack[$i] == '..') { + if (!empty($result)) { + $segment = array_pop($result); + if ($segment === '' && empty($result)) { + // error case: attempted to back out too far: + // restore the leading slash + $result[] = ''; + } elseif ($segment === '..') { + $result[] = '..'; // cannot remove .. with .. + } + } else { + // relative path, preserve the double-dots + $result[] = '..'; + } + $is_folder = true; + continue; + } + if ($stack[$i] == '.') { + // silently absorb + $is_folder = true; + continue; + } + $result[] = $stack[$i]; + } + if ($is_folder) $result[] = ''; + return $result; + } +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php new file mode 100644 index 000000000..de695df14 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php @@ -0,0 +1,53 @@ +target = $config->get('URI.' . $this->name); + $this->parser = new HTMLPurifier_URIParser(); + $this->doEmbed = $config->get('URI.MungeResources'); + $this->secretKey = $config->get('URI.MungeSecretKey'); + return true; + } + public function filter(&$uri, $config, $context) { + if ($context->get('EmbeddedURI', true) && !$this->doEmbed) return true; + + $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it + if (!$scheme_obj->browsable) return true; // ignore non-browseable schemes, since we can't munge those in a reasonable way + if ($uri->isBenign($config, $context)) return true; // don't redirect if a benign URL + + $this->makeReplace($uri, $config, $context); + $this->replace = array_map('rawurlencode', $this->replace); + + $new_uri = strtr($this->target, $this->replace); + $new_uri = $this->parser->parse($new_uri); + // don't redirect if the target host is the same as the + // starting host + if ($uri->host === $new_uri->host) return true; + $uri = $new_uri; // overwrite + return true; + } + + protected function makeReplace($uri, $config, $context) { + $string = $uri->toString(); + // always available + $this->replace['%s'] = $string; + $this->replace['%r'] = $context->get('EmbeddedURI', true); + $token = $context->get('CurrentToken', true); + $this->replace['%n'] = $token ? $token->name : null; + $this->replace['%m'] = $context->get('CurrentAttr', true); + $this->replace['%p'] = $context->get('CurrentCSSProperty', true); + // not always available + if ($this->secretKey) $this->replace['%t'] = sha1($this->secretKey . ':' . $string); + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php new file mode 100644 index 000000000..284bb13de --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php @@ -0,0 +1,35 @@ +regexp = $config->get('URI.SafeIframeRegexp'); + return true; + } + public function filter(&$uri, $config, $context) { + // check if filter not applicable + if (!$config->get('HTML.SafeIframe')) return true; + // check if the filter should actually trigger + if (!$context->get('EmbeddedURI', true)) return true; + $token = $context->get('CurrentToken', true); + if (!($token && $token->name == 'iframe')) return true; + // check if we actually have some whitelists enabled + if ($this->regexp === null) return false; + // actually check the whitelists + return preg_match($this->regexp, $uri->toString()); + } +} + +// vim: et sw=4 sts=4 -- cgit v1.2.3