summaryrefslogtreecommitdiff
path: root/lib/htmlpurifier/library/HTMLPurifier/URIScheme
diff options
context:
space:
mode:
Diffstat (limited to 'lib/htmlpurifier/library/HTMLPurifier/URIScheme')
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/URIScheme/data.php96
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/URIScheme/file.php32
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php42
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php19
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php13
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/URIScheme/mailto.php27
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php22
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php19
8 files changed, 270 insertions, 0 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/data.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/data.php
new file mode 100644
index 000000000..a5c43989e
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/data.php
@@ -0,0 +1,96 @@
+<?php
+
+/**
+ * Implements data: URI for base64 encoded images supported by GD.
+ *
+ * The payload is decoded, written to a temporary file and probed to find
+ * out what kind of image it really is. If the detected type is one of
+ * $allowed_types, the URI path is rewritten in the canonical form
+ * "<content-type>;base64,<data>".
+ */
+class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
+
+    public $browsable = true;
+    public $allowed_types = array(
+        // you better write validation code for other types if you
+        // decide to allow them
+        'image/jpeg' => true,
+        'image/gif' => true,
+        'image/png' => true,
+    );
+    // this is actually irrelevant since we only write out the path
+    // component
+    public $may_omit_host = true;
+
+    /**
+     * Validates a data: URI and normalises it in place.
+     *
+     * @param object $uri     URI object; its path is read, and on success
+     *                        userinfo, host, port, fragment and query are
+     *                        cleared and path is rewritten.
+     * @param mixed  $config  Not used by this method.
+     * @param mixed  $context Not used by this method.
+     * @return bool True if the payload is an image of an allowed type,
+     *              false otherwise.
+     */
+    public function doValidate(&$uri, $config, $context) {
+        $result = explode(',', $uri->path, 2);
+        $is_base64 = false;
+        $content_type = null;
+        if (count($result) == 2) {
+            list($metadata, $data) = $result;
+            // do some legwork on the metadata
+            foreach (explode(';', $metadata) as $cur) {
+                if ($cur === 'base64') {
+                    // everything after the base64 marker is ignored
+                    $is_base64 = true;
+                    break;
+                }
+                if (strncmp($cur, 'charset=', 8) === 0) {
+                    // doesn't match if there are arbitrary spaces; the
+                    // charset is dropped since plaintext is not allowed
+                    continue;
+                }
+                if ($content_type === null) {
+                    // first non-charset entry wins, later ones are garbage
+                    $content_type = $cur;
+                }
+            }
+        } else {
+            $data = $result[0];
+        }
+        if ($content_type !== null && empty($this->allowed_types[$content_type])) {
+            return false;
+        }
+        $data = rawurldecode($data);
+        $raw_data = $is_base64 ? base64_decode($data) : $data;
+        if (!is_string($raw_data) || $raw_data === '') {
+            // nothing to inspect; an empty payload cannot be an image
+            return false;
+        }
+        // Check for a usable probe function before touching the disk.
+        $use_exif = function_exists('exif_imagetype');
+        if (!$use_exif && !function_exists('getimagesize')) {
+            trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
+            // only reached when a custom error handler resumes execution
+            return false;
+        }
+        // XXX probably want to refactor this into a general mechanism
+        // for filtering arbitrary content types
+        $tmp_dir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : '/tmp';
+        $file = tempnam($tmp_dir, "");
+        if ($file === false) {
+            return false;
+        }
+        // Mute PHP errors while probing: malformed input is expected here
+        // and must simply fail validation.
+        set_error_handler(array($this, 'muteErrorHandler'));
+        $image_code = false;
+        if (file_put_contents($file, $raw_data) !== false) {
+            if ($use_exif) {
+                $image_code = exif_imagetype($file);
+            } else {
+                $info = getimagesize($file);
+                if ($info != false) {
+                    $image_code = $info[2];
+                }
+            }
+        }
+        // always remove the temporary file, whatever the outcome
+        unlink($file);
+        restore_error_handler();
+        if ($image_code === false) {
+            return false;
+        }
+        $real_content_type = image_type_to_mime_type($image_code);
+        if ($real_content_type != $content_type) {
+            // we're nice guys; if the content type is something else we
+            // support, change it over
+            if (empty($this->allowed_types[$real_content_type])) {
+                return false;
+            }
+            $content_type = $real_content_type;
+        }
+        // ok, it's kosher, rewrite what we need
+        $uri->userinfo = null;
+        $uri->host = null;
+        $uri->port = null;
+        $uri->fragment = null;
+        $uri->query = null;
+        $uri->path = "$content_type;base64," . base64_encode($raw_data);
+        return true;
+    }
+
+    /**
+     * No-op error handler installed while the image is being probed, so
+     * that failures there do not surface as PHP errors.
+     */
+    public function muteErrorHandler($errno, $errstr) {}
+
+}
+
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/file.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/file.php
new file mode 100644
index 000000000..d74a3f198
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/file.php
@@ -0,0 +1,32 @@
+<?php
+
+/**
+ * Validates file as defined by RFC 1630 and RFC 1738.
+ */
+class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme {
+
+    // file:// URLs usually cannot be reached from other machines, so
+    // using one as e.g. an img src would be incorrect.
+    public $browsable = false;
+
+    // The host may be left out: local files are commonly addressed
+    // without an authority, and some browsers do not understand one at
+    // all (it is reportedly used on Windows for network shares).
+    public $may_omit_host = true;
+
+    /**
+     * Clears the URI components that have no meaning for file:// URIs.
+     *
+     * - userinfo: authentication is not supported
+     * - port:     the scheme makes no provision for one
+     * - query:    has no possible effect, even where browsers accept it
+     *
+     * @return bool Always true.
+     */
+    public function doValidate(&$uri, $config, $context) {
+        $uri->userinfo = $uri->port = $uri->query = null;
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php
new file mode 100644
index 000000000..0fb2abf64
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php
@@ -0,0 +1,42 @@
+<?php
+
+/**
+ * Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738.
+ */
+class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
+
+    public $default_port = 21;
+    public $browsable = true; // usually
+    public $hierarchical = true;
+
+    /**
+     * Drops the query and normalises a trailing ";type=<code>" typecode.
+     *
+     * Only the text after the LAST semicolon of the path is considered:
+     * - "type=a", "type=i" or "type=d" is kept as the typecode;
+     * - "type=<anything else>" is discarded;
+     * - any other text is kept, with its semicolon encoded as %3B.
+     * Every other semicolon in the path is encoded as %3B as well.
+     *
+     * @return bool Always true.
+     */
+    public function doValidate(&$uri, $config, $context) {
+        $uri->query = null;
+
+        $cut = strrpos($uri->path, ';'); // position of the last semicolon
+        if ($cut === false) {
+            // no typecode candidate, path is left untouched
+            return true;
+        }
+
+        $suffix = substr($uri->path, $cut + 1); // without the semicolon
+        $head = substr($uri->path, 0, $cut);
+        $typecode_part = '';
+
+        $pair = (strpos($suffix, '=') !== false) ? explode('=', $suffix, 2) : null;
+        if ($pair === null || $pair[0] !== 'type') {
+            // not a type declaration: put it back, encoded
+            $head .= '%3B' . $suffix;
+        } elseif (in_array($pair[1], array('a', 'i', 'd'), true)) {
+            $typecode_part = ';type=' . $pair[1];
+        }
+
+        // the typecode is appended last so its semicolon stays literal
+        $uri->path = str_replace(';', '%3B', $head) . $typecode_part;
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php
new file mode 100644
index 000000000..959b8daff
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php
@@ -0,0 +1,19 @@
+<?php
+
+/**
+ * Validates http (HyperText Transfer Protocol) as defined by RFC 2616
+ */
+class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
+
+    public $default_port = 80; // standard HTTP port
+    public $browsable = true;
+    public $hierarchical = true;
+
+    /**
+     * Removes the userinfo component; every other part of the URI is
+     * left as it is.
+     *
+     * @return bool Always true.
+     */
+    public function doValidate(&$uri, $config, $context) {
+        $uri->userinfo = null;
+
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php
new file mode 100644
index 000000000..159c2874e
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php
@@ -0,0 +1,13 @@
+<?php
+
+/**
+ * Validates https (Secure HTTP) according to http scheme.
+ *
+ * Validation itself is inherited from HTMLPurifier_URIScheme_http; this
+ * class only overrides the two properties below.
+ */
+class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
+
+    public $default_port = 443; // standard HTTPS port
+    // Flags the scheme as secure; where the flag is consumed is not
+    // visible in this file.
+    public $secure = true;
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/mailto.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/mailto.php
new file mode 100644
index 000000000..9db4cb23f
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/mailto.php
@@ -0,0 +1,27 @@
+<?php
+
+// NOTE: this validation is deliberately very relaxed. It should not cause
+// problems (not even Firefox checks whether the address is valid), but be
+// careful when relying on it.
+
+/**
+ * Validates mailto (for E-mail) according to RFC 2368
+ * @todo Validate the email address
+ * @todo Filter allowed query parameters
+ */
+
+class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
+
+    public $browsable = false;
+    public $may_omit_host = true;
+
+    /**
+     * Clears userinfo, host and port. The path and the query are passed
+     * through unchanged; the path still needs to be validated against
+     * RFC 2368's addr-spec.
+     *
+     * @return bool Always true.
+     */
+    public function doValidate(&$uri, $config, $context) {
+        $uri->userinfo = $uri->host = $uri->port = null;
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php
new file mode 100644
index 000000000..84a6748d8
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php
@@ -0,0 +1,22 @@
+<?php
+
+/**
+ * Validates news (Usenet) as defined by generic RFC 1738
+ */
+class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
+
+    public $browsable = false;
+    public $may_omit_host = true;
+
+    /**
+     * Clears userinfo, host, port and query, leaving only the path.
+     *
+     * The path is not checked here; the original author left a note
+     * that a "typecode check" is still needed on it.
+     *
+     * @return bool Always true.
+     */
+    public function doValidate(&$uri, $config, $context) {
+        $uri->userinfo = $uri->host = $uri->port = $uri->query = null;
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php
new file mode 100644
index 000000000..4ccea0dfc
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php
@@ -0,0 +1,19 @@
+<?php
+
+/**
+ * Validates nntp (Network News Transfer Protocol) as defined by generic RFC 1738
+ */
+class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
+
+    public $default_port = 119; // standard NNTP port
+    public $browsable = false;
+
+    /**
+     * Clears the userinfo and query components; host, port and path are
+     * left as they are.
+     *
+     * @return bool Always true.
+     */
+    public function doValidate(&$uri, $config, $context) {
+        $uri->userinfo = $uri->query = null;
+        return true;
+    }
+
+}
+
+// vim: et sw=4 sts=4