summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2021-06-18 11:20:57 +0300
committerAndrew Dolgov <[email protected]>2021-06-18 11:20:57 +0300
commite9c062a189cfad71922fc576d636610da18006d4 (patch)
tree786eb11ef2108f481a854c806ba87113efe342eb
parent34807bacd4b2d31b6268d839ccd5281db4ee4da2 (diff)
UrlHelper::rewrite_relative():
- support invoking with the owner element/attribute specified
- restrict mailto/magnet/tel schemes to A href
- allow some data: base64 image types for IMG src

Sanitizer::sanitize():
- when checking href and src attributes, pass the element tag name and attribute to rewrite_relative()
-rw-r--r--classes/sanitizer.php6
-rw-r--r--classes/urlhelper.php21
2 files changed, 20 insertions, 7 deletions
diff --git a/classes/sanitizer.php b/classes/sanitizer.php
index 07766dc16..0a444a296 100644
--- a/classes/sanitizer.php
+++ b/classes/sanitizer.php
@@ -74,7 +74,7 @@ class Sanitizer {
if ($entry->hasAttribute('href')) {
$entry->setAttribute('href',
- rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
+ UrlHelper::rewrite_relative($rewrite_base_url, $entry->getAttribute('href'), $entry->tagName, "href"));
$entry->setAttribute('rel', 'noopener noreferrer');
$entry->setAttribute("target", "_blank");
@@ -82,7 +82,7 @@ class Sanitizer {
if ($entry->hasAttribute('src')) {
$entry->setAttribute('src',
- rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src')));
+ UrlHelper::rewrite_relative($rewrite_base_url, $entry->getAttribute('src'), $entry->tagName, "src"));
}
if ($entry->nodeName == 'img') {
@@ -94,7 +94,7 @@ class Sanitizer {
$matches = RSSUtils::decode_srcset($entry->getAttribute('srcset'));
for ($i = 0; $i < count($matches); $i++) {
- $matches[$i]["url"] = rewrite_relative_url($rewrite_base_url, $matches[$i]["url"]);
+ $matches[$i]["url"] = UrlHelper::rewrite_relative($rewrite_base_url, $matches[$i]["url"]);
}
$entry->setAttribute("srcset", RSSUtils::encode_srcset($matches));
diff --git a/classes/urlhelper.php b/classes/urlhelper.php
index 648d609a4..b4545939f 100644
--- a/classes/urlhelper.php
+++ b/classes/urlhelper.php
@@ -1,6 +1,6 @@
<?php
class UrlHelper {
- const ALLOWED_RELATIVE_SCHEMES = [
+ const EXTRA_HREF_SCHEMES = [
"magnet",
"mailto",
"tel"
@@ -27,22 +27,35 @@ class UrlHelper {
/**
* Converts a (possibly) relative URL to an absolute one, using provided base URL.
+ * Provides some exceptions for additional schemes like data: if called with owning element/attribute.
*
* @param string $base_url Base URL (i.e. from where the document is)
* @param string $rel_url Possibly relative URL in the document
+ * @param string $owner_element Owner node tag name (e.g. A) (optional)
+ * @param string $owner_attribute Owner attribute (e.g. href) (optional)
*
* @return string Absolute URL
*/
- public static function rewrite_relative($base_url, $rel_url) {
+ public static function rewrite_relative($base_url, $rel_url, string $owner_element = "", string $owner_attribute = "") {
$rel_parts = parse_url($rel_url);
if (!empty($rel_parts['host']) && !empty($rel_parts['scheme'])) {
return self::validate($rel_url);
+
+ // protocol-relative URL (rare but they exist)
} else if (strpos($rel_url, "//") === 0) {
- # protocol-relative URL (rare but they exist)
return self::validate("https:" . $rel_url);
- } else if (array_search($rel_parts["scheme"] ?? "", self::ALLOWED_RELATIVE_SCHEMES, true) !== false) {
+ // allow some extra schemes for A href
+ } else if (in_array($rel_parts["scheme"] ?? "", self::EXTRA_HREF_SCHEMES) &&
+ $owner_element == "a" &&
+ $owner_attribute == "href") {
+ return $rel_url;
+ // allow limited subset of inline base64-encoded images for IMG elements
+ } else if ($rel_parts["scheme"] == "data" &&
+ preg_match('%^image/(webp|gif|jpg|png|svg);base64,%', $rel_parts["path"]) &&
+ $owner_element == "img" &&
+ $owner_attribute == "src") {
return $rel_url;
} else {
$base_parts = parse_url($base_url);