summaryrefslogtreecommitdiff
path: root/classes/urlhelper.php
diff options
context:
space:
mode:
Diffstat (limited to 'classes/urlhelper.php')
-rw-r--r--classes/urlhelper.php105
1 files changed, 85 insertions, 20 deletions
diff --git a/classes/urlhelper.php b/classes/urlhelper.php
index b2c1331b6..2dfb22a5d 100644
--- a/classes/urlhelper.php
+++ b/classes/urlhelper.php
@@ -6,16 +6,39 @@ class UrlHelper {
"tel"
];
+ const EXTRA_SCHEMES_BY_CONTENT_TYPE = [
+ "application/x-bittorrent" => [ "magnet" ],
+ ];
+
+ // TODO: class properties can be switched to PHP typing if/when the minimum PHP_VERSION is raised to 7.4.0+
+ /** @var string */
static $fetch_last_error;
+
+ /** @var int */
static $fetch_last_error_code;
+
+ /** @var string */
static $fetch_last_error_content;
+
+ /** @var string */
static $fetch_last_content_type;
+
+ /** @var string */
static $fetch_last_modified;
+
+ /** @var string */
static $fetch_effective_url;
+
+ /** @var string */
static $fetch_effective_ip_addr;
+
+ /** @var bool */
static $fetch_curl_used;
- static function build_url($parts) {
+ /**
+ * @param array<string, string|int> $parts
+ */
+ static function build_url(array $parts): string {
$tmp = $parts['scheme'] . "://" . $parts['host'];
if (isset($parts['path'])) $tmp .= $parts['path'];
@@ -33,13 +56,29 @@ class UrlHelper {
* @param string $rel_url Possibly relative URL in the document
* @param string $owner_element Owner element tag name (i.e. "a") (optional)
* @param string $owner_attribute Owner attribute (i.e. "href") (optional)
+ * @param string $content_type URL content type as specified by enclosures, etc.
*
- * @return string Absolute URL
+ * @return false|string Absolute URL or false on failure (either during URL parsing or validation)
*/
- public static function rewrite_relative($base_url, $rel_url, string $owner_element = "", string $owner_attribute = "") {
+ public static function rewrite_relative($base_url,
+ $rel_url,
+ string $owner_element = "",
+ string $owner_attribute = "",
+ string $content_type = "") {
$rel_parts = parse_url($rel_url);
+ /**
+ * If parse_url() failed to parse $rel_url, return false to match the current "invalid thing" behavior
+ * of UrlHelper::validate().
+ *
+ * TODO: There are many places where a string return value is assumed. We should either update those
+ * to account for the possibility of failure, or look into updating this function's return values.
+ */
+ if ($rel_parts === false) {
+ return false;
+ }
+
if (!empty($rel_parts['host']) && !empty($rel_parts['scheme'])) {
return self::validate($rel_url);
@@ -51,8 +90,13 @@ class UrlHelper {
$owner_element == "a" &&
$owner_attribute == "href") {
return $rel_url;
+ // allow some extra schemes for links with a feed-specified content type, i.e. enclosures
+ } else if ($content_type &&
+ isset(self::EXTRA_SCHEMES_BY_CONTENT_TYPE[$content_type]) &&
+ in_array($rel_parts["scheme"], self::EXTRA_SCHEMES_BY_CONTENT_TYPE[$content_type])) {
+ return $rel_url;
// allow limited subset of inline base64-encoded images for IMG elements
- } else if ($rel_parts["scheme"] ?? "" == "data" &&
+ } else if (($rel_parts["scheme"] ?? "") == "data" &&
preg_match('%^image/(webp|gif|jpg|png|svg);base64,%', $rel_parts["path"]) &&
$owner_element == "img" &&
$owner_attribute == "src") {
@@ -60,8 +104,8 @@ class UrlHelper {
} else {
$base_parts = parse_url($base_url);
- $rel_parts['host'] = $base_parts['host'];
- $rel_parts['scheme'] = $base_parts['scheme'];
+ $rel_parts['host'] = $base_parts['host'] ?? "";
+ $rel_parts['scheme'] = $base_parts['scheme'] ?? "";
if (isset($rel_parts['path'])) {
@@ -80,8 +124,10 @@ class UrlHelper {
}
}
- // extended filtering involves validation for safe ports and loopback
- static function validate($url, $extended_filtering = false) {
+ /** Validates a URL; extended filtering additionally involves validation for safe ports and loopback.
+ * @return false|string false if something went wrong, otherwise the URL string
+ */
+ static function validate(string $url, bool $extended_filtering = false) {
$url = clean($url);
@@ -107,6 +153,11 @@ class UrlHelper {
} else {
$tokens['host'] = idn_to_ascii($tokens['host']);
}
+
+ // `idn_to_ascii` returns false on failure; treat the URL as invalid in that case
+ if ($tokens['host'] === false) {
+ return false;
+ }
}
}
@@ -138,7 +189,10 @@ class UrlHelper {
return $url;
}
- static function resolve_redirects($url, $timeout, $nest = 0) {
+ /**
+ * @return false|string
+ */
+ static function resolve_redirects(string $url, int $timeout, int $nest = 0) {
// too many redirects
if ($nest > 10)
@@ -162,8 +216,12 @@ class UrlHelper {
$context = stream_context_create($context_options);
+ // PHP 8 changed the second param from int to bool, but we still support PHP >= 7.1.0
+ // @phpstan-ignore-next-line
$headers = get_headers($url, 0, $context);
} else {
+ // PHP 8 changed the second param from int to bool, but we still support PHP >= 7.1.0
+ // @phpstan-ignore-next-line
$headers = get_headers($url, 0);
}
@@ -185,12 +243,16 @@ class UrlHelper {
return false;
}
- // TODO: max_size currently only works for CURL transfers
+ /**
+ * @param array<string, bool|int|string>|string $options
+ * @return false|string false if something went wrong, otherwise string contents
+ */
+ // TODO: max_size currently only works for CURL transfers
// TODO: multiple-argument way is deprecated, first parameter is a hash now
public static function fetch($options /* previously: 0: $url , 1: $type = false, 2: $login = false, 3: $pass = false,
4: $post_query = false, 5: $timeout = false, 6: $timestamp = 0, 7: $useragent = false*/) {
- self::$fetch_last_error = false;
+ self::$fetch_last_error = "";
self::$fetch_last_error_code = -1;
self::$fetch_last_error_content = "";
self::$fetch_last_content_type = "";
@@ -239,6 +301,8 @@ class UrlHelper {
$url = ltrim($url, ' ');
$url = str_replace(' ', '%20', $url);
+ Debug::log("[UrlHelper] fetching: $url", Debug::LOG_EXTENDED);
+
$url = self::validate($url, true);
if (!$url) {
@@ -275,15 +339,15 @@ class UrlHelper {
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout ? $timeout : Config::get(Config::FILE_FETCH_CONNECT_TIMEOUT));
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout ? $timeout : Config::get(Config::FILE_FETCH_TIMEOUT));
- curl_setopt($ch, CURLOPT_FOLLOWLOCATION, !ini_get("open_basedir") && $followlocation);
+ curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $followlocation);
curl_setopt($ch, CURLOPT_MAXREDIRS, 20);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
- curl_setopt($ch, CURLOPT_USERAGENT, $useragent ? $useragent :
- SELF_USER_AGENT);
+ curl_setopt($ch, CURLOPT_USERAGENT, $useragent ? $useragent : Config::get_user_agent());
curl_setopt($ch, CURLOPT_ENCODING, "");
+ curl_setopt($ch, CURLOPT_COOKIEJAR, "/dev/null");
if ($http_referrer)
curl_setopt($ch, CURLOPT_REFERER, $http_referrer);
@@ -298,7 +362,7 @@ class UrlHelper {
//Debug::log("[curl progressfunction] $downloaded $max_size", Debug::$LOG_EXTENDED);
if ($downloaded > $max_size) {
- Debug::log("curl: reached max size of $max_size bytes requesting $url, aborting.", Debug::LOG_VERBOSE);
+ Debug::log("[UrlHelper] fetch error: curl reached max size of $max_size bytes downloading $url, aborting.", Debug::LOG_VERBOSE);
return 1;
}
@@ -307,10 +371,6 @@ class UrlHelper {
}
- if (!ini_get("open_basedir")) {
- curl_setopt($ch, CURLOPT_COOKIEJAR, "/dev/null");
- }
-
if (Config::get(Config::HTTP_PROXY)) {
curl_setopt($ch, CURLOPT_PROXY, Config::get(Config::HTTP_PROXY));
}
@@ -374,6 +434,8 @@ class UrlHelper {
if (curl_errno($ch) != 0) {
self::$fetch_last_error .= "; " . curl_errno($ch) . " " . curl_error($ch);
+ } else {
+ self::$fetch_last_error = "HTTP Code: $http_code ";
}
self::$fetch_last_error_content = $contents;
@@ -510,7 +572,10 @@ class UrlHelper {
}
}
- public static function url_to_youtube_vid($url) {
+ /**
+ * @return false|string false if the provided URL didn't match expected patterns, otherwise the video ID string
+ */
+ public static function url_to_youtube_vid(string $url) {
$url = str_replace("youtube.com", "youtube-nocookie.com", $url);
$regexps = [