summary | refs | log | tree | commit | diff
path: root/classes/urlhelper.php
diff options
context:
space:
mode:
author: Andrew Dolgov <[email protected]> 2020-09-28 19:46:31 +0300
committer: Andrew Dolgov <[email protected]> 2020-09-28 19:46:31 +0300
commit: c70e26db31d520c554b867325ace95cbee6687e3 (patch)
tree: 606dae711fe6328f6511e40de73ca3466acd29b0 /classes/urlhelper.php
parent: 7c8bed05243156a4dc6290c6ac411401d773a03a (diff)
validate url: feed urlencoded() URL to filter_var() only
Diffstat (limited to 'classes/urlhelper.php')
-rw-r--r-- classes/urlhelper.php 21
1 files changed, 13 insertions, 8 deletions
diff --git a/classes/urlhelper.php b/classes/urlhelper.php
index 461d5fb7e..d7b7d004a 100644
--- a/classes/urlhelper.php
+++ b/classes/urlhelper.php
@@ -64,13 +64,6 @@ class UrlHelper {
if (!in_array(strtolower($tokens['scheme']), ['http', 'https']))
return false;
- if ($tokens['path']) {
- $tokens['path'] = implode("/",
- array_map("rawurlencode",
- array_map("rawurldecode",
- explode("/", $tokens['path']))));
- }
-
//convert IDNA hostname to punycode if possible
if (function_exists("idn_to_ascii")) {
if (mb_detect_encoding($tokens['host']) != 'ASCII') {
@@ -78,9 +71,21 @@ class UrlHelper {
}
}
+ // separate set of tokens with urlencoded 'path' because filter_var() rightfully fails on non-latin characters
+ // (used for validation only, we actually request the original URL, in case of urlencode breaking it)
+ $tokens_filter_var = $tokens;
+
+ if ($tokens['path']) {
+ $tokens_filter_var['path'] = implode("/",
+ array_map("rawurlencode",
+ array_map("rawurldecode",
+ explode("/", $tokens['path']))));
+ }
+
$url = self::build_url($tokens);
+ $url_filter_var = self::build_url($tokens_filter_var);
- if (filter_var($url, FILTER_VALIDATE_URL) === false)
+ if (filter_var($url_filter_var, FILTER_VALIDATE_URL) === false)
return false;
if ($extended_filtering) {