From d99c499c9c8646e0d40463b48f338541113210d6 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Sun, 18 Mar 2018 20:33:23 +0000 Subject: Check for base urls before generating paths for the URL resolver --- src/Readability.php | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Readability.php b/src/Readability.php index ef49763..5d450ba 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -582,7 +582,19 @@ class Readability */ public function getPathInfo($url) { - $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/'; + // Check for base URLs + if ($this->dom->baseURI !== null) { + if (substr($this->dom->baseURI, 0, 1) === '/') { + // URLs starting with '/' override completely the URL defined in the link + $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . $this->dom->baseURI; + } else { + // Otherwise just prepend the base to the actual path + $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/' . rtrim($this->dom->baseURI, '/') . '/'; + } + } else { + $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/'; + } + $scheme = parse_url($pathBase, PHP_URL_SCHEME); $prePath = $scheme . '://' . parse_url($pathBase, PHP_URL_HOST); -- cgit v1.2.3