summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2018-03-18 20:33:23 +0000
committer: Andres Rey <[email protected]> 2018-03-18 20:33:23 +0000
commit: d99c499c9c8646e0d40463b48f338541113210d6 (patch)
tree: f5c147862345ea357940094a0689aa17d09e026a /src
parent: 4eed776f901446ba8edb54ff9babda2cd52706d4 (diff)
Check for base URLs before generating paths for the URL resolver
Diffstat (limited to 'src')
-rw-r--r--src/Readability.php14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/Readability.php b/src/Readability.php
index ef49763..5d450ba 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -582,7 +582,19 @@ class Readability
*/
public function getPathInfo($url)
{
- $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/';
+ // Check for base URLs
+ if ($this->dom->baseURI !== null) {
+ if (substr($this->dom->baseURI, 0, 1) === '/') {
+ // A base URI starting with '/' completely overrides the path of the given URL (scheme and host are kept)
+ $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . $this->dom->baseURI;
+ } else {
+ // Otherwise treat the base as relative and append it to the directory of the URL's path
+ $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/' . rtrim($this->dom->baseURI, '/') . '/';
+ }
+ } else {
+ $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/';
+ }
+
$scheme = parse_url($pathBase, PHP_URL_SCHEME);
$prePath = $scheme . '://' . parse_url($pathBase, PHP_URL_HOST);