summaryrefslogtreecommitdiff
path: root/src/Readability.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Readability.php')
-rw-r--r--src/Readability.php14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/Readability.php b/src/Readability.php
index ef49763..5d450ba 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -582,7 +582,19 @@ class Readability
*/
public function getPathInfo($url)
{
- $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/';
+ // Check for base URLs
+ if ($this->dom->baseURI !== null) {
+ if (substr($this->dom->baseURI, 0, 1) === '/') {
+ // A base URI starting with '/' completely replaces the path of the given URL (scheme and host are kept)
+ $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . $this->dom->baseURI;
+ } else {
+ // Otherwise append the base URI to the directory of the URL's path
+ $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/' . rtrim($this->dom->baseURI, '/') . '/';
+ }
+ } else {
+ $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/';
+ }
+
$scheme = parse_url($pathBase, PHP_URL_SCHEME);
$prePath = $scheme . '://' . parse_url($pathBase, PHP_URL_HOST);