summaryrefslogtreecommitdiff
path: root/classes/feeds.php
diff options
context:
space:
mode:
Diffstat (limited to 'classes/feeds.php')
-rwxr-xr-xclasses/feeds.php91
1 files changed, 87 insertions, 4 deletions
diff --git a/classes/feeds.php b/classes/feeds.php
index 92bbd113b..2714f4596 100755
--- a/classes/feeds.php
+++ b/classes/feeds.php
@@ -1135,9 +1135,9 @@ class Feeds extends Handler_Protected {
$pdo = Db::pdo();
- $url = fix_url($url);
+ $url = Feeds::fix_url($url);
- if (!$url || !validate_feed_url($url)) return array("code" => 2);
+ if (!$url || !Feeds::validate_feed_url($url)) return array("code" => 2);
$contents = @fetch_file_contents($url, false, $auth_login, $auth_pass);
@@ -1153,8 +1153,8 @@ class Feeds extends Handler_Protected {
return array("code" => 5, "message" => $fetch_last_error);
}
- if (mb_strpos($fetch_last_content_type, "html") !== FALSE && is_html($contents)) {
- $feedUrls = get_feeds_from_html($url, $contents);
+ if (mb_strpos($fetch_last_content_type, "html") !== FALSE && Feeds::is_html($contents)) {
+ $feedUrls = Feeds::get_feeds_from_html($url, $contents);
if (count($feedUrls) == 0) {
return array("code" => 3);
@@ -1923,5 +1923,88 @@ class Feeds extends Handler_Protected {
return $colormap[$sum];
}
+ static function get_feeds_from_html($url, $content) {
+ $url = Feeds::fix_url($url);
+ $baseUrl = substr($url, 0, strrpos($url, '/') + 1);
+
+ libxml_use_internal_errors(true);
+ $feedUrls = [];
+
+ $doc = new DOMDocument();
+ if ($doc->loadHTML($content)) {
+ $xpath = new DOMXPath($doc);
+ $entries = $xpath->query('/html/head/link[@rel="alternate" and '.
+ '(contains(@type,"rss") or contains(@type,"atom"))]|/html/head/link[@rel="feed"]');
+
+ foreach ($entries as $entry) {
+ if ($entry->hasAttribute('href')) {
+ $title = $entry->getAttribute('title');
+ if ($title == '') {
+ $title = $entry->getAttribute('type');
+ }
+ $feedUrl = rewrite_relative_url(
+ $baseUrl, $entry->getAttribute('href')
+ );
+ $feedUrls[$feedUrl] = $title;
+ }
+ }
+ }
+ return $feedUrls;
+ }
+
+ static function is_html($content) {
+ return preg_match("/<html|DOCTYPE html/i", substr($content, 0, 8192)) !== 0;
+ }
+
+ static function validate_feed_url($url) {
+ $parts = parse_url($url);
+
+ return ($parts['scheme'] == 'http' || $parts['scheme'] == 'feed' || $parts['scheme'] == 'https');
+ }
+
+ /**
+ * Fixes incomplete URLs by prepending "http://".
+ * Also replaces feed:// with http://, and
+ * prepends a trailing slash if the url is a domain name only.
+ *
+ * @param string $url Possibly incomplete URL
+ *
+ * @return string Fixed URL.
+ */
+ static function fix_url($url) {
+
+ // support schema-less urls
+ if (strpos($url, '//') === 0) {
+ $url = 'https:' . $url;
+ }
+
+ if (strpos($url, '://') === false) {
+ $url = 'http://' . $url;
+ } else if (substr($url, 0, 5) == 'feed:') {
+ $url = 'http:' . substr($url, 5);
+ }
+
+ //prepend slash if the URL has no slash in it
+ // "http://www.example" -> "http://www.example/"
+ if (strpos($url, '/', strpos($url, ':') + 3) === false) {
+ $url .= '/';
+ }
+
+ //convert IDNA hostname to punycode if possible
+ if (function_exists("idn_to_ascii")) {
+ $parts = parse_url($url);
+ if (mb_detect_encoding($parts['host']) != 'ASCII')
+ {
+ $parts['host'] = idn_to_ascii($parts['host']);
+ $url = build_url($parts);
+ }
+ }
+
+ if ($url != "http:///")
+ return $url;
+ else
+ return '';
+ }
+
}