From 6d746453c75e2c482458f687585ef436a28a9888 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 20 Jun 2019 07:51:48 +0300 Subject: get_feeds_from_html: remove XML preamble hack move several related helper functions to Feeds class --- classes/feeds.php | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 4 deletions(-) (limited to 'classes/feeds.php') diff --git a/classes/feeds.php b/classes/feeds.php index 92bbd113b..2714f4596 100755 --- a/classes/feeds.php +++ b/classes/feeds.php @@ -1135,9 +1135,9 @@ class Feeds extends Handler_Protected { $pdo = Db::pdo(); - $url = fix_url($url); + $url = Feeds::fix_url($url); - if (!$url || !validate_feed_url($url)) return array("code" => 2); + if (!$url || !Feeds::validate_feed_url($url)) return array("code" => 2); $contents = @fetch_file_contents($url, false, $auth_login, $auth_pass); @@ -1153,8 +1153,8 @@ class Feeds extends Handler_Protected { return array("code" => 5, "message" => $fetch_last_error); } - if (mb_strpos($fetch_last_content_type, "html") !== FALSE && is_html($contents)) { - $feedUrls = get_feeds_from_html($url, $contents); + if (mb_strpos($fetch_last_content_type, "html") !== FALSE && Feeds::is_html($contents)) { + $feedUrls = Feeds::get_feeds_from_html($url, $contents); if (count($feedUrls) == 0) { return array("code" => 3); @@ -1923,5 +1923,88 @@ class Feeds extends Handler_Protected { return $colormap[$sum]; } + static function get_feeds_from_html($url, $content) { + $url = Feeds::fix_url($url); + $baseUrl = substr($url, 0, strrpos($url, '/') + 1); + + libxml_use_internal_errors(true); + $feedUrls = []; + + $doc = new DOMDocument(); + if ($doc->loadHTML($content)) { + $xpath = new DOMXPath($doc); + $entries = $xpath->query('/html/head/link[@rel="alternate" and '. + '(contains(@type,"rss") or contains(@type,"atom"))]|/html/head/link[@rel="feed"]'); + + foreach ($entries as $entry) { + if ($entry->hasAttribute('href')) { + $title = $entry->getAttribute('title'); + if ($title == '') { + $title = $entry->getAttribute('type'); + } + $feedUrl = rewrite_relative_url( + $baseUrl, $entry->getAttribute('href') + ); + $feedUrls[$feedUrl] = $title; + } + } + } + return $feedUrls; + } + + static function is_html($content) { + return preg_match("/ "http://www.example/" + if (strpos($url, '/', strpos($url, ':') + 3) === false) { + $url .= '/'; + } + + //convert IDNA hostname to punycode if possible + if (function_exists("idn_to_ascii")) { + $parts = parse_url($url); + if (mb_detect_encoding($parts['host']) != 'ASCII') + { + $parts['host'] = idn_to_ascii($parts['host']); + $url = build_url($parts); + } + } + + if ($url != "http:///") + return $url; + else + return ''; + } + } -- cgit v1.2.3