summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2019-06-20 07:51:48 +0300
committer Andrew Dolgov <[email protected]> 2019-06-20 07:51:48 +0300
commit 6d746453c75e2c482458f687585ef436a28a9888 (patch)
tree 4e56e86c2751856fd635c86c69bac1e124beb687
parent 270b39a33738ae45f1cab2d7a7dd518af10221ca (diff)
get_feeds_from_html: remove XML preamble hack
move several related helper functions to Feeds class
-rwxr-xr-x classes/feeds.php 91
-rwxr-xr-x classes/handler/public.php 12
-rwxr-xr-x classes/pref/feeds.php 2
-rw-r--r-- include/functions.php 98
4 files changed, 88 insertions, 115 deletions
diff --git a/classes/feeds.php b/classes/feeds.php
index 92bbd113b..2714f4596 100755
--- a/classes/feeds.php
+++ b/classes/feeds.php
@@ -1135,9 +1135,9 @@ class Feeds extends Handler_Protected {
$pdo = Db::pdo();
- $url = fix_url($url);
+ $url = Feeds::fix_url($url);
- if (!$url || !validate_feed_url($url)) return array("code" => 2);
+ if (!$url || !Feeds::validate_feed_url($url)) return array("code" => 2);
$contents = @fetch_file_contents($url, false, $auth_login, $auth_pass);
@@ -1153,8 +1153,8 @@ class Feeds extends Handler_Protected {
return array("code" => 5, "message" => $fetch_last_error);
}
- if (mb_strpos($fetch_last_content_type, "html") !== FALSE && is_html($contents)) {
- $feedUrls = get_feeds_from_html($url, $contents);
+ if (mb_strpos($fetch_last_content_type, "html") !== FALSE && Feeds::is_html($contents)) {
+ $feedUrls = Feeds::get_feeds_from_html($url, $contents);
if (count($feedUrls) == 0) {
return array("code" => 3);
@@ -1923,5 +1923,88 @@ class Feeds extends Handler_Protected {
return $colormap[$sum];
}
+ static function get_feeds_from_html($url, $content) {
+ $url = Feeds::fix_url($url);
+ $baseUrl = substr($url, 0, strrpos($url, '/') + 1);
+
+ libxml_use_internal_errors(true);
+ $feedUrls = [];
+
+ $doc = new DOMDocument();
+ if ($doc->loadHTML($content)) {
+ $xpath = new DOMXPath($doc);
+ $entries = $xpath->query('/html/head/link[@rel="alternate" and '.
+ '(contains(@type,"rss") or contains(@type,"atom"))]|/html/head/link[@rel="feed"]');
+
+ foreach ($entries as $entry) {
+ if ($entry->hasAttribute('href')) {
+ $title = $entry->getAttribute('title');
+ if ($title == '') {
+ $title = $entry->getAttribute('type');
+ }
+ $feedUrl = rewrite_relative_url(
+ $baseUrl, $entry->getAttribute('href')
+ );
+ $feedUrls[$feedUrl] = $title;
+ }
+ }
+ }
+ return $feedUrls;
+ }
+
+ static function is_html($content) {
+ return preg_match("/<html|DOCTYPE html/i", substr($content, 0, 8192)) !== 0;
+ }
+
+ static function validate_feed_url($url) {
+ $parts = parse_url($url);
+
+ return ($parts['scheme'] == 'http' || $parts['scheme'] == 'feed' || $parts['scheme'] == 'https');
+ }
+
+ /**
+ * Fixes incomplete URLs by prepending "http://".
+ * Also replaces feed:// with http://, and
+ * appends a trailing slash if the url is a domain name only.
+ *
+ * @param string $url Possibly incomplete URL
+ *
+ * @return string Fixed URL.
+ */
+ static function fix_url($url) {
+
+ // support scheme-less (protocol-relative) urls
+ if (strpos($url, '//') === 0) {
+ $url = 'https:' . $url;
+ }
+
+ if (strpos($url, '://') === false) {
+ $url = 'http://' . $url;
+ } else if (substr($url, 0, 5) == 'feed:') {
+ $url = 'http:' . substr($url, 5);
+ }
+
+ //append slash if the URL has no slash after the "://" separator
+ // "http://www.example" -> "http://www.example/"
+ if (strpos($url, '/', strpos($url, ':') + 3) === false) {
+ $url .= '/';
+ }
+
+ //convert IDNA hostname to punycode if possible
+ if (function_exists("idn_to_ascii")) {
+ $parts = parse_url($url);
+ if (mb_detect_encoding($parts['host']) != 'ASCII')
+ {
+ $parts['host'] = idn_to_ascii($parts['host']);
+ $url = build_url($parts);
+ }
+ }
+
+ if ($url != "http:///")
+ return $url;
+ else
+ return '';
+ }
+
}
diff --git a/classes/handler/public.php b/classes/handler/public.php
index 318cecd72..e9a3abc53 100755
--- a/classes/handler/public.php
+++ b/classes/handler/public.php
@@ -728,18 +728,6 @@ class Handler_Public extends Handler {
}
}
- /* function subtest() {
- header("Content-type: text/plain; charset=utf-8");
-
- $url = clean($_REQUEST["url"]);
-
- print "$url\n\n";
-
-
- print_r(get_feeds_from_html($url, fetch_file_contents($url)));
-
- } */
-
function subscribe() {
if (SINGLE_USER_MODE) {
login_sequence();
diff --git a/classes/pref/feeds.php b/classes/pref/feeds.php
index 6cbf15a58..f94f45430 100755
--- a/classes/pref/feeds.php
+++ b/classes/pref/feeds.php
@@ -1708,7 +1708,7 @@ class Pref_Feeds extends Handler_Protected {
foreach ($feeds as $feed) {
$feed = trim($feed);
- if (validate_feed_url($feed)) {
+ if (Feeds::validate_feed_url($feed)) {
$this->pdo->beginTransaction();
diff --git a/include/functions.php b/include/functions.php
index 6dc9990e8..fe4ca1ecb 100644
--- a/include/functions.php
+++ b/include/functions.php
@@ -2017,68 +2017,6 @@
return false;
}
- /**
- * Fixes incomplete URLs by prepending "http://".
- * Also replaces feed:// with http://, and
- * prepends a trailing slash if the url is a domain name only.
- *
- * @param string $url Possibly incomplete URL
- *
- * @return string Fixed URL.
- */
- function fix_url($url) {
-
- // support schema-less urls
- if (strpos($url, '//') === 0) {
- $url = 'https:' . $url;
- }
-
- if (strpos($url, '://') === false) {
- $url = 'http://' . $url;
- } else if (substr($url, 0, 5) == 'feed:') {
- $url = 'http:' . substr($url, 5);
- }
-
- //prepend slash if the URL has no slash in it
- // "http://www.example" -> "http://www.example/"
- if (strpos($url, '/', strpos($url, ':') + 3) === false) {
- $url .= '/';
- }
-
- //convert IDNA hostname to punycode if possible
- if (function_exists("idn_to_ascii")) {
- $parts = parse_url($url);
- if (mb_detect_encoding($parts['host']) != 'ASCII')
- {
- $parts['host'] = idn_to_ascii($parts['host']);
- $url = build_url($parts);
- }
- }
-
- if ($url != "http:///")
- return $url;
- else
- return '';
- }
-
- function validate_feed_url($url) {
- $parts = parse_url($url);
-
- return ($parts['scheme'] == 'http' || $parts['scheme'] == 'feed' || $parts['scheme'] == 'https');
-
- }
-
- /* function save_email_address($email) {
- // FIXME: implement persistent storage of emails
-
- if (!$_SESSION['stored_emails'])
- $_SESSION['stored_emails'] = array();
-
- if (!in_array($email, $_SESSION['stored_emails']))
- array_push($_SESSION['stored_emails'], $email);
- } */
-
-
function get_feed_access_key($feed_id, $is_cat, $owner_uid = false) {
if (!$owner_uid) $owner_uid = $_SESSION["uid"];
@@ -2107,42 +2045,6 @@
}
}
- function get_feeds_from_html($url, $content)
- {
- $url = fix_url($url);
- $baseUrl = substr($url, 0, strrpos($url, '/') + 1);
-
- libxml_use_internal_errors(true);
-
- $doc = new DOMDocument();
- $doc->loadHTML('<?xml encoding="UTF-8">' . $content);
- $xpath = new DOMXPath($doc);
- $entries = $xpath->query('/html/head/link[@rel="alternate" and '.
- '(contains(@type,"rss") or contains(@type,"atom"))]|/html/head/link[@rel="feed"]');
- $feedUrls = array();
- foreach ($entries as $entry) {
- if ($entry->hasAttribute('href')) {
- $title = $entry->getAttribute('title');
- if ($title == '') {
- $title = $entry->getAttribute('type');
- }
- $feedUrl = rewrite_relative_url(
- $baseUrl, $entry->getAttribute('href')
- );
- $feedUrls[$feedUrl] = $title;
- }
- }
- return $feedUrls;
- }
-
- function is_html($content) {
- return preg_match("/<html|DOCTYPE html/i", substr($content, 0, 8192)) !== 0;
- }
-
- function url_is_html($url, $login = false, $pass = false) {
- return is_html(fetch_file_contents($url, false, $login, $pass));
- }
-
function build_url($parts) {
return $parts['scheme'] . "://" . $parts['host'] . $parts['path'];
}