summaryrefslogtreecommitdiff
path: root/functions.php
diff options
context:
space:
mode:
authorChristian Weiske <[email protected]>2010-11-07 15:45:50 +0100
committerAndrew Dolgov <[email protected]>2010-11-08 23:10:22 +0300
commitf0266f51ab36bf389415bcac9fe26c8084c67bdd (patch)
tree4ba5e1e99530cb9b1e6738764eeada1439158d34 /functions.php
parentf2c6c0088bccbfc9b1259559551064b9a3648acf (diff)
add "extractfeedurls" rpc action that extracts the feed URLs from a HTML page
Diffstat (limited to 'functions.php')
-rw-r--r--functions.php53
1 files changed, 46 insertions, 7 deletions
diff --git a/functions.php b/functions.php
index ae37e7d84..2373d5435 100644
--- a/functions.php
+++ b/functions.php
@@ -2943,15 +2943,9 @@
function subscribe_to_feed($link, $url, $cat_id = 0,
$auth_login = '', $auth_pass = '') {
- $url = fix_url($url);
- $parts = parse_url($url);
-
+ $url = fix_url($url);
if (!validate_feed_url($url)) return 2;
- if ($parts['scheme'] == 'feed') $parts['scheme'] = 'http';
-
- $url = make_url_from_parts($parts);
-
if ($cat_id == "0" || !$cat_id) {
$cat_qpart = "NULL";
} else {
@@ -6674,6 +6668,8 @@
/**
* Fixes incomplete URLs by prepending "http://".
+ * Also replaces feed:// with http://, and
+ * appends a trailing slash if the url is a domain name only.
*
* @param string $url Possibly incomplete URL
*
@@ -6682,6 +6678,14 @@
function fix_url($url) {
    // Normalise the scheme: bare "www.example" gets "http://" prepended,
    // and the pseudo-scheme "feed:" is rewritten to "http:".
    if (strpos($url, '://') === false) {
        $url = 'http://' . $url;
    } else if (substr($url, 0, 5) == 'feed:') {
        $url = 'http:' . substr($url, 5);
    }

    // Append a slash if nothing follows the host part:
    // "http://www.example"  -> "http://www.example/"
    // "https://www.example" -> "https://www.example/"
    //
    // The search starts right after the "://" separator rather than at a
    // fixed offset, so schemes of any length are handled. A fixed offset
    // of 7 only fits "http://": for "https://" it lands on the second
    // slash of the separator, and for very short URLs it is out of range.
    // At this point $url always contains "://" (either it was there to
    // begin with or it was just prepended), so the offset is always valid.
    $hostStart = strpos($url, '://') + 3;
    if (strpos($url, '/', $hostStart) === false) {
        $url .= '/';
    }
    return $url;
}
@@ -6973,4 +6977,39 @@
}
return false;
}
+
/**
 * Extracts RSS/Atom feed URLs from the given HTML URL.
 *
 * The page is loaded with DOMDocument and the <link> elements in <head>
 * are inspected: rel="alternate" links whose type mentions "rss" or
 * "atom", plus rel="feed" links, are treated as feeds. Other alternates
 * (translations, print versions, ...) are ignored. Relative hrefs are
 * resolved against the page URL.
 *
 * @param string $url HTML page URL
 *
 * @return array Array of feeds. Key is the full URL, value the title
 *               (the link's type attribute when it has no title).
 *               Empty if the page could not be loaded.
 */
function get_feeds_from_html($url)
{
    $url = fix_url($url);

    // Work out what relative hrefs have to be resolved against.
    $parts = parse_url($url);
    if (is_array($parts) && isset($parts['scheme']) && isset($parts['host'])) {
        $scheme = $parts['scheme'];
        $origin = $scheme . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }
        // Directory of the page, taken from the path only so that slashes
        // inside the query string or fragment cannot skew it.
        $path = isset($parts['path']) ? $parts['path'] : '/';
        $lastSlash = strrpos($path, '/');
        $baseUrl = $origin
            . ($lastSlash === false ? '/' : substr($path, 0, $lastSlash + 1));
    } else {
        // URL could not be split up: fall back to plain string handling.
        $scheme = 'http';
        $origin = null;
        $baseUrl = substr($url, 0, strrpos($url, '/') + 1);
    }

    // Real-world HTML is rarely well-formed; keep libxml's parse errors
    // internal instead of raising a PHP warning for each of them, and
    // restore the caller's setting afterwards.
    $previousSetting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTMLFile($url);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);
    if (!$loaded) {
        return array();
    }

    $xpath = new DOMXPath($doc);
    $entries = $xpath->query(
        '/html/head/link[@rel="alternate" and '
        . '(contains(@type,"rss") or contains(@type,"atom"))]'
        . '|/html/head/link[@rel="feed"]'
    );

    $feedUrls = array();
    foreach ($entries as $entry) {
        $feedUrl = trim($entry->getAttribute('href'));
        if ($feedUrl == '') {
            // missing or empty href: nothing to subscribe to
            continue;
        }

        $title = $entry->getAttribute('title');
        if ($title == '') {
            $title = $entry->getAttribute('type');
        }

        if (strpos($feedUrl, '://') === false) {
            // no protocol -> relative URL
            if (substr($feedUrl, 0, 2) == '//') {
                // protocol-relative: reuse the page's scheme
                $feedUrl = $scheme . ':' . $feedUrl;
            } else if ($origin !== null && substr($feedUrl, 0, 1) == '/') {
                // root-relative: resolve against scheme://host[:port]
                $feedUrl = $origin . $feedUrl;
            } else {
                // path-relative: resolve against the page's directory
                $feedUrl = $baseUrl . $feedUrl;
            }
        }
        $feedUrls[$feedUrl] = $title;
    }
    return $feedUrls;
}
+
?>