From cd07592c29391ca374f78a75a10465f894d50042 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 17:04:57 +0400 Subject: add basic tinyparser/atom --- classes/feeditem/atom.php | 118 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 classes/feeditem/atom.php (limited to 'classes/feeditem') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php new file mode 100644 index 000000000..560484eda --- /dev/null +++ b/classes/feeditem/atom.php @@ -0,0 +1,118 @@ +elem = $elem; + } + + function get_id() { + $id = $this->elem->getElementsByTagName("id")->item(0); + + if ($id) { + return $id->nodeValue; + } else { + return $this->get_link(); + } + } + + function get_date() { + + + } + + function get_link() { + $links = $this->elem->getElementsByTagName("link"); + + foreach ($links as $link) { + if ($link && $link->hasAttribute("href") && !$link->hasAttribute("rel")) { + return $link->getAttribute("href"); + } + } + } + + function get_title() { + $title = $this->elem->getElementsByTagName("title")->item(0); + + if ($title) { + return $title->nodeValue; + } + } + + function get_content() { + $content = $this->elem->getElementsByTagName("content")->item(0); + + if ($content) { + return $content->nodeValue; + } + } + + function get_description() { + $summary = $this->elem->getElementsByTagName("summary")->item(0); + + if ($summary) { + return $summary->nodeValue; + } + } + + // todo + function get_comments_url() { + + } + + // todo + function get_comments_count() { + + } + + function get_categories() { + $categories = $this->elem->getElementsByTagName("category"); + $cats = array(); + + foreach ($categories as $cat) { + if ($cat->hasAttribute("term")) + array_push($cats, $cat->getAttribute("term")); + } + + + return $cats; + } + + function get_enclosures() { + $links = $this->elem->getElementsByTagName("link"); + + $encs = array(); + + foreach ($links as $link) { + if ($link && $link->hasAttribute("href") && $link->hasAttribute("rel")) { + if ($link->getAttribute("rel") == "enclosure") { + $enc = new FeedEnclosure(); + + $enc->type = $link->getAttribute("type"); + $enc->link = $link->getAttribute("href"); + $enc->length = $link->getAttribute("length"); + + array_push($encs, $enc); + } + } + } + + return $encs; + } + + function get_author() { + $author = $this->elem->getElementsByTagName("author")->item(0); + + if ($author) { + $name = $author->getElementsByTagName("name")->item(0); + + if ($name) return $name->nodeValue; + + $email = $author->getElementsByTagName("email")->item(0); + + if ($email) return $email->nodeValue; + + } + } +} +?> -- cgit v1.2.3 From 04d2f9c831b14f7295a3475746b9096402a055f0 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 17:38:16 +0400 Subject: add basic rss support --- classes/feeditem/atom.php | 5 ++- classes/feeditem/rss.php | 107 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 classes/feeditem/rss.php (limited to 'classes/feeditem') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index 560484eda..0d888c443 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -17,8 +17,11 @@ class FeedItem_Atom { } function get_date() { + $updated = $this->elem->getElementsByTagName("updated")->item(0); - + if ($updated) { + return strtotime($updated->nodeValue); + } } function get_link() { diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php new file mode 100644 index 000000000..e7ea1df57 --- /dev/null +++ b/classes/feeditem/rss.php @@ -0,0 +1,107 @@ +elem = $elem; + } + + function get_id() { + return $this->get_link(); + } + + function get_date() { + $pubDate = $this->elem->getElementsByTagName("pubDate")->item(0); + + if ($pubDate) { + return strtotime($pubDate->nodeValue); + } + } + + function get_link() { + $link = $this->elem->getElementsByTagName("link")->item(0); + + if ($link) { + return $link->nodeValue; + } + } + + function get_title() { + $title = $this->elem->getElementsByTagName("title")->item(0); + + if ($title) { + return $title->nodeValue; + } + } + + function get_content() { + $content = $this->elem->getElementsByTagName("description")->item(0); + + if ($content) { + return $content->nodeValue; + } + } + + function get_description() { + $summary = $this->elem->getElementsByTagName("description")->item(0); + + if ($summary) { + return $summary->nodeValue; + } + } + + // todo + function get_comments_url() { + + } + + // todo + function get_comments_count() { + + } + + function get_categories() { + $categories = $this->elem->getElementsByTagName("category"); + $cats = array(); + + foreach ($categories as $cat) { + array_push($cats, $cat->nodeValue); + } + + return $cats; + } + + function get_enclosures() { + $enclosures = $this->elem->getElementsByTagName("enclosure"); + + $encs = array(); + + foreach ($enclosures as $enclosure) { + $enc = new FeedEnclosure(); + + $enc->type = $enclosure->getAttribute("type"); + $enc->link = $enclosure->getAttribute("url"); + $enc->length = $enclosure->getAttribute("length"); + + array_push($encs, $enc); + } + + return $encs; + } + + function get_author() { + $author = $this->elem->getElementsByTagName("author")->item(0); + + if ($author) { + $name = $author->getElementsByTagName("name")->item(0); + + if ($name) return $name->nodeValue; + + $email = $author->getElementsByTagName("email")->item(0); + + if ($email) return $email->nodeValue; + + } + } +} +?> -- cgit v1.2.3 From b09a4cdccc0d8c85af5d5a97be32536579b9db33 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 19:12:32 +0400 Subject: feeditem_rss: use guid element --- classes/feeditem/rss.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'classes/feeditem') diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index e7ea1df57..d5667102d 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -7,7 +7,13 @@ class FeedItem_RSS { } function get_id() { - return $this->get_link(); + $id = $this->elem->getElementsByTagName("guid")->item(0); + + if ($id) { + return $id->nodeValue; + } else { + return $this->get_link(); + } } function get_date() { -- cgit v1.2.3 From 4c00e15b5d05b0d137af9a33bf79172bfeb0df24 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 19:40:43 +0400 Subject: pass xpath object to feeditem, support media-rss objects --- classes/feeditem/atom.php | 2 +- classes/feeditem/rss.php | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'classes/feeditem') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index 0d888c443..7dc4ce5c1 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -2,7 +2,7 @@ class FeedItem_Atom { private $elem; - function __construct($elem) { + function __construct($elem, $doc, $xpath) { $this->elem = $elem; } diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index d5667102d..e5e2a8e56 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -1,9 +1,11 @@ elem = $elem; + $this->xpath = $xpath; } function get_id() { @@ -92,6 +94,20 @@ class FeedItem_RSS { array_push($encs, $enc); } + $enclosures = $this->xpath->query("media:content", $this->elem); + + $encs = array(); + + foreach ($enclosures as $enclosure) { + $enc = new FeedEnclosure(); + + $enc->type = $enclosure->getAttribute("type"); + $enc->link = $enclosure->getAttribute("url"); + $enc->length = $enclosure->getAttribute("length"); + + array_push($encs, $enc); + } + return $encs; } -- cgit v1.2.3 From d4992d6b48ed458ae3ff39c5cd5ac19244ccd6a8 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 20:55:08 +0400 Subject: add support for dc:subject and slash:comments --- classes/feeditem/atom.php | 24 +++++++++++++++++++++++- classes/feeditem/rss.php | 13 ++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'classes/feeditem') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index 7dc4ce5c1..d21933897 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -1,9 +1,11 @@ elem = $elem; + $this->xpath = $xpath; } function get_id() { @@ -63,9 +65,12 @@ class FeedItem_Atom { } - // todo function get_comments_count() { + $comments = $this->xpath->query("slash:comments", $this->elem)->item(0); + if ($comments) { + return $comments->nodeValue; + } } function get_categories() { @@ -77,6 +82,11 @@ class FeedItem_Atom { array_push($cats, $cat->getAttribute("term")); } + $categories = $this->xpath->query("dc:subject", $this->elem); + + foreach ($categories as $cat) { + array_push($cats, $cat->nodeValue); + } return $cats; } @@ -100,6 +110,18 @@ class FeedItem_Atom { } } + $enclosures = $this->xpath->query("media:content", $this->elem); + + foreach ($enclosures as $enclosure) { + $enc = new FeedEnclosure(); + + $enc->type = $enclosure->getAttribute("type"); + $enc->link = $enclosure->getAttribute("url"); + $enc->length = $enclosure->getAttribute("length"); + + array_push($encs, $enc); + } + return $encs; } diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index e5e2a8e56..7a3b0d342 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -63,9 +63,12 @@ class FeedItem_RSS { } - // todo function get_comments_count() { + $comments = $this->xpath->query("slash:comments", $this->elem)->item(0); + if ($comments) { + return $comments->nodeValue; + } } function get_categories() { @@ -76,6 +79,12 @@ class FeedItem_RSS { array_push($cats, $cat->nodeValue); } + $categories = $this->xpath->query("dc:subject", $this->elem); + + foreach ($categories as $cat) { + array_push($cats, $cat->nodeValue); + } + return $cats; } @@ -96,8 +105,6 @@ class FeedItem_RSS { $enclosures = $this->xpath->query("media:content", $this->elem); - $encs = array(); - foreach ($enclosures as $enclosure) { $enc = new FeedEnclosure(); -- cgit v1.2.3 From f11015058d6e313d3cabc3d3edc0230a5b6b4c26 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 21:01:30 +0400 Subject: support dc:creator --- classes/feeditem/atom.php | 7 +++++++ classes/feeditem/rss.php | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'classes/feeditem') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index d21933897..1a9db9020 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -138,6 +138,13 @@ class FeedItem_Atom { if ($email) return $email->nodeValue; } + + $author = $this->xpath->query("dc:creator", $this->elem)->item(0); + + if ($author) { + return $author->nodeValue; + } + } } ?> diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index 7a3b0d342..9e54a9c09 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -129,8 +129,14 @@ class FeedItem_RSS { $email = $author->getElementsByTagName("email")->item(0); if ($email) return $email->nodeValue; + } + + $author = $this->xpath->query("dc:creator", $this->elem)->item(0); + if ($author) { + return $author->nodeValue; } + } } ?> -- cgit v1.2.3 From b4d1690097946bdf89dc88f218e7790126f4fd3b Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 21:06:48 +0400 Subject: move common methods to feeditem_common --- classes/feeditem/atom.php | 44 +---------------------------------------- classes/feeditem/common.php | 48 +++++++++++++++++++++++++++++++++++++++++++++ classes/feeditem/rss.php | 43 +--------------------------------------- 3 files changed, 50 insertions(+), 85 deletions(-) create mode 100644 classes/feeditem/common.php (limited to 'classes/feeditem') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index 1a9db9020..df1c31d99 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -1,13 +1,5 @@ elem = $elem; - $this->xpath = $xpath; - } - +class FeedItem_Atom extends FeedItem_Common { function get_id() { $id = $this->elem->getElementsByTagName("id")->item(0); @@ -60,19 +52,6 @@ class FeedItem_Atom { } } - // todo - function get_comments_url() { - - } - - function get_comments_count() { - $comments = $this->xpath->query("slash:comments", $this->elem)->item(0); - - if ($comments) { - return $comments->nodeValue; - } - } - function get_categories() { $categories = $this->elem->getElementsByTagName("category"); $cats = array(); @@ -125,26 +104,5 @@ class FeedItem_Atom { return $encs; } - function get_author() { - $author = $this->elem->getElementsByTagName("author")->item(0); - - if ($author) { - $name = $author->getElementsByTagName("name")->item(0); - - if ($name) return $name->nodeValue; - - $email = $author->getElementsByTagName("email")->item(0); - - if ($email) return $email->nodeValue; - - } - - $author = $this->xpath->query("dc:creator", $this->elem)->item(0); - - if ($author) { - return $author->nodeValue; - } - - } } ?> diff --git a/classes/feeditem/common.php b/classes/feeditem/common.php new file mode 100644 index 000000000..fca9433a4 --- /dev/null +++ b/classes/feeditem/common.php @@ -0,0 +1,48 @@ +elem = $elem; + $this->xpath = $xpath; + $this->doc = $doc; + } + + function get_author() { + $author = $this->elem->getElementsByTagName("author")->item(0); + + if ($author) { + $name = $author->getElementsByTagName("name")->item(0); + + if ($name) return $name->nodeValue; + + $email = $author->getElementsByTagName("email")->item(0); + + if ($email) return $email->nodeValue; + } + + $author = $this->xpath->query("dc:creator", $this->elem)->item(0); + + if ($author) { + return $author->nodeValue; + } + } + + // todo + function get_comments_url() { + + } + + function get_comments_count() { + $comments = $this->xpath->query("slash:comments", $this->elem)->item(0); + + if ($comments) { + return $comments->nodeValue; + } + } + + +} +?> diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index 9e54a9c09..29c8cb6b4 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -1,13 +1,5 @@ elem = $elem; - $this->xpath = $xpath; - } - +class FeedItem_RSS extends FeedItem_Common { function get_id() { $id = $this->elem->getElementsByTagName("guid")->item(0); @@ -58,19 +50,6 @@ class FeedItem_RSS { } } - // todo - function get_comments_url() { - - } - - function get_comments_count() { - $comments = $this->xpath->query("slash:comments", $this->elem)->item(0); - - if ($comments) { - return $comments->nodeValue; - } - } - function get_categories() { $categories = $this->elem->getElementsByTagName("category"); $cats = array(); @@ -118,25 +97,5 @@ class FeedItem_RSS { return $encs; } - function get_author() { - $author = $this->elem->getElementsByTagName("author")->item(0); - - if ($author) { - $name = $author->getElementsByTagName("name")->item(0); - - if ($name) return $name->nodeValue; - - $email = $author->getElementsByTagName("email")->item(0); - - if ($email) return $email->nodeValue; - } - - $author = $this->xpath->query("dc:creator", $this->elem)->item(0); - - if ($author) { - return $author->nodeValue; - } - - } } ?> -- cgit v1.2.3 From 8a95d630a9183bf2c7b79f9e1f015eec328d8804 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 1 May 2013 22:05:59 +0400 Subject: fix rss content:encoded not used --- classes/feeditem/rss.php | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'classes/feeditem') diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index 29c8cb6b4..5b43d0e8c 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -40,6 +40,13 @@ class FeedItem_RSS extends FeedItem_Common { if ($content) { return $content->nodeValue; } + + $content = $this->xpath->query("content:encoded", $this->elem)->item(0); + + if ($content) { + return $content->nodeValue; + } + } function get_description() { -- cgit v1.2.3 From 99b82567944a96afb70ab7af1f0bd344ac3ab95a Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 2 May 2013 10:30:41 +0400 Subject: feedparser: make content:encoded take precedence over description --- classes/feeditem/rss.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'classes/feeditem') diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php index 5b43d0e8c..2f363b4f6 100644 --- a/classes/feeditem/rss.php +++ b/classes/feeditem/rss.php @@ -35,18 +35,17 @@ class FeedItem_RSS extends FeedItem_Common { } function get_content() { - $content = $this->elem->getElementsByTagName("description")->item(0); + $content = $this->xpath->query("content:encoded", $this->elem)->item(0); if ($content) { return $content->nodeValue; } - $content = $this->xpath->query("content:encoded", $this->elem)->item(0); + $content = $this->elem->getElementsByTagName("description")->item(0); if ($content) { return $content->nodeValue; } - } function get_description() { -- cgit v1.2.3 From 602fe53496fcc5c95983bebf0d789d01cdf3e5aa Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 2 May 2013 10:36:05 +0400 Subject: fix atom value elements --- classes/feeditem/common.php | 3 +++ 1 file changed, 3 insertions(+) (limited to 'classes/feeditem') diff --git a/classes/feeditem/common.php b/classes/feeditem/common.php index fca9433a4..0787a42cb 100644 --- a/classes/feeditem/common.php +++ b/classes/feeditem/common.php @@ -21,6 +21,9 @@ abstract class FeedItem_Common extends FeedItem { $email = $author->getElementsByTagName("email")->item(0); if ($email) return $email->nodeValue; + + if ($author->nodeValue) + return $author->nodeValue; } $author = $this->xpath->query("dc:creator", $this->elem)->item(0); -- cgit v1.2.3 From 72c29b65d4494f6df83c558e3bd281ff440e20af Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 2 May 2013 10:40:59 +0400 Subject: atom: fix rel=alternate links --- classes/feeditem/atom.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'classes/feeditem') diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php index df1c31d99..b981dc319 100644 --- a/classes/feeditem/atom.php +++ b/classes/feeditem/atom.php @@ -22,7 +22,8 @@ class FeedItem_Atom extends FeedItem_Common { $links = $this->elem->getElementsByTagName("link"); foreach ($links as $link) { - if ($link && $link->hasAttribute("href") && !$link->hasAttribute("rel")) { + if ($link && $link->hasAttribute("href") && (!$link->hasAttribute("rel") + || $link->getAttribute("rel") == "alternate")) { return $link->getAttribute("href"); } } -- cgit v1.2.3