summaryrefslogtreecommitdiff
path: root/classes/feeditem
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2023-10-25 12:55:09 +0300
committerAndrew Dolgov <[email protected]>2023-10-25 12:55:09 +0300
commit865ecc87963dc3b26e66296616eef2a1cc41ac3f (patch)
treebf2ecd8a391103bdb2c8b70cd33c47467310754b /classes/feeditem
parent0a5507d3bd79d04c860455664f919bf8e7274fda (diff)
move to psr-4 autoloader
Diffstat (limited to 'classes/feeditem')
-rwxr-xr-xclasses/feeditem/atom.php224
-rwxr-xr-xclasses/feeditem/common.php221
-rwxr-xr-xclasses/feeditem/rss.php169
3 files changed, 0 insertions, 614 deletions
diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php
deleted file mode 100755
index f6c96f959..000000000
--- a/classes/feeditem/atom.php
+++ /dev/null
@@ -1,224 +0,0 @@
-<?php
-class FeedItem_Atom extends FeedItem_Common {
- const NS_XML = "http://www.w3.org/XML/1998/namespace";
-
- /**
-  * Returns the raw text of the first <id> element found in this entry; when
-  * there is none, the cleaned entry link is returned instead.
-  */
- function get_id(): string {
-     $id_node = $this->elem->getElementsByTagName("id")->item(0);
-
-     if (!$id_node) {
-         return clean($this->get_link());
-     }
-
-     return $id_node->nodeValue;
- }
-
- /**
-  * Returns the entry timestamp taken from <updated>, <published> or dc:date,
-  * checked in that order. An <updated> or <published> value which strtotime()
-  * cannot parse is skipped so that a later candidate may still supply the date.
-  *
-  * @return int|false a timestamp on success, false otherwise
-  */
- function get_date() {
-     foreach (["updated", "published"] as $tag_name) {
-         $node = $this->elem->getElementsByTagName($tag_name)->item(0);
-
-         if ($node) {
-             $timestamp = strtotime($node->nodeValue ?? '');
-
-             if ($timestamp !== false) {
-                 return $timestamp;
-             }
-         }
-     }
-
-     // dc:date is the last resort, its parse result is returned as is
-     $date = $this->xpath->query("dc:date", $this->elem)->item(0);
-
-     if ($date) {
-         return strtotime($date->nodeValue ?? '');
-     }
-
-     // consistent with strtotime failing to parse
-     return false;
- }
-
-
- /**
-  * Returns the href of the first <link> which has an href attribute and either
-  * no rel attribute or rel "alternate"/"standout". The href is trimmed and
-  * cleaned, and resolved against the nearest xml:base when one is declared.
-  * Returns an empty string if no link qualifies.
-  */
- function get_link(): string {
-     foreach ($this->elem->getElementsByTagName("link") as $link) {
-         /** @phpstan-ignore-next-line */
-         if (!$link->hasAttribute("href")) continue;
-
-         if ($link->hasAttribute("rel")) {
-             $rel = $link->getAttribute("rel");
-
-             if ($rel != "alternate" && $rel != "standout") continue;
-         }
-
-         $base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link);
-         $href = clean(trim($link->getAttribute("href")));
-
-         return $base ? UrlHelper::rewrite_relative($base, $href) : $href;
-     }
-
-     return '';
- }
-
- /**
-  * Returns the trimmed and cleaned text of the first <title> element, or an
-  * empty string when the entry has none.
-  */
- function get_title(): string {
-     $title_node = $this->elem->getElementsByTagName("title")->item(0);
-
-     if (!$title_node) {
-         return '';
-     }
-
-     return clean(trim($title_node->nodeValue));
- }
-
- /**
-  * Resolves href and src attribute values found in $content against $base.
-  *
-  * @param string|null $base optional ($content is returned untouched if $base is empty)
-  * @param string|null $content an HTML string
-  *
-  * @return string|null the rewritten document as serialized by saveXML(), or the original $content
-  */
- private function rewrite_content_to_base(?string $base = null, ?string $content = '') {
-     if (empty($base) || empty($content)) {
-         return $content;
-     }
-
-     $tmpdoc = new DOMDocument();
-
-     // NOTE(review): the xml encoding prefix is presumably there to make loadHTML() parse as UTF-8
-     if (!@$tmpdoc->loadHTML('<?xml encoding="UTF-8">' . $content)) {
-         return $content;
-     }
-
-     $tmpxpath = new DOMXPath($tmpdoc);
-
-     foreach ($tmpxpath->query("(//*[@href]|//*[@src])") as $elem) {
-         // an element may carry both attributes, so each one is rewritten independently
-         foreach (["href", "src"] as $attr) {
-             if ($elem->hasAttribute($attr)) {
-                 $elem->setAttribute($attr,
-                     UrlHelper::rewrite_relative($base, $elem->getAttribute($attr)));
-             }
-         }
-     }
-
-     // Fall back to $content if saveXML somehow fails (i.e. returns false)
-     $modified_content = $tmpdoc->saveXML();
-
-     return $modified_content !== false ? $modified_content : $content;
- }
-
- /**
-  * Returns the entry body taken from the first <content> element, or an empty
-  * string when there is none.
-  */
- function get_content(): string {
-     return $this->get_text_construct("content");
- }
-
- /**
-  * Returns the entry summary taken from the first <summary> element, or an
-  * empty string when there is none.
-  */
- function get_description(): string {
-     return $this->get_text_construct("summary");
- }
-
- /**
-  * Shared implementation of get_content() and get_description().
-  *
-  * For type="xhtml" the first child node which itself has children is
-  * serialized with saveHTML(); otherwise subtree_or_text() of the whole element
-  * is used. Either result is passed through rewrite_content_to_base() together
-  * with the nearest xml:base value.
-  *
-  * @param string $tag_name name of the element to read ("content" or "summary")
-  */
- private function get_text_construct(string $tag_name): string {
-     /** @var DOMElement|null */
-     $content = $this->elem->getElementsByTagName($tag_name)->item(0);
-
-     if (!$content) {
-         return '';
-     }
-
-     $base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $content);
-
-     if ($content->hasAttribute('type') && $content->getAttribute('type') == 'xhtml') {
-         foreach ($content->childNodes as $child) {
-             if ($child->hasChildNodes()) {
-                 return $this->rewrite_content_to_base($base, $this->doc->saveHTML($child));
-             }
-         }
-     }
-
-     return $this->rewrite_content_to_base($base, $this->subtree_or_text($content));
- }
-
- /**
-  * Gathers the term attribute of every <category> element plus the text of
-  * every dc:subject element and returns them normalized.
-  *
-  * @return array<int, string>
-  */
- function get_categories(): array {
-     $cats = [];
-
-     foreach ($this->elem->getElementsByTagName("category") as $category) {
-         if (!$category->hasAttribute("term")) continue;
-
-         $cats[] = $category->getAttribute("term");
-     }
-
-     foreach ($this->xpath->query("dc:subject", $this->elem) as $subject) {
-         $cats[] = $subject->nodeValue;
-     }
-
-     return $this->normalize_categories($cats);
- }
-
- /**
-  * Returns one enclosure per <link rel="enclosure" href="..."> of this entry,
-  * followed by the enclosures reported by the parent implementation.
-  *
-  * @return array<int, FeedEnclosure>
-  */
- function get_enclosures(): array {
-     $encs = [];
-
-     foreach ($this->elem->getElementsByTagName("link") as $link) {
-         /** @phpstan-ignore-next-line */
-         if (!$link->hasAttribute("href") || !$link->hasAttribute("rel")) continue;
-
-         if ($link->getAttribute("rel") != "enclosure") continue;
-
-         $enc = new FeedEnclosure();
-
-         $enc->type = clean($link->getAttribute("type"));
-         $enc->length = clean($link->getAttribute("length"));
-         $enc->link = clean($link->getAttribute("href"));
-
-         // xml:base is only looked up for links which actually become enclosures
-         $base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link);
-
-         if (!empty($base)) {
-             $enc->link = UrlHelper::rewrite_relative($base, $enc->link);
-         }
-
-         $encs[] = $enc;
-     }
-
-     array_push($encs, ...parent::get_enclosures());
-
-     return $encs;
- }
-
- /**
-  * Returns the cleaned xml:lang of this entry. When the entry has none, the
-  * xml:lang of the first document child exposing getAttributeNS() is used.
-  */
- function get_language(): string {
-     $lang = $this->elem->getAttributeNS(self::NS_XML, "lang");
-
-     if (!empty($lang)) {
-         return clean($lang);
-     }
-
-     // Fall back to the language declared on the feed, if any.
-     foreach ($this->doc->childNodes as $child) {
-         if (!method_exists($child, "getAttributeNS")) continue;
-
-         return clean($child->getAttributeNS(self::NS_XML, "lang"));
-     }
-
-     return '';
- }
-}
diff --git a/classes/feeditem/common.php b/classes/feeditem/common.php
deleted file mode 100755
index fde481179..000000000
--- a/classes/feeditem/common.php
+++ /dev/null
@@ -1,221 +0,0 @@
-<?php
-abstract class FeedItem_Common extends FeedItem {
- /** @var DOMElement */
- protected $elem;
-
- /** @var DOMDocument */
- protected $doc;
-
- /** @var DOMXPath */
- protected $xpath;
-
- /**
-  * Stores the item element together with its document and XPath context and
-  * drops <source> elements which are direct children of the item.
-  *
-  * @param DOMElement $elem the feed item (entry) element
-  * @param DOMDocument $doc the document $elem belongs to
-  * @param DOMXPath $xpath XPath object used for namespaced lookups
-  */
- function __construct(DOMElement $elem, DOMDocument $doc, DOMXPath $xpath) {
-     $this->elem = $elem;
-     $this->xpath = $xpath;
-     $this->doc = $doc;
-
-     // we don't need <source> element; only direct children are considered because
-     // getElementsByTagName() also returns nested matches which removeChild() rejects
-     $sources = [];
-
-     foreach ($elem->childNodes as $child) {
-         if ($child instanceof DOMElement && $child->tagName == "source") {
-             $sources[] = $child;
-         }
-     }
-
-     try {
-         // removal happens after the scan so the live child list is not modified while iterating
-         foreach ($sources as $source) {
-             $elem->removeChild($source);
-         }
-     } catch (DOMException $e) {
-         // best effort: the item stays usable with <source> left in place
-     }
- }
-
- /**
-  * Returns the DOM element this item was constructed with.
-  */
- function get_element(): DOMElement {
-     return $this->elem;
- }
-
- /**
-  * Returns the item author: the <name>, then the <email>, then the plain text
-  * of the first <author> element. If none of those yields a value, all
-  * dc:creator values are joined with ", ".
-  */
- function get_author(): string {
-     /** @var DOMElement|null */
-     $author = $this->elem->getElementsByTagName("author")->item(0);
-
-     if ($author) {
-         foreach (["name", "email"] as $tag_name) {
-             $node = $author->getElementsByTagName($tag_name)->item(0);
-
-             if ($node) {
-                 return clean($node->nodeValue);
-             }
-         }
-
-         if ($author->nodeValue) {
-             return clean($author->nodeValue);
-         }
-     }
-
-     $creators = [];
-
-     foreach ($this->xpath->query("dc:creator", $this->elem) as $creator) {
-         $creators[] = clean($creator->nodeValue);
-     }
-
-     return implode(", ", $creators);
- }
-
- /**
-  * Returns the cleaned comments URL of the item or an empty string.
-  */
- function get_comments_url(): string {
-     $queries = [
-         // RSS only. A query is used to avoid namespace clashes (e.g. with slash);
-         // might give a wrong result if a default namespace was declared
-         "comments",
-         // Atom Threading Extension (RFC 4685). Could be used in RSS feeds, so it's in common.
-         // 'text/html' for type is too restrictive?
-         "atom:link[@rel='replies' and contains(@type,'text/html')]/@href",
-     ];
-
-     foreach ($queries as $query) {
-         $node = $this->xpath->query($query, $this->elem)->item(0);
-
-         if ($node) {
-             return clean($node->nodeValue);
-         }
-     }
-
-     return '';
- }
-
- /**
-  * Returns the comment count taken from slash:comments, thread:total or the
-  * thread:count attribute of an atom replies link; 0 when the first match is
-  * missing or not numeric.
-  */
- function get_comments_count(): int {
-     // ATE (Atom Threading Extension) values are queried here as well
-     $node = $this->xpath
-         ->query("slash:comments|thread:total|atom:link[@rel='replies']/@thread:count", $this->elem)
-         ->item(0);
-
-     if (!$node || !is_numeric($node->nodeValue)) {
-         return 0;
-     }
-
-     return (int) clean($node->nodeValue);
- }
-
- /**
-  * this is common for both Atom and RSS types and deals with various 'media:' elements
-  *
-  * Enclosures are returned in this order: media:content children of the item,
-  * the first media:content of every media:group, then media:thumbnail children.
-  *
-  * @return array<int, FeedEnclosure>
-  */
- function get_enclosures(): array {
-     $encs = [];
-
-     foreach ($this->xpath->query("media:content", $this->elem) as $content) {
-         $encs[] = $this->media_content_to_enclosure($content);
-     }
-
-     foreach ($this->xpath->query("media:group", $this->elem) as $group) {
-         /** @var DOMElement|null */
-         $content = $this->xpath->query("media:content", $group)->item(0);
-
-         // groups without any media:content produce no enclosure
-         if ($content) {
-             $encs[] = $this->media_content_to_enclosure($content, $group);
-         }
-     }
-
-     foreach ($this->xpath->query("media:thumbnail", $this->elem) as $thumbnail) {
-         $enc = new FeedEnclosure();
-
-         $enc->type = "image/generic";
-         $enc->link = clean($thumbnail->getAttribute("url"));
-         $enc->height = clean($thumbnail->getAttribute("height"));
-         $enc->width = clean($thumbnail->getAttribute("width"));
-
-         $encs[] = $enc;
-     }
-
-     return $encs;
- }
-
- /**
-  * Builds an enclosure from a media:content element.
-  *
-  * @param DOMElement $content the media:content element
-  * @param DOMElement|null $group enclosing media:group, its media:description is
-  *   used as title when $content has no description of its own
-  */
- private function media_content_to_enclosure(DOMElement $content, ?DOMElement $group = null): FeedEnclosure {
-     $enc = new FeedEnclosure();
-
-     $enc->type = clean($content->getAttribute("type"));
-     $enc->link = clean($content->getAttribute("url"));
-     $enc->length = clean($content->getAttribute("length"));
-     $enc->height = clean($content->getAttribute("height"));
-     $enc->width = clean($content->getAttribute("width"));
-
-     // without an explicit type, derive a generic one from the medium attribute
-     $medium = clean($content->getAttribute("medium"));
-     if (!$enc->type && $medium) {
-         $enc->type = strtolower("$medium/generic");
-     }
-
-     $desc = $this->xpath->query("media:description", $content)->item(0);
-
-     if (!$desc && $group) {
-         $desc = $this->xpath->query("media:description", $group)->item(0);
-     }
-
-     if ($desc) {
-         $enc->title = clean($desc->nodeValue);
-     }
-
-     return $enc;
- }
-
- /**
-  * Returns the number of elements found below $node by
-  * getElementsByTagName("*"), i.e. descendants of any depth are counted.
-  */
- function count_children(DOMElement $node): int {
-     $descendants = $node->getElementsByTagName("*");
-
-     return $descendants->length;
- }
-
- /**
-  * Returns the text value of $node when it contains no elements, otherwise
-  * its canonicalized (c14n) markup.
-  *
-  * @return false|string false on failure, otherwise string contents
-  */
- function subtree_or_text(DOMElement $node) {
-     $has_elements = $this->count_children($node) != 0;
-
-     return $has_elements ? $node->c14n() : $node->nodeValue;
- }
-
- /**
-  * Turns raw category strings into the list of tags stored for an article.
-  *
-  * Every raw value is split on commas; each part is lowercased, trimmed,
-  * cleaned and stripped of commas and quotes. Numeric tags get a "t:" prefix
-  * and tags are cut to 250 characters. Empty results are dropped; the
-  * remaining tags are sorted and de-duplicated (array keys are preserved).
-  *
-  * @param array<int, string> $cats
-  *
-  * @return array<int, string>
-  */
- static function normalize_categories(array $cats): array {
-
-     $tmp = [];
-
-     foreach ($cats as $rawcat) {
-         array_push($tmp, ...explode(",", $rawcat));
-     }
-
-     $tmp = array_map(function($srccat) {
-         $cat = clean(trim(mb_strtolower($srccat)));
-
-         // quotes are stripped before the numeric check so that e.g. '2023' is
-         // prefixed as well; a failed replacement yields an empty (dropped) tag
-         $cat = preg_replace('/[,\'\"]/', "", $cat) ?? '';
-
-         // we don't support numeric tags
-         if (is_numeric($cat))
-             $cat = 't:' . $cat;
-
-         if (Config::get(Config::DB_TYPE) == "mysql") {
-             // characters above U+FFFF are replaced with U+FFFD for mysql
-             $cat = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat) ?? '';
-         }
-
-         if (mb_strlen($cat) > 250)
-             $cat = mb_substr($cat, 0, 250);
-
-         return $cat;
-     }, $tmp);
-
-     // remove empty values
-     $tmp = array_filter($tmp, 'strlen');
-
-     asort($tmp);
-
-     return array_unique($tmp);
- }
-}
diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php
deleted file mode 100755
index b5710ef4f..000000000
--- a/classes/feeditem/rss.php
+++ /dev/null
@@ -1,169 +0,0 @@
-<?php
-class FeedItem_RSS extends FeedItem_Common {
- /**
-  * Returns the cleaned text of the first <guid> element, or the cleaned item
-  * link when the item has no guid.
-  */
- function get_id(): string {
-     $guid = $this->elem->getElementsByTagName("guid")->item(0);
-
-     return clean($guid ? $guid->nodeValue : $this->get_link());
- }
-
- /**
-  * Returns the item timestamp taken from <pubDate> or, failing that, dc:date.
-  * A <pubDate> which strtotime() cannot parse no longer hides a usable dc:date.
-  *
-  * @return int|false a timestamp on success, false otherwise
-  */
- function get_date() {
-     $pubDate = $this->elem->getElementsByTagName("pubDate")->item(0);
-
-     if ($pubDate) {
-         $timestamp = strtotime($pubDate->nodeValue ?? '');
-
-         if ($timestamp !== false) {
-             return $timestamp;
-         }
-     }
-
-     // dc:date is the last resort, its parse result is returned as is
-     $date = $this->xpath->query("dc:date", $this->elem)->item(0);
-
-     if ($date) {
-         return strtotime($date->nodeValue ?? '');
-     }
-
-     // consistent with strtotime failing to parse
-     return false;
- }
-
- /**
-  * Returns the item link, trimmed and cleaned. Sources are tried in this order:
-  * an atom:link with href and no rel or rel "alternate"/"standout", a <guid>
-  * with isPermaLink="true", then the first <link> element.
-  */
- function get_link(): string {
-     foreach ($this->xpath->query("atom:link", $this->elem) as $link) {
-         if (!$link || !$link->hasAttribute("href")) continue;
-
-         if ($link->hasAttribute("rel")) {
-             $rel = $link->getAttribute("rel");
-
-             if ($rel != "alternate" && $rel != "standout") continue;
-         }
-
-         return clean(trim($link->getAttribute("href")));
-     }
-
-     /** @var DOMElement|null */
-     $guid = $this->elem->getElementsByTagName("guid")->item(0);
-
-     if ($guid && $guid->hasAttributes() && $guid->getAttribute("isPermaLink") == "true") {
-         return clean(trim($guid->nodeValue));
-     }
-
-     $plain_link = $this->elem->getElementsByTagName("link")->item(0);
-
-     return $plain_link ? clean(trim($plain_link->nodeValue)) : '';
- }
-
- /**
-  * Returns the trimmed and cleaned item title, or an empty string.
-  */
- function get_title(): string {
-     $title = $this->xpath->query("title", $this->elem)->item(0);
-
-     if (!$title) {
-         // if the document has a default namespace then querying for
-         // title would fail, so the tag name lookup is tried as well
-         $title = $this->elem->getElementsByTagName("title")->item(0);
-     }
-
-     return $title ? clean(trim($title->nodeValue)) : '';
- }
-
- /**
-  * Returns the item body from content:encoded and/or <description>. When both
-  * are present the longer one wins (the description on a tie).
-  */
- function get_content(): string {
-     /** @var DOMElement|null */
-     $encoded = $this->xpath->query("content:encoded", $this->elem)->item(0);
-
-     /** @var DOMElement|null */
-     $description = $this->elem->getElementsByTagName("description")->item(0);
-
-     if (!$encoded && !$description) {
-         return '';
-     }
-
-     if (!$description) {
-         return $this->subtree_or_text($encoded);
-     }
-
-     if (!$encoded) {
-         return $this->subtree_or_text($description);
-     }
-
-     $encoded_text = $this->subtree_or_text($encoded);
-     $description_text = $this->subtree_or_text($description);
-
-     if (mb_strlen($encoded_text) > mb_strlen($description_text)) {
-         return $encoded_text;
-     }
-
-     return $description_text;
- }
-
- /**
-  * Returns the raw text of the first <description> element, or an empty string.
-  */
- function get_description(): string {
-     $description = $this->elem->getElementsByTagName("description")->item(0);
-
-     if (!$description) {
-         return '';
-     }
-
-     return $description->nodeValue;
- }
-
- /**
-  * Gathers the text of every <category> and dc:subject element and returns
-  * the values normalized.
-  *
-  * @return array<int, string>
-  */
- function get_categories(): array {
-     $cats = [];
-
-     foreach ($this->elem->getElementsByTagName("category") as $category) {
-         $cats[] = $category->nodeValue;
-     }
-
-     foreach ($this->xpath->query("dc:subject", $this->elem) as $subject) {
-         $cats[] = $subject->nodeValue;
-     }
-
-     return $this->normalize_categories($cats);
- }
-
- /**
-  * Returns one enclosure per <enclosure> element of the item, followed by the
-  * enclosures reported by the parent implementation.
-  *
-  * @return array<int, FeedEnclosure>
-  */
- function get_enclosures(): array {
-     $encs = [];
-
-     foreach ($this->elem->getElementsByTagName("enclosure") as $node) {
-         $enc = new FeedEnclosure();
-
-         $enc->type = clean($node->getAttribute("type"));
-         $enc->link = clean($node->getAttribute("url"));
-         $enc->length = clean($node->getAttribute("length"));
-         $enc->height = clean($node->getAttribute("height"));
-         $enc->width = clean($node->getAttribute("width"));
-
-         $encs[] = $enc;
-     }
-
-     array_push($encs, ...parent::get_enclosures());
-
-     return $encs;
- }
-
- /**
-  * Returns the cleaned text of the first <language> element found anywhere in
-  * the document, or an empty string when there is none.
-  */
- function get_language(): string {
-     $first_language = $this->doc->getElementsByTagName('language')->item(0);
-
-     if (!$first_language) {
-         return "";
-     }
-
-     return clean($first_language->textContent);
- }
-}