diff options
author | Andrew Dolgov <[email protected]> | 2023-10-25 12:55:09 +0300 |
---|---|---|
committer | Andrew Dolgov <[email protected]> | 2023-10-25 12:55:09 +0300 |
commit | 865ecc87963dc3b26e66296616eef2a1cc41ac3f (patch) | |
tree | bf2ecd8a391103bdb2c8b70cd33c47467310754b /classes/feeditem | |
parent | 0a5507d3bd79d04c860455664f919bf8e7274fda (diff) |
move to psr-4 autoloader
Diffstat (limited to 'classes/feeditem')
-rwxr-xr-x | classes/feeditem/atom.php | 224 | ||||
-rwxr-xr-x | classes/feeditem/common.php | 221 | ||||
-rwxr-xr-x | classes/feeditem/rss.php | 169 |
3 files changed, 0 insertions, 614 deletions
diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php deleted file mode 100755 index f6c96f959..000000000 --- a/classes/feeditem/atom.php +++ /dev/null @@ -1,224 +0,0 @@ -<?php -class FeedItem_Atom extends FeedItem_Common { - const NS_XML = "http://www.w3.org/XML/1998/namespace"; - - function get_id(): string { - $id = $this->elem->getElementsByTagName("id")->item(0); - - if ($id) { - return $id->nodeValue; - } else { - return clean($this->get_link()); - } - } - - /** - * @return int|false a timestamp on success, false otherwise - */ - function get_date() { - $updated = $this->elem->getElementsByTagName("updated")->item(0); - - if ($updated) { - return strtotime($updated->nodeValue ?? ''); - } - - $published = $this->elem->getElementsByTagName("published")->item(0); - - if ($published) { - return strtotime($published->nodeValue ?? ''); - } - - $date = $this->xpath->query("dc:date", $this->elem)->item(0); - - if ($date) { - return strtotime($date->nodeValue ?? ''); - } - - // consistent with strtotime failing to parse - return false; - } - - - function get_link(): string { - $links = $this->elem->getElementsByTagName("link"); - - foreach ($links as $link) { - /** @phpstan-ignore-next-line */ - if ($link->hasAttribute("href") && - (!$link->hasAttribute("rel") - || $link->getAttribute("rel") == "alternate" - || $link->getAttribute("rel") == "standout")) { - $base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link); - - if ($base) - return UrlHelper::rewrite_relative($base, clean(trim($link->getAttribute("href")))); - else - return clean(trim($link->getAttribute("href"))); - } - } - - return ''; - } - - function get_title(): string { - $title = $this->elem->getElementsByTagName("title")->item(0); - return $title ? clean(trim($title->nodeValue)) : ''; - } - - /** - * @param string|null $base optional (returns $content if $base is null) - * @param string $content an HTML string - * - * @return string the rewritten XML or original $content - */ - private function rewrite_content_to_base(?string $base = null, ?string $content = '') { - - if (!empty($base) && !empty($content)) { - - $tmpdoc = new DOMDocument(); - if (@$tmpdoc->loadHTML('<?xml encoding="UTF-8">' . $content)) { - $tmpxpath = new DOMXPath($tmpdoc); - - $elems = $tmpxpath->query("(//*[@href]|//*[@src])"); - - foreach ($elems as $elem) { - if ($elem->hasAttribute("href")) { - $elem->setAttribute("href", - UrlHelper::rewrite_relative($base, $elem->getAttribute("href"))); - } else if ($elem->hasAttribute("src")) { - $elem->setAttribute("src", - UrlHelper::rewrite_relative($base, $elem->getAttribute("src"))); - } - } - - // Fall back to $content if saveXML somehow fails (i.e. returns false) - $modified_content = $tmpdoc->saveXML(); - return $modified_content !== false ? $modified_content : $content; - } - } - - return $content; - } - - function get_content(): string { - /** @var DOMElement|null */ - $content = $this->elem->getElementsByTagName("content")->item(0); - - if ($content) { - $base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $content); - - if ($content->hasAttribute('type')) { - if ($content->getAttribute('type') == 'xhtml') { - for ($i = 0; $i < $content->childNodes->length; $i++) { - $child = $content->childNodes->item($i); - - if ($child->hasChildNodes()) { - return $this->rewrite_content_to_base($base, $this->doc->saveHTML($child)); - } - } - } - } - - return $this->rewrite_content_to_base($base, $this->subtree_or_text($content)); - } - - return ''; - } - - // TODO: duplicate code should be merged with get_content() - function get_description(): string { - /** @var DOMElement|null */ - $content = $this->elem->getElementsByTagName("summary")->item(0); - - if ($content) { - $base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $content); - - if ($content->hasAttribute('type')) { - if ($content->getAttribute('type') == 'xhtml') { - for ($i = 0; $i < $content->childNodes->length; $i++) { - $child = $content->childNodes->item($i); - - if ($child->hasChildNodes()) { - return $this->rewrite_content_to_base($base, $this->doc->saveHTML($child)); - } - } - } - } - - return $this->rewrite_content_to_base($base, $this->subtree_or_text($content)); - } - - return ''; - } - - /** - * @return array<int, string> - */ - function get_categories(): array { - $categories = $this->elem->getElementsByTagName("category"); - $cats = []; - - foreach ($categories as $cat) { - if ($cat->hasAttribute("term")) - array_push($cats, $cat->getAttribute("term")); - } - - $categories = $this->xpath->query("dc:subject", $this->elem); - - foreach ($categories as $cat) { - array_push($cats, $cat->nodeValue); - } - - return $this->normalize_categories($cats); - } - - /** - * @return array<int, FeedEnclosure> - */ - function get_enclosures(): array { - $links = $this->elem->getElementsByTagName("link"); - - $encs = []; - - foreach ($links as $link) { - /** @phpstan-ignore-next-line */ - if ($link->hasAttribute("href") && $link->hasAttribute("rel")) { - $base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link); - - if ($link->getAttribute("rel") == "enclosure") { - $enc = new FeedEnclosure(); - - $enc->type = clean($link->getAttribute("type")); - $enc->length = clean($link->getAttribute("length")); - $enc->link = clean($link->getAttribute("href")); - - if (!empty($base)) { - $enc->link = UrlHelper::rewrite_relative($base, $enc->link); - } - - array_push($encs, $enc); - } - } - } - - array_push($encs, ...parent::get_enclosures()); - - return $encs; - } - - function get_language(): string { - $lang = $this->elem->getAttributeNS(self::NS_XML, "lang"); - - if (!empty($lang)) { - return clean($lang); - } else { - // Fall back to the language declared on the feed, if any. - foreach ($this->doc->childNodes as $child) { - if (method_exists($child, "getAttributeNS")) { - return clean($child->getAttributeNS(self::NS_XML, "lang")); - } - } - } - return ''; - } -} diff --git a/classes/feeditem/common.php b/classes/feeditem/common.php deleted file mode 100755 index fde481179..000000000 --- a/classes/feeditem/common.php +++ /dev/null @@ -1,221 +0,0 @@ -<?php -abstract class FeedItem_Common extends FeedItem { - /** @var DOMElement */ - protected $elem; - - /** @var DOMDocument */ - protected $doc; - - /** @var DOMXPath */ - protected $xpath; - - function __construct(DOMElement $elem, DOMDocument $doc, DOMXPath $xpath) { - $this->elem = $elem; - $this->xpath = $xpath; - $this->doc = $doc; - - try { - $source = $elem->getElementsByTagName("source")->item(0); - - // we don't need <source> element - if ($source) - $elem->removeChild($source); - } catch (DOMException $e) { - // - } - } - - function get_element(): DOMElement { - return $this->elem; - } - - function get_author(): string { - /** @var DOMElement|null */ - $author = $this->elem->getElementsByTagName("author")->item(0); - - if ($author) { - $name = $author->getElementsByTagName("name")->item(0); - - if ($name) return clean($name->nodeValue); - - $email = $author->getElementsByTagName("email")->item(0); - - if ($email) return clean($email->nodeValue); - - if ($author->nodeValue) - return clean($author->nodeValue); - } - - $author_elems = $this->xpath->query("dc:creator", $this->elem); - $authors = []; - - foreach ($author_elems as $author) { - array_push($authors, clean($author->nodeValue)); - } - - return implode(", ", $authors); - } - - function get_comments_url(): string { - //RSS only. Use a query here to avoid namespace clashes (e.g. with slash). - //might give a wrong result if a default namespace was declared (possible with XPath 2.0) - $com_url = $this->xpath->query("comments", $this->elem)->item(0); - - if ($com_url) - return clean($com_url->nodeValue); - - //Atom Threading Extension (RFC 4685) stuff. Could be used in RSS feeds, so it's in common. - //'text/html' for type is too restrictive? - $com_url = $this->xpath->query("atom:link[@rel='replies' and contains(@type,'text/html')]/@href", $this->elem)->item(0); - - if ($com_url) - return clean($com_url->nodeValue); - - return ''; - } - - function get_comments_count(): int { - //also query for ATE stuff here - $query = "slash:comments|thread:total|atom:link[@rel='replies']/@thread:count"; - $comments = $this->xpath->query($query, $this->elem)->item(0); - - if ($comments && is_numeric($comments->nodeValue)) { - return (int) clean($comments->nodeValue); - } - - return 0; - } - - /** - * this is common for both Atom and RSS types and deals with various 'media:' elements - * - * @return array<int, FeedEnclosure> - */ - function get_enclosures(): array { - $encs = []; - - $enclosures = $this->xpath->query("media:content", $this->elem); - - foreach ($enclosures as $enclosure) { - $enc = new FeedEnclosure(); - - $enc->type = clean($enclosure->getAttribute("type")); - $enc->link = clean($enclosure->getAttribute("url")); - $enc->length = clean($enclosure->getAttribute("length")); - $enc->height = clean($enclosure->getAttribute("height")); - $enc->width = clean($enclosure->getAttribute("width")); - - $medium = clean($enclosure->getAttribute("medium")); - if (!$enc->type && $medium) { - $enc->type = strtolower("$medium/generic"); - } - - $desc = $this->xpath->query("media:description", $enclosure)->item(0); - if ($desc) $enc->title = clean($desc->nodeValue); - - array_push($encs, $enc); - } - - $enclosures = $this->xpath->query("media:group", $this->elem); - - foreach ($enclosures as $enclosure) { - $enc = new FeedEnclosure(); - - /** @var DOMElement|null */ - $content = $this->xpath->query("media:content", $enclosure)->item(0); - - if ($content) { - $enc->type = clean($content->getAttribute("type")); - $enc->link = clean($content->getAttribute("url")); - $enc->length = clean($content->getAttribute("length")); - $enc->height = clean($content->getAttribute("height")); - $enc->width = clean($content->getAttribute("width")); - - $medium = clean($content->getAttribute("medium")); - if (!$enc->type && $medium) { - $enc->type = strtolower("$medium/generic"); - } - - $desc = $this->xpath->query("media:description", $content)->item(0); - if ($desc) { - $enc->title = clean($desc->nodeValue); - } else { - $desc = $this->xpath->query("media:description", $enclosure)->item(0); - if ($desc) $enc->title = clean($desc->nodeValue); - } - - array_push($encs, $enc); - } - } - - $enclosures = $this->xpath->query("media:thumbnail", $this->elem); - - foreach ($enclosures as $enclosure) { - $enc = new FeedEnclosure(); - - $enc->type = "image/generic"; - $enc->link = clean($enclosure->getAttribute("url")); - $enc->height = clean($enclosure->getAttribute("height")); - $enc->width = clean($enclosure->getAttribute("width")); - - array_push($encs, $enc); - } - - return $encs; - } - - function count_children(DOMElement $node): int { - return $node->getElementsByTagName("*")->length; - } - - /** - * @return false|string false on failure, otherwise string contents - */ - function subtree_or_text(DOMElement $node) { - if ($this->count_children($node) == 0) { - return $node->nodeValue; - } else { - return $node->c14n(); - } - } - - /** - * @param array<int, string> $cats - * - * @return array<int, string> - */ - static function normalize_categories(array $cats): array { - - $tmp = []; - - foreach ($cats as $rawcat) { - array_push($tmp, ...explode(",", $rawcat)); - } - - $tmp = array_map(function($srccat) { - $cat = clean(trim(mb_strtolower($srccat))); - - // we don't support numeric tags - if (is_numeric($cat)) - $cat = 't:' . $cat; - - $cat = preg_replace('/[,\'\"]/', "", $cat); - - if (Config::get(Config::DB_TYPE) == "mysql") { - $cat = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat); - } - - if (mb_strlen($cat) > 250) - $cat = mb_substr($cat, 0, 250); - - return $cat; - }, $tmp); - - // remove empty values - $tmp = array_filter($tmp, 'strlen'); - - asort($tmp); - - return array_unique($tmp); - } -} diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php deleted file mode 100755 index b5710ef4f..000000000 --- a/classes/feeditem/rss.php +++ /dev/null @@ -1,169 +0,0 @@ -<?php -class FeedItem_RSS extends FeedItem_Common { - function get_id(): string { - $id = $this->elem->getElementsByTagName("guid")->item(0); - - if ($id) { - return clean($id->nodeValue); - } else { - return clean($this->get_link()); - } - } - - /** - * @return int|false a timestamp on success, false otherwise - */ - function get_date() { - $pubDate = $this->elem->getElementsByTagName("pubDate")->item(0); - - if ($pubDate) { - return strtotime($pubDate->nodeValue ?? ''); - } - - $date = $this->xpath->query("dc:date", $this->elem)->item(0); - - if ($date) { - return strtotime($date->nodeValue ?? ''); - } - - // consistent with strtotime failing to parse - return false; - } - - function get_link(): string { - $links = $this->xpath->query("atom:link", $this->elem); - - foreach ($links as $link) { - if ($link && $link->hasAttribute("href") && - (!$link->hasAttribute("rel") - || $link->getAttribute("rel") == "alternate" - || $link->getAttribute("rel") == "standout")) { - - return clean(trim($link->getAttribute("href"))); - } - } - - /** @var DOMElement|null */ - $link = $this->elem->getElementsByTagName("guid")->item(0); - - if ($link && $link->hasAttributes() && $link->getAttribute("isPermaLink") == "true") { - return clean(trim($link->nodeValue)); - } - - $link = $this->elem->getElementsByTagName("link")->item(0); - - if ($link) { - return clean(trim($link->nodeValue)); - } - - return ''; - } - - function get_title(): string { - $title = $this->xpath->query("title", $this->elem)->item(0); - - if ($title) { - return clean(trim($title->nodeValue)); - } - - // if the document has a default namespace then querying for - // title would fail because of reasons so let's try the old way - $title = $this->elem->getElementsByTagName("title")->item(0); - - if ($title) { - return clean(trim($title->nodeValue)); - } - - return ''; - } - - function get_content(): string { - /** @var DOMElement|null */ - $contentA = $this->xpath->query("content:encoded", $this->elem)->item(0); - - /** @var DOMElement|null */ - $contentB = $this->elem->getElementsByTagName("description")->item(0); - - if ($contentA && $contentB) { - $resultA = $this->subtree_or_text($contentA); - $resultB = $this->subtree_or_text($contentB); - - return mb_strlen($resultA) > mb_strlen($resultB) ? $resultA : $resultB; - } - - if ($contentA) { - return $this->subtree_or_text($contentA); - } - - if ($contentB) { - return $this->subtree_or_text($contentB); - } - - return ''; - } - - function get_description(): string { - $summary = $this->elem->getElementsByTagName("description")->item(0); - - if ($summary) { - return $summary->nodeValue; - } - - return ''; - } - - /** - * @return array<int, string> - */ - function get_categories(): array { - $categories = $this->elem->getElementsByTagName("category"); - $cats = []; - - foreach ($categories as $cat) { - array_push($cats, $cat->nodeValue); - } - - $categories = $this->xpath->query("dc:subject", $this->elem); - - foreach ($categories as $cat) { - array_push($cats, $cat->nodeValue); - } - - return $this->normalize_categories($cats); - } - - /** - * @return array<int, FeedEnclosure> - */ - function get_enclosures(): array { - $enclosures = $this->elem->getElementsByTagName("enclosure"); - - $encs = array(); - - foreach ($enclosures as $enclosure) { - $enc = new FeedEnclosure(); - - $enc->type = clean($enclosure->getAttribute("type")); - $enc->link = clean($enclosure->getAttribute("url")); - $enc->length = clean($enclosure->getAttribute("length")); - $enc->height = clean($enclosure->getAttribute("height")); - $enc->width = clean($enclosure->getAttribute("width")); - - array_push($encs, $enc); - } - - array_push($encs, ...parent::get_enclosures()); - - return $encs; - } - - function get_language(): string { - $languages = $this->doc->getElementsByTagName('language'); - - if (count($languages) == 0) { - return ""; - } - - return clean($languages[0]->textContent); - } -} |