<?php
/*
 * classes/FeedItem_Common.php
 *
 * Recovered from the patch "move to psr-4 autoloader"
 * (commit 865ecc87963dc3b26e66296616eef2a1cc41ac3f, Andrew Dolgov,
 * Wed, 25 Oct 2023 12:55:09 +0300), which creates this file.
 *
 * NOTE(review): the extracted patch text had lost everything between the
 * opening PHP tag and the first constructor statement (class declaration,
 * property declarations and the constructor signature). Those parts are
 * reconstructed below from the names the surviving code uses; confirm them
 * against the upstream file before relying on them.
 */

/**
 * Accessors shared by the Atom and RSS feed item implementations: author,
 * comments link/count, 'media:' enclosures and category normalization.
 *
 * NOTE(review): `abstract` and `extends FeedItem` are reconstructed, not
 * visible in the recovered text -- confirm.
 */
abstract class FeedItem_Common extends FeedItem {
	/** @var DOMElement the feed entry/item element wrapped by this object */
	protected $elem;

	/** @var DOMDocument document the element belongs to (presumably; only stored here) */
	protected $doc;

	/** @var DOMXPath XPath evaluator used for all namespaced queries */
	protected $xpath;

	/**
	 * Stores the item element and its helpers, then drops a nested source
	 * element from the item because it is not needed.
	 *
	 * NOTE(review): parameter order and type hints are reconstructed from the
	 * assignments below and from how the properties are used -- confirm.
	 */
	public function __construct(DOMElement $elem, DOMDocument $doc, DOMXPath $xpath) {
		$this->elem = $elem;
		$this->xpath = $xpath;
		$this->doc = $doc;

		try {
			$source = $elem->getElementsByTagName("source")->item(0);

			// we don't need <source> element
			if ($source)
				$elem->removeChild($source);
		} catch (DOMException $e) {
			// Deliberately best-effort: getElementsByTagName() also matches
			// descendants, and removeChild() throws when the node found is
			// not a direct child of $elem. In that case it is left in place.
		}
	}

	/**
	 * @return DOMElement the underlying item element
	 */
	public function get_element(): DOMElement {
		return $this->elem;
	}

	/**
	 * Returns the item author.
	 *
	 * Looks at the first <author> element (its <name>, then its <email>, then
	 * its own text); if that yields nothing, joins all dc:creator values
	 * with ", ". Returns an empty string when nothing is found.
	 */
	public function get_author(): string {
		/** @var DOMElement|null */
		$author = $this->elem->getElementsByTagName("author")->item(0);

		if ($author) {
			$name = $author->getElementsByTagName("name")->item(0);

			if ($name) return clean($name->nodeValue);

			$email = $author->getElementsByTagName("email")->item(0);

			if ($email) return clean($email->nodeValue);

			if ($author->nodeValue)
				return clean($author->nodeValue);
		}

		$authors = [];

		foreach ($this->xpath->query("dc:creator", $this->elem) as $creator) {
			$authors[] = clean($creator->nodeValue);
		}

		return implode(", ", $authors);
	}

	/**
	 * Returns the URL of the item's comments page, or '' if there is none.
	 */
	public function get_comments_url(): string {
		//RSS only. Use a query here to avoid namespace clashes (e.g. with slash).
		//might give a wrong result if a default namespace was declared (possible with XPath 2.0)
		$com_url = $this->xpath->query("comments", $this->elem)->item(0);

		if ($com_url)
			return clean($com_url->nodeValue);

		//Atom Threading Extension (RFC 4685) stuff. Could be used in RSS feeds, so it's in common.
		//'text/html' for type is too restrictive?
		$com_url = $this->xpath->query("atom:link[@rel='replies' and contains(@type,'text/html')]/@href", $this->elem)->item(0);

		if ($com_url)
			return clean($com_url->nodeValue);

		return '';
	}

	/**
	 * Returns the number of comments reported by the feed, or 0 when no
	 * numeric value is present.
	 */
	public function get_comments_count(): int {
		//also query for ATE stuff here
		$query = "slash:comments|thread:total|atom:link[@rel='replies']/@thread:count";
		$comments = $this->xpath->query($query, $this->elem)->item(0);

		if ($comments && is_numeric($comments->nodeValue)) {
			return (int) clean($comments->nodeValue);
		}

		return 0;
	}

	/**
	 * this is common for both Atom and RSS types and deals with various 'media:' elements
	 *
	 * Collected, in this order: direct media:content children, the first
	 * media:content of every media:group, and media:thumbnail children.
	 *
	 * @return array<int, FeedEnclosure>
	 */
	public function get_enclosures(): array {
		$encs = [];

		foreach ($this->xpath->query("media:content", $this->elem) as $content) {
			$encs[] = $this->media_content_to_enclosure($content);
		}

		foreach ($this->xpath->query("media:group", $this->elem) as $group) {
			/** @var DOMElement|null */
			$content = $this->xpath->query("media:content", $group)->item(0);

			// groups without any media:content produce no enclosure
			if ($content) {
				$encs[] = $this->media_content_to_enclosure($content, $group);
			}
		}

		foreach ($this->xpath->query("media:thumbnail", $this->elem) as $thumbnail) {
			$enc = new FeedEnclosure();

			$enc->type = "image/generic";
			$enc->link = clean($thumbnail->getAttribute("url"));
			$enc->height = clean($thumbnail->getAttribute("height"));
			$enc->width = clean($thumbnail->getAttribute("width"));

			$encs[] = $enc;
		}

		return $encs;
	}

	/**
	 * Builds one enclosure from a media:content element.
	 *
	 * @param DOMElement $content the media:content element
	 * @param DOMNode|null $group enclosing media:group, if any; its
	 *   media:description is used as the title only when $content has none
	 */
	private function media_content_to_enclosure(DOMElement $content, ?DOMNode $group = null): FeedEnclosure {
		$enc = new FeedEnclosure();

		$enc->type = clean($content->getAttribute("type"));
		$enc->link = clean($content->getAttribute("url"));
		$enc->length = clean($content->getAttribute("length"));
		$enc->height = clean($content->getAttribute("height"));
		$enc->width = clean($content->getAttribute("width"));

		// no explicit MIME type: derive a generic one from the 'medium' attribute
		$medium = clean($content->getAttribute("medium"));
		if (!$enc->type && $medium) {
			$enc->type = strtolower("$medium/generic");
		}

		$desc = $this->xpath->query("media:description", $content)->item(0);

		if (!$desc && $group) {
			$desc = $this->xpath->query("media:description", $group)->item(0);
		}

		if ($desc) {
			$enc->title = clean($desc->nodeValue);
		}

		return $enc;
	}

	/**
	 * @return int number of descendant elements (any depth) of $node
	 */
	public function count_children(DOMElement $node): int {
		return $node->getElementsByTagName("*")->length;
	}

	/**
	 * Returns the node's text when it has no element descendants, otherwise
	 * the canonicalized (C14N) XML of the node itself.
	 *
	 * @return false|string false on failure, otherwise string contents
	 */
	public function subtree_or_text(DOMElement $node) {
		if ($this->count_children($node) === 0) {
			return $node->nodeValue;
		}

		return $node->c14n();
	}

	/**
	 * Splits comma-separated category strings into individual tags and
	 * normalizes each one: lowercased, trimmed, passed through clean(),
	 * commas and quotes removed, numeric tags prefixed with 't:', and
	 * truncated to 250 characters. Empty results and duplicates are dropped;
	 * the result is sorted with asort(), so array keys are not sequential.
	 *
	 * @param array<int, string> $cats
	 *
	 * @return array<int, string>
	 */
	public static function normalize_categories(array $cats): array {

		$tmp = [];

		foreach ($cats as $rawcat) {
			array_push($tmp, ...explode(",", $rawcat));
		}

		// loop-invariant, so look it up once instead of once per tag
		$is_mysql = Config::get(Config::DB_TYPE) == "mysql";

		$tmp = array_map(static function ($srccat) use ($is_mysql) {
			$cat = clean(trim(mb_strtolower($srccat)));

			// preg_replace() returns null on error; treat that as an empty
			// tag, which the filter below removes. Trim again because
			// removing quotes can expose inner whitespace ("' 42 '").
			$cat = trim(preg_replace('/[,\'\"]/', "", $cat) ?? "");

			// we don't support numeric tags. Checked after quotes are
			// stripped so that a quoted number ('42') is caught as well.
			if (is_numeric($cat))
				$cat = 't:' . $cat;

			if ($is_mysql) {
				// replace characters outside the BMP with U+FFFD
				// (presumably because the MySQL schema cannot store 4-byte
				// UTF-8 sequences -- confirm)
				$cat = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat) ?? "";
			}

			if (mb_strlen($cat) > 250)
				$cat = mb_substr($cat, 0, 250);

			return $cat;
		}, $tmp);

		// remove empty values
		$tmp = array_filter($tmp, static function ($cat) {
			return (string) $cat !== "";
		});

		asort($tmp);

		return array_unique($tmp);
	}
}