From 6322fc6870f5df3878670a690f13a26a4ce76d20 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 7 Jul 2015 08:44:31 +0300 Subject: af_redditimgur: fix youtube regexp, try to inline images again if parsed using readability --- plugins/af_redditimgur/init.php | 315 +++++++++++++++++++--------------------- 1 file changed, 153 insertions(+), 162 deletions(-) diff --git a/plugins/af_redditimgur/init.php b/plugins/af_redditimgur/init.php index d42f4c601..265999e2b 100644 --- a/plugins/af_redditimgur/init.php +++ b/plugins/af_redditimgur/init.php @@ -69,239 +69,230 @@ class Af_RedditImgur extends Plugin { echo __("Configuration saved"); } - function hook_article_filter($article) { - - if (strpos($article["link"], "reddit.com/r/") !== FALSE) { - $doc = new DOMDocument(); - @$doc->loadHTML($article["content"]); + private function inline_stuff($article, &$doc, $xpath) { - if ($doc) { - $xpath = new DOMXPath($doc); - $entries = $xpath->query('(//a[@href]|//img[@src])'); + $entries = $xpath->query('(//a[@href]|//img[@src])'); - $found = false; + $found = false; - foreach ($entries as $entry) { - if ($entry->hasAttribute("href")) { + foreach ($entries as $entry) { + if ($entry->hasAttribute("href")) { - $matches = array(); + $matches = array(); - if (preg_match("/https?:\/\/gfycat.com\/([a-z]+)$/i", $entry->getAttribute("href"), $matches)) { + if (preg_match("/https?:\/\/gfycat.com\/([a-z]+)$/i", $entry->getAttribute("href"), $matches)) { - $tmp = fetch_file_contents($entry->getAttribute("href")); + $tmp = fetch_file_contents($entry->getAttribute("href")); - if ($tmp) { - $tmpdoc = new DOMDocument(); - @$tmpdoc->loadHTML($tmp); + if ($tmp) { + $tmpdoc = new DOMDocument(); + @$tmpdoc->loadHTML($tmp); - if ($tmpdoc) { - $tmpxpath = new DOMXPath($tmpdoc); - $source_meta = $tmpxpath->query("//meta[@property='og:video']")->item(0); + if ($tmpdoc) { + $tmpxpath = new DOMXPath($tmpdoc); + $source_meta = $tmpxpath->query("//meta[@property='og:video']")->item(0); - if ($source_meta) { - $source_stream = $source_meta->getAttribute("content"); + if ($source_meta) { + $source_stream = $source_meta->getAttribute("content"); - if ($source_stream) { - $this->handle_as_video($doc, $entry, $source_stream); - $found = 1; - } - } - } + if ($source_stream) { + $this->handle_as_video($doc, $entry, $source_stream); + $found = 1; } - } + } + } - if (preg_match("/\.(gifv)$/i", $entry->getAttribute("href"))) { - - /*$video = $doc->createElement('video'); - $video->setAttribute("autoplay", "1"); - $video->setAttribute("loop", "1"); - - $source = $doc->createElement('source'); - $source->setAttribute("src", str_replace(".gifv", ".mp4", $entry->getAttribute("href"))); - $source->setAttribute("type", "video/mp4"); - - $video->appendChild($source); - - $br = $doc->createElement('br'); - $entry->parentNode->insertBefore($video, $entry); - $entry->parentNode->insertBefore($br, $entry); - - $img = $doc->createElement('img'); - $img->setAttribute("src", - "%3D"); + } - $entry->parentNode->insertBefore($img, $entry);*/ + if (preg_match("/\.(gifv)$/i", $entry->getAttribute("href"))) { - $source_stream = str_replace(".gifv", ".mp4", $entry->getAttribute("href")); - $this->handle_as_video($doc, $entry, $source_stream); + $source_stream = str_replace(".gifv", ".mp4", $entry->getAttribute("href")); + $this->handle_as_video($doc, $entry, $source_stream); - $found = true; - } + $found = true; + } - $matches = array(); - if (preg_match("/\/\/www\.youtube\.com\/v\/([\w-]+)/", $entry->getAttribute("href"), $matches) || - preg_match("/\/\/www\.youtube\.com\/watch?v=([\w-]+)/", $entry->getAttribute("href"), $matches) || - preg_match("/\/\/youtu.be\/([\w-]+)/", $entry->getAttribute("href"), $matches)) { + $matches = array(); + if (preg_match("/\/\/www\.youtube\.com\/v\/([\w-]+)/", $entry->getAttribute("href"), $matches) || + preg_match("/\/\/www\.youtube\.com\/watch\?v=([\w-]+)/", $entry->getAttribute("href"), $matches) || + preg_match("/\/\/youtu.be\/([\w-]+)/", $entry->getAttribute("href"), $matches)) { - $vid_id = $matches[1]; + $vid_id = $matches[1]; - $iframe = $doc->createElement("iframe"); - $iframe->setAttribute("class", "youtube-player"); - $iframe->setAttribute("type", "text/html"); - $iframe->setAttribute("width", "640"); - $iframe->setAttribute("height", "385"); - $iframe->setAttribute("src", "https://www.youtube.com/embed/$vid_id"); - $iframe->setAttribute("allowfullscreen", "1"); - $iframe->setAttribute("frameborder", "0"); + $iframe = $doc->createElement("iframe"); + $iframe->setAttribute("class", "youtube-player"); + $iframe->setAttribute("type", "text/html"); + $iframe->setAttribute("width", "640"); + $iframe->setAttribute("height", "385"); + $iframe->setAttribute("src", "https://www.youtube.com/embed/$vid_id"); + $iframe->setAttribute("allowfullscreen", "1"); + $iframe->setAttribute("frameborder", "0"); - $br = $doc->createElement('br'); - $entry->parentNode->insertBefore($iframe, $entry); - $entry->parentNode->insertBefore($br, $entry); + $br = $doc->createElement('br'); + $entry->parentNode->insertBefore($iframe, $entry); + $entry->parentNode->insertBefore($br, $entry); - $found = true; - } + $found = true; + } - if (preg_match("/\.(jpg|jpeg|gif|png)(\?[0-9][0-9]*)?$/i", $entry->getAttribute("href"))) { - $img = $doc->createElement('img'); - $img->setAttribute("src", $entry->getAttribute("href")); + if (preg_match("/\.(jpg|jpeg|gif|png)(\?[0-9][0-9]*)?$/i", $entry->getAttribute("href"))) { + $img = $doc->createElement('img'); + $img->setAttribute("src", $entry->getAttribute("href")); - $br = $doc->createElement('br'); - $entry->parentNode->insertBefore($img, $entry); - $entry->parentNode->insertBefore($br, $entry); + $br = $doc->createElement('br'); + $entry->parentNode->insertBefore($img, $entry); + $entry->parentNode->insertBefore($br, $entry); - $found = true; - } + $found = true; + } - // links to imgur pages - $matches = array(); - if (preg_match("/^https?:\/\/(m\.)?imgur.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches)) { + // links to imgur pages + $matches = array(); + if (preg_match("/^https?:\/\/(m\.)?imgur.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches)) { - $token = $matches[2]; + $token = $matches[2]; - $album_content = fetch_file_contents($entry->getAttribute("href"), - false, false, false, false, 10); + $album_content = fetch_file_contents($entry->getAttribute("href"), + false, false, false, false, 10); - if ($album_content && $token) { - $adoc = new DOMDocument(); - @$adoc->loadHTML($album_content); + if ($album_content && $token) { + $adoc = new DOMDocument(); + @$adoc->loadHTML($album_content); - if ($adoc) { - $axpath = new DOMXPath($adoc); - $aentries = $axpath->query('(//img[@src])'); + if ($adoc) { + $axpath = new DOMXPath($adoc); + $aentries = $axpath->query('(//img[@src])'); - foreach ($aentries as $aentry) { - if (preg_match("/\/\/i.imgur.com\/$token\./", $aentry->getAttribute("src"))) { - $img = $doc->createElement('img'); - $img->setAttribute("src", $aentry->getAttribute("src")); + foreach ($aentries as $aentry) { + if (preg_match("/\/\/i.imgur.com\/$token\./", $aentry->getAttribute("src"))) { + $img = $doc->createElement('img'); + $img->setAttribute("src", $aentry->getAttribute("src")); - $br = $doc->createElement('br'); + $br = $doc->createElement('br'); - $entry->parentNode->insertBefore($img, $entry); - $entry->parentNode->insertBefore($br, $entry); + $entry->parentNode->insertBefore($img, $entry); + $entry->parentNode->insertBefore($br, $entry); - $found = true; + $found = true; - break; - } - } - } + break; } } + } + } + } - // linked albums, ffs - if (preg_match("/^https?:\/\/imgur.com\/(a|album|gallery)\/[^\.]+$/", $entry->getAttribute("href"), $matches)) { + // linked albums, ffs + if (preg_match("/^https?:\/\/imgur.com\/(a|album|gallery)\/[^\.]+$/", $entry->getAttribute("href"), $matches)) { - $album_content = fetch_file_contents($entry->getAttribute("href"), - false, false, false, false, 10); + $album_content = fetch_file_contents($entry->getAttribute("href"), + false, false, false, false, 10); - if ($album_content) { - $adoc = new DOMDocument(); - @$adoc->loadHTML($album_content); + if ($album_content) { + $adoc = new DOMDocument(); + @$adoc->loadHTML($album_content); - if ($adoc) { - $axpath = new DOMXPath($adoc); - $aentries = $axpath->query("//meta[@property='og:image']"); - $urls = array(); + if ($adoc) { + $axpath = new DOMXPath($adoc); + $aentries = $axpath->query("//meta[@property='og:image']"); + $urls = array(); - foreach ($aentries as $aentry) { + foreach ($aentries as $aentry) { - if (!in_array($aentry->getAttribute("content"), $urls)) { - $img = $doc->createElement('img'); - $img->setAttribute("src", $aentry->getAttribute("content")); - $entry->parentNode->insertBefore($doc->createElement('br'), $entry); + if (!in_array($aentry->getAttribute("content"), $urls)) { + $img = $doc->createElement('img'); + $img->setAttribute("src", $aentry->getAttribute("content")); + $entry->parentNode->insertBefore($doc->createElement('br'), $entry); - $br = $doc->createElement('br'); + $br = $doc->createElement('br'); - $entry->parentNode->insertBefore($img, $entry); - $entry->parentNode->insertBefore($br, $entry); + $entry->parentNode->insertBefore($img, $entry); + $entry->parentNode->insertBefore($br, $entry); - array_push($urls, $aentry->getAttribute("content")); + array_push($urls, $aentry->getAttribute("content")); - $found = true; - } - } - } + $found = true; } } } - - // remove tiny thumbnails - if ($entry->hasAttribute("src")) { - if ($entry->parentNode && $entry->parentNode->parentNode) { - $entry->parentNode->parentNode->removeChild($entry->parentNode); - } - } } + } + } + + // remove tiny thumbnails + if ($entry->hasAttribute("src")) { + if ($entry->parentNode && $entry->parentNode->parentNode) { + $entry->parentNode->parentNode->removeChild($entry->parentNode); + } + } + } - if (!$found && $this->host->get($this, "enable_readability") && mb_strlen(strip_tags($article["content"])) <= 150) { - if (!class_exists("Readability")) require_once(__DIR__ . "/classes/Readability.php"); + return $found; + } - $content_link = $xpath->query("(//a[contains(., '[link]')])")->item(0); + function hook_article_filter($article) { - if ($content_link && strpos($content_link->getAttribute("href"), "reddit.com") === FALSE) { + if (strpos($article["link"], "reddit.com/r/") !== FALSE) { + $doc = new DOMDocument(); + @$doc->loadHTML($article["content"]); + $xpath = new DOMXPath($doc); - $tmp = fetch_file_contents($content_link->getAttribute("href")); + $found = $this->inline_stuff($article, $doc, $xpath); - if ($tmp) { - $r = new Readability($tmp, $content_link->getAttribute("href")); + if (!$found && $this->host->get($this, "enable_readability") && mb_strlen(strip_tags($article["content"])) <= 150) { + if (!class_exists("Readability")) require_once(__DIR__ . "/classes/Readability.php"); - if ($r->init()) { - //$article["content"] = $r->articleContent->innerHTML . "
" . $article["content"]; + $content_link = $xpath->query("(//a[contains(., '[link]')])")->item(0); - $tmpxpath = new DOMXPath($r->dom); + if ($content_link && strpos($content_link->getAttribute("href"), "reddit.com") === FALSE) { - $entries = $tmpxpath->query('(//a[@href]|//img[@src])'); + $tmp = fetch_file_contents($content_link->getAttribute("href")); - foreach ($entries as $entry) { - if ($entry->hasAttribute("href")) { - $entry->setAttribute("href", - rewrite_relative_url($content_link->getAttribute("href"), $entry->getAttribute("href"))); + if ($tmp) { + $r = new Readability($tmp, $content_link->getAttribute("href")); - } + if ($r->init()) { + //$article["content"] = $r->articleContent->innerHTML . "
" . $article["content"]; - if ($entry->hasAttribute("src")) { - $entry->setAttribute("src", - rewrite_relative_url($content_link->getAttribute("href"), $entry->getAttribute("src"))); + $tmpxpath = new DOMXPath($r->dom); - } + $entries = $tmpxpath->query('(//a[@href]|//img[@src])'); - } + foreach ($entries as $entry) { + if ($entry->hasAttribute("href")) { + $entry->setAttribute("href", + rewrite_relative_url($content_link->getAttribute("href"), $entry->getAttribute("href"))); - $article["content"] = $r->articleContent->innerHTML . "
" . $article["content"]; } - } - } + if ($entry->hasAttribute("src")) { + $entry->setAttribute("src", + rewrite_relative_url($content_link->getAttribute("href"), $entry->getAttribute("src"))); - } + } + + } - $node = $doc->getElementsByTagName('body')->item(0); + $article["content"] = $r->articleContent->innerHTML . "
" . $article["content"]; - if ($node && $found) { - $article["content"] = $doc->saveXML($node); + $doc = new DOMDocument(); + @$doc->loadHTML($article["content"]); + $xpath = new DOMXPath($doc); + + $found = $this->inline_stuff($article, $doc, $xpath); + } } + } + + } + + $node = $doc->getElementsByTagName('body')->item(0); + + if ($node && $found) { + $article["content"] = $doc->saveXML($node); + } } return $article; -- cgit v1.2.3