host->set($this, "similarity", $similarity); $this->host->set($this, "enable_globally", $enable_globally); echo $this->__("Data saved."); }

	/**
	 * Plugin entry point: stores the host, opens the "images" disk cache,
	 * declares the IMG_HASH_SQL_FUNCTION config option (empty default) and,
	 * once the plugin's database migrations succeed, registers its hooks.
	 *
	 * No hook is registered when the migration step reports failure.
	 *
	 * @param object $host plugin host that receives the hook registrations
	 */
	function init($host) {
		$this->host = $host;
		$this->cache = DiskCache::instance("images");

		Config::add("IMG_HASH_SQL_FUNCTION", "");

		$plugin_migrations = new Db_Migrations();
		$plugin_migrations->initialize_for_plugin($this);

		// without a usable schema none of the hooks below can work
		if (!$plugin_migrations->migrate()) {
			return;
		}

		// article processing at feed update time
		$host->add_hook($host::HOOK_ARTICLE_FILTER, $this, 100);

		// preferences UI and periodic cleanup
		$host->add_hook($host::HOOK_PREFS_TAB, $this);
		$host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this);
		$host->add_hook($host::HOOK_PREFS_SAVE_FEED, $this);
		$host->add_hook($host::HOOK_HOUSE_KEEPING, $this);

		// article rendering paths
		$host->add_hook($host::HOOK_RENDER_ARTICLE, $this, 100);
		$host->add_hook($host::HOOK_RENDER_ARTICLE_CDM, $this, 100);
		$host->add_hook($host::HOOK_RENDER_ARTICLE_API, $this, 100);
		$host->add_hook($host::HOOK_ARTICLE_IMAGE, $this, 100);
	}

function hook_prefs_tab($args) { if ($args != "prefFeeds") return; ?>
pdo->query("select 'unique_1bits'::regproc"); } catch (PDOException $e) { ; } if (empty($res) || !$res->fetch()) { print_error("Required function from count_bits extension not found."); } } } $similarity = (int) $this->host->get($this, "similarity", $this->default_similarity); $enable_globally = $this->host->get($this, "enable_globally"); ?>
__( "Lower Hamming distance value indicates images being more similar.") ?>

__( "Save")) ?>
filter_unknown_feeds( $this->host->get_array($this, "enabled_feeds")); $this->host->set($this, "enabled_feeds", $enabled_feeds); if (count($enabled_feeds) > 0) { ?>

host->get_array($this, "enabled_feeds"); ?>
__( "Similar images") ?>
host->get_array($this, "enabled_feeds"); $enable = checkbox_to_sql_bool($_POST["phash_similarity_enabled"] ?? ""); $key = array_search($feed_id, $enabled_feeds); if ($enable) { if ($key === false) { array_push($enabled_feeds, $feed_id); } } else { if ($key !== false) { unset($enabled_feeds[$key]); } } $this->host->set($this, "enabled_feeds", $enabled_feeds); }

	/**
	 * Replaces a duplicate image/video element with a short link to its URL.
	 *
	 * In API mode the element is swapped for a plain link. Otherwise the
	 * element is wrapped into <details>, whose <summary> carries the link plus
	 * a "(similar)" link that calls Plugins.Af_Img_Phash.showSimilar().
	 *
	 * Nothing is changed when no usable URL can be determined or when the
	 * element has no parent node.
	 *
	 * @param DOMDocument $doc document owning $elem, used to create new nodes
	 * @param DOMElement $elem element carrying either "src" or "poster"
	 * @param bool $api_mode true to emit only the plain link
	 */
	private function rewrite_duplicate(DOMDocument $doc, DOMElement $elem, bool $api_mode = false) : void {
		// defined up front so neither branch leaves them unset
		$uri = false;
		$check_uri = false;

		if ($elem->hasAttribute("src")) {
			$uri = UrlHelper::validate($elem->getAttribute("src"));
			$check_uri = $uri;
		} else if ($elem->hasAttribute("poster")) {
			$check_uri = UrlHelper::validate($elem->getAttribute("poster"));

			/** @var DOMElement|null $video_source */
			$video_source = $elem->getElementsByTagName("source")->item(0);

			if ($video_source) {
				$uri = $video_source->getAttribute("src");
			}
		}

		$parent = $elem->parentNode;

		if (empty($check_uri) || empty($uri) || $parent === null) {
			return;
		}

		// link shown in place of the media, used by both output modes
		$link = $doc->createElement("a");
		$link->appendChild(new DOMText(truncate_middle($uri, 48, "...")));
		$link->setAttribute("href", $uri);
		$link->setAttribute("target", "_blank");
		$link->setAttribute("rel", "noopener noreferrer");

		if ($api_mode) {
			$parent->replaceChild($link, $elem);
			return;
		}

		$summary = $doc->createElement("summary");
		$summary->appendChild($link);

		$details = $doc->createElement("details");
		$details->appendChild($summary);

		$similar = $doc->createElement("a");
		$similar->setAttribute("href", "#");
		$similar->setAttribute("onclick", "Plugins.Af_Img_Phash.showSimilar(this)");
		$similar->setAttribute("data-check-url", UrlHelper::validate($check_uri));
		$similar->appendChild(new DOMText("(similar)"));

		$summary->appendChild(new DOMText(" "));
		$summary->appendChild($similar);

		// the original element stays in the document, collapsed inside <details>
		$parent->replaceChild($details, $elem);
		$details->appendChild($elem);
	}

	/**
	 * Records a perceptual hash for every image (or video poster) of an article.
	 *
	 * Runs only when the plugin is enabled globally or for the article's feed.
	 * URLs already present in ttrss_plugin_img_phash_urls for this owner are
	 * skipped; non-image URLs are stored with phash -1 so they are not probed
	 * again. The article itself is never modified.
	 *
	 * @param array $article article being imported
	 * @return array the unchanged article
	 */
	function hook_article_filter($article) {
		$enable_globally = $this->host->get($this, "enable_globally");

		if (!$enable_globally && !in_array($article["feed"]["id"], $this->host->get_array($this, "enabled_feeds"))) {
			return $article;
		}

		$owner_uid = $article["owner_uid"];
		$article_guid = $article["guid_hashed"];

		$doc = new DOMDocument();

		if (empty($article["content"]) || !@$doc->loadHTML($article["content"])) {
			return $article;
		}

		$xpath = new DOMXPath($doc);
		$imgs = $xpath->query("//img[@src]|//video[@poster]");

		foreach ($imgs as $img) {
			$src = $img->tagName == "video" ? $img->getAttribute("poster") : $img->getAttribute("src");
			$src = UrlHelper::validate(UrlHelper::rewrite_relative($article["link"], $src));

			// an unusable URL must not be probed or stored: it would end up as
			// an empty-URL row shared by every other invalid URL
			if (empty($src)) {
				Debug::log("phash: skipping invalid URL", Debug::LOG_VERBOSE);
				continue;
			}

			Debug::log("phash: checking $src", Debug::LOG_VERBOSE);

			$sth = $this->pdo->prepare("SELECT id FROM ttrss_plugin_img_phash_urls WHERE owner_uid = ? AND url = ? LIMIT 1");
			$sth->execute([$owner_uid, $src]);

			if ($sth->fetch()) {
				Debug::log("phash: url already stored, not processing", Debug::LOG_VERBOSE);
				continue;
			}

			Debug::log("phash: probing content type...", Debug::LOG_VERBOSE);

			// cast guards strpos() against a non-string probe result
			$content_type = (string) $this->get_content_type($src);

			Debug::log("phash: content type: $content_type", Debug::LOG_VERBOSE);

			if (strpos($content_type, "image/") === false) {
				Debug::log("phash: received content type is not an image, marking as processed and skipping.", Debug::LOG_VERBOSE);

				$sth = $this->pdo->prepare("INSERT INTO ttrss_plugin_img_phash_urls (url, article_guid, owner_uid, phash) VALUES (?, ?, ?, ?)");
				$sth->execute([$src, $article_guid, $owner_uid, '-1']);

				continue;
			}

			$cached_file = sha1($src);
			$cached_file_flag = "$cached_file.phash-flag";

			if (!$this->cache->is_writable()) {
				Debug::log("phash: cache directory is not writable", Debug::LOG_VERBOSE);
				return $article;
			}

			// check for .flag to prevent repeated failures or create it
			if ($this->cache->exists($cached_file_flag)) {
				Debug::log("phash: $cached_file_flag exists, looks like we failed on this URL before; skipping.", Debug::LOG_VERBOSE);
				continue;
			}

			$this->cache->put($cached_file_flag, "");

			// check for local cache
			if (!$this->cache->exists($cached_file)) {
				Debug::log("phash: downloading URL...", Debug::LOG_VERBOSE);

				$data = UrlHelper::fetch(["url" => $src, "max_size" => Config::get(Config::MAX_CACHE_FILE_SIZE)]);

				if ($data) {
					$this->cache->put($cached_file, $data);
				}
			}

			// download failed: the flag stays, so this URL is skipped next time
			if (!$this->cache->exists($cached_file)) {
				continue;
			}

			Debug::log("phash: using local cache...", Debug::LOG_VERBOSE);

			$implementation = new PerceptualHash();
			$hasher = new ImageHash($implementation);

			$hash = (string) $hasher->hash($this->cache->get_full_path($cached_file));

			Debug::log("phash: calculated perceptual hash: $hash", Debug::LOG_VERBOSE);

			// we managed to process this image, it should be safe to remove the flag now
			$this->cache->remove($cached_file_flag);

			if ($hash) {
				$hash = base_convert($hash, 16, 10);

				if (PHP_INT_SIZE > 4) {
					// drop the leading bit until the value no longer exceeds PHP_INT_MAX
					while ($hash > PHP_INT_MAX) {
						$bitstring = base_convert($hash, 10, 2);
						$bitstring = substr($bitstring, 1);

						$hash = base_convert($bitstring, 2, 10);
					}
				}

				$sth = $this->pdo->prepare("INSERT INTO ttrss_plugin_img_phash_urls (url, article_guid, owner_uid, phash) VALUES (?, ?, ?, ?)");
				$sth->execute([$src, $article_guid, $owner_uid, $hash]);
			}
		}

		return $article;
	}

	/**
	 * @return int plugin API version
	 */
	function api_version() {
		return 2;
	}

	/**
	 * Keeps only the feed ids that exist in ttrss_feeds for the session user.
	 *
	 * @param array $enabled_feeds feed ids to check
	 * @return array the ids that were found, in their original order
	 * @throws PDOException
	 */
	private function filter_unknown_feeds(array $enabled_feeds) : array {
		if (count($enabled_feeds) == 0) {
			return [];
		}

		// prepared once and re-executed for every feed id
		$sth = $this->pdo->prepare("SELECT id FROM ttrss_feeds WHERE id = ? AND owner_uid = ?");

		$known_feeds = [];

		foreach ($enabled_feeds as $feed) {
			$sth->execute([$feed, $_SESSION['uid']]);

			$is_known = (bool) $sth->fetch();

			// release the result set before the statement is executed again
			$sth->closeCursor();

			if ($is_known) {
				$known_feeds[] = $feed;
			}
		}

		return $known_feeds;
	}

	/**
	 * Render hook: rewrites duplicate images with the full (non-API) markup.
	 *
	 * @param array $article
	 * @return array
	 */
	function hook_render_article($article) {
		return $this->_hook_render_article_cdm($article, false);
	}

	/**
	 * API render hook: the article is taken from the 'headline' key when
	 * present, otherwise from 'article'; duplicates are rewritten in API mode.
	 *
	 * @param array $row
	 * @return array the processed article
	 */
	function hook_render_article_api($row) {
		$article = $row['headline'] ?? $row['article'];

		return $this->_hook_render_article_cdm($article, true);
	}

	/**
	 * Article image hook: runs the duplicate rewrite (API mode) over $content
	 * and returns it as the third element; the first two are empty strings.
	 *
	 * @param mixed $enclosures unused
	 * @param string $content article content
	 * @param string $site_url unused
	 * @param mixed $article unused, replaced by a synthetic article
	 * @return array ["", "", processed content]
	 */
	function hook_article_image($enclosures, $content, $site_url, $article) {
		// fake guid because of further checking in hook_render_article_cdm() which we don't need here
		$article = $this->_hook_render_article_cdm(["guid" => time(), "content" => $content], true);

		return ["", "", $article["content"]];
	}

	/**
	 * Shared implementation of the render hooks (hook argument lists cannot
	 * be extended, hence this private worker).
	 *
	 * An image is rewritten via rewrite_duplicate() when its URL is stored for
	 * another article of the session user, or when the oldest stored entry
	 * within the configured similarity distance belongs to another article.
	 * The content is re-serialised only if something was rewritten.
	 *
	 * @param array $article
	 * @param bool $api_mode
	 * @return array
	 * @throws PDOException
	 */
	private function _hook_render_article_cdm($article, $api_mode) {
		$owner_uid = $_SESSION["uid"];
		$similarity = (int) $this->host->get($this, "similarity", $this->default_similarity);

		$doc = new DOMDocument();

		$article_guid = ($article["guid"] ?? false);

		if (empty($article_guid) || empty($article["content"]) || !@$doc->loadHTML($article["content"])) {
			return $article;
		}

		$need_saving = false;

		$xpath = new DOMXPath($doc);
		$imgs = $xpath->query("//img[@src]|//video[@poster]");

		foreach ($imgs as $img) {
			$src = $img->tagName == "video" ? $img->getAttribute("poster") : $img->getAttribute("src");
			$src = UrlHelper::validate(UrlHelper::rewrite_relative($article["link"] ?? "", $src));

			// an unusable URL cannot be matched meaningfully and
			// rewrite_duplicate() would reject it anyway
			if (empty($src)) {
				continue;
			}

			// check for URL duplicates first
			$sth = $this->pdo->prepare("SELECT id FROM ttrss_plugin_img_phash_urls WHERE
				owner_uid = ? AND url = ? AND article_guid != ? LIMIT 1");
			$sth->execute([$owner_uid, $src, $article_guid]);

			if ($sth->fetch()) {
				$need_saving = true;
				$this->rewrite_duplicate($doc, $img, $api_mode);
				continue;
			}

			// check using perceptual hash duplicates
			$sth = $this->pdo->prepare("SELECT phash FROM ttrss_plugin_img_phash_urls WHERE
				owner_uid = ? AND phash != -1 AND url = ? LIMIT 1");
			$sth->execute([$owner_uid, $src]);

			$row = $sth->fetch();

			if (!$row) {
				continue;
			}

			$phash = $row['phash'];

			$sth = $this->pdo->prepare("SELECT article_guid FROM ttrss_plugin_img_phash_urls WHERE
				owner_uid = ? AND
				created_at >= ".$this->interval_days($this->data_max_age)." AND
				".$this->bitcount_func($phash)." <= ? ORDER BY created_at LIMIT 1");
			$sth->execute([$owner_uid, $similarity]);

			$row = $sth->fetch();

			// the oldest similar entry belonging to another article marks a duplicate
			if ($row && $row['article_guid'] != $article_guid) {
				$need_saving = true;
				$this->rewrite_duplicate($doc, $img, $api_mode);
			}
		}

		if ($need_saving) {
			$article["content"] = $doc->saveXML();
		}

		return $article;
	}

	/**
	 * Combined-mode render hook: same processing as hook_render_article().
	 *
	 * @param array $article
	 * @return array
	 */
	function hook_render_article_cdm($article) {
		return $this->_hook_render_article_cdm($article, false);
	}

	/**
	 * Housekeeping hook: deletes stored entries older than data_max_age days.
	 */
	function hook_house_keeping() {
		$this->pdo->query("DELETE FROM ttrss_plugin_img_phash_urls
			WHERE created_at < ".$this->interval_days($this->data_max_age));
	}

	/**
	 * Builds a display title for the article with the given guid.
	 *
	 * @param string $article_guid guid looked up in ttrss_entries
	 * @param int $owner_uid owner of the user entry
	 * @return string formatted title (HTML-escaped) with its local update time,
	 *   or "N/A (<guid>)" when no matching article exists
	 */
	private function guid_to_article_title(string $article_guid, int $owner_uid) : string {
		$sth = $this->pdo->prepare("SELECT feed_id, title, updated
			FROM ttrss_entries, ttrss_user_entries
			WHERE ref_id = id AND
				guid = ? AND
				owner_uid = ?");
		$sth->execute([$article_guid, $owner_uid]);

		$row = $sth->fetch();

		if (!$row) {
			return "N/A ($article_guid)";
		}

		$article_title = htmlspecialchars($row["title"]);
		$updated = $row["updated"];

		return $this->T_sprintf("%s (%s) %s", "$article_title", TimeHelper::make_local_datetime($updated, true), "rss_feed");
	}

function showsimilar() : void { $url = $_REQUEST["url"]; $owner_uid = $_SESSION["uid"]; $similarity = (int) $this->host->get($this, "similarity", $this->default_similarity); ?>
pdo->prepare("SELECT phash FROM ttrss_plugin_img_phash_urls WHERE owner_uid = ? AND phash != -1 AND url = ? LIMIT 1"); $sth->execute([$owner_uid, $url]); if ($row = $sth->fetch()) { $phash = $row['phash']; $sth = $this->pdo->prepare("SELECT article_guid, ".SUBSTRING_FOR_DATE."(created_at,1,19) AS created_at FROM ttrss_plugin_img_phash_urls WHERE owner_uid = ? AND created_at >= ".$this->interval_days($this->data_max_age)." AND ".$this->bitcount_func($phash)." <= ? ORDER BY created_at LIMIT 1"); $sth->execute([$owner_uid, $similarity]); if ($row = $sth->fetch()) { $article_guid = $row['article_guid']; $article_title = $this->guid_to_article_title($article_guid, $owner_uid); $created_at = $row['created_at']; ?>
    pdo->prepare("SELECT url, article_guid, ".$this->bitcount_func($phash)." AS distance FROM ttrss_plugin_img_phash_urls WHERE ".$this->bitcount_func($phash)." <= ? ORDER BY distance LIMIT 30"); $sth->execute([$similarity]); while ($line = $sth->fetch()) { $url = htmlspecialchars($line["url"]); $distance = $line["distance"]; $rel_article_guid = $line["article_guid"]; $article_title = $this->guid_to_article_title($rel_article_guid, $owner_uid); $is_checked = ($rel_article_guid == $article_guid) ? "checked" : ""; ?>
  • (T_sprintf("Distance: %d", $distance) ?>) (__( "Original") ?>)
" . $this->__( "No information found for this URL.") . ""; } } else { print "
" . $this->__( "No information found for this URL.") . "
"; } ?>
get_header($url, CURLINFO_CONTENT_TYPE, $useragent); } }