From 2ea9bbfd075fc03acceb608746c3989613f768e3 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 9 Jul 2012 19:49:48 +0400 Subject: implement proof of concept postgresql pg_trgm N-gram based marking of similar article titles as read automatically --- include/rssfuncs.php | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/rssfuncs.php') diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 12c4a57d2..59fa3d547 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -979,6 +979,27 @@ $published = 'false'; } + // N-grams + + if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) { + + $result = db_query($link, "SELECT COUNT(*) AS similar FROM + ttrss_entries,ttrss_user_entries + WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day' + AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD." + AND owner_uid = $owner_uid"); + + $ngram_similar = db_fetch_result($result, 0, "similar"); + + if ($debug_enabled) { + _debug("update_rss_feed: N-gram similar results: $ngram_similar"); + } + + if ($ngram_similar > 0) { + $unread = 'false'; + } + } + $result = db_query($link, "INSERT INTO ttrss_user_entries (ref_id, owner_uid, feed_id, unread, last_read, marked, -- cgit v1.2.3