summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2012-07-09 19:49:48 +0400
committer Andrew Dolgov <[email protected]> 2012-07-09 19:49:48 +0400
commit 2ea9bbfd075fc03acceb608746c3989613f768e3 (patch)
tree 5b98f9e579a2fbe4352e8d5eaa21b61286373684 /include
parent 687bb90d27b522b2a35263db9e1cd2ba53b41e74 (diff)
Implement a proof-of-concept, PostgreSQL pg_trgm (N-gram) based mechanism that automatically marks articles with similar titles as read
Diffstat (limited to 'include')
-rw-r--r-- include/functions.php 2
-rw-r--r-- include/rssfuncs.php 21
2 files changed, 23 insertions, 0 deletions
diff --git a/include/functions.php b/include/functions.php
index 1e527e70d..b14515bdd 100644
--- a/include/functions.php
+++ b/include/functions.php
@@ -3302,6 +3302,8 @@
</head><body>";
}
+ $rv['title'] = $line['title'];
+
$rv['content'] .= "<div id=\"PTITLE-$id\" style=\"display : none\">" .
truncate_string(strip_tags($line['title']), 15) . "</div>";
diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 12c4a57d2..59fa3d547 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -979,6 +979,27 @@
$published = 'false';
}
+ // N-grams
+
+ if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) {
+
+ $result = db_query($link, "SELECT COUNT(*) AS similar FROM
+ ttrss_entries,ttrss_user_entries
+ WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day'
+ AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD."
+ AND owner_uid = $owner_uid");
+
+ $ngram_similar = db_fetch_result($result, 0, "similar");
+
+ if ($debug_enabled) {
+ _debug("update_rss_feed: N-gram similar results: $ngram_similar");
+ }
+
+ if ($ngram_similar > 0) {
+ $unread = 'false';
+ }
+ }
+
$result = db_query($link,
"INSERT INTO ttrss_user_entries
(ref_id, owner_uid, feed_id, unread, last_read, marked,