diff options
author | Andrew Dolgov <[email protected]> | 2012-07-09 19:49:48 +0400 |
---|---|---|
committer | Andrew Dolgov <[email protected]> | 2012-07-09 19:49:48 +0400 |
commit | 2ea9bbfd075fc03acceb608746c3989613f768e3 (patch) | |
tree | 5b98f9e579a2fbe4352e8d5eaa21b61286373684 /include | |
parent | 687bb90d27b522b2a35263db9e1cd2ba53b41e74 (diff) |
Implement a proof-of-concept that uses PostgreSQL pg_trgm (N-gram) similarity to automatically mark articles with similar titles as read
Diffstat (limited to 'include')
-rw-r--r-- | include/functions.php | 2 | ||||
-rw-r--r-- | include/rssfuncs.php | 21 |
2 files changed, 23 insertions, 0 deletions
diff --git a/include/functions.php b/include/functions.php index 1e527e70d..b14515bdd 100644 --- a/include/functions.php +++ b/include/functions.php @@ -3302,6 +3302,8 @@ </head><body>"; } + $rv['title'] = $line['title']; + $rv['content'] .= "<div id=\"PTITLE-$id\" style=\"display : none\">" . truncate_string(strip_tags($line['title']), 15) . "</div>"; diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 12c4a57d2..59fa3d547 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -979,6 +979,27 @@ $published = 'false'; } + // N-grams + + if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) { + + $result = db_query($link, "SELECT COUNT(*) AS similar FROM + ttrss_entries,ttrss_user_entries + WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day' + AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD." + AND owner_uid = $owner_uid"); + + $ngram_similar = db_fetch_result($result, 0, "similar"); + + if ($debug_enabled) { + _debug("update_rss_feed: N-gram similar results: $ngram_similar"); + } + + if ($ngram_similar > 0) { + $unread = 'false'; + } + } + $result = db_query($link, "INSERT INTO ttrss_user_entries (ref_id, owner_uid, feed_id, unread, last_read, marked, |