From b82af8c382441bddfb9fccf5828ff7e7c61682ac Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 23 Aug 2005 06:46:48 +0100 Subject: use transactions in feed update process, better handle feeds without entry dates (schema updated) --- functions.php | 67 +++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 22 deletions(-) (limited to 'functions.php') diff --git a/functions.php b/functions.php index ef3bfa98e..36f191e83 100644 --- a/functions.php +++ b/functions.php @@ -3,6 +3,8 @@ function update_all_feeds($link, $fetch) { + pg_query("BEGIN"); + if (!$fetch) { $result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds WHERE @@ -21,6 +23,8 @@ $num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]); } + pg_query("COMMIT"); + } function update_rss_feed($link, $feed_url, $feed) { @@ -30,13 +34,14 @@ $num_unread = 0; if ($rss) { - + + pg_query("BEGIN"); + $result = pg_query("SELECT title FROM ttrss_feeds WHERE id = '$feed'"); $registered_title = pg_fetch_result($result, 0, "title"); if (!$registered_title) { - $feed_title = $rss->channel["title"]; pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'"); } @@ -49,15 +54,22 @@ if (!$entry_guid) $entry_guid = $item["link"]; $entry_timestamp = ""; - + $rss_2_date = $item['pubdate']; $rss_1_date = $item['dc']['date']; $atom_date = $item['issued']; + $no_orig_date = 'false'; + if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date); if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date); if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date); - if ($entry_timestamp == "") $entry_timestamp = 0; +// if ($rss_3_date != "") $entry_timestamp = strtotime($rss_3_date); + + if ($entry_timestamp == "") { + $entry_timestamp = time(); + $no_orig_date = 'true'; + } if (!$entry_timestamp) continue; @@ -77,7 +89,7 @@ $result = pg_query($link, " SELECT - id,unread,md5_hash,last_read, + id,unread,md5_hash,last_read,no_orig_date,title, EXTRACT(EPOCH FROM updated) as updated_timestamp FROM ttrss_entries @@ -89,11 +101,12 @@ $entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp); $query = "INSERT INTO ttrss_entries - (title, guid, link, updated, content, feed_id, md5_hash) + (title, guid, link, updated, content, feed_id, + md5_hash, no_orig_date) VALUES ('$entry_title', '$entry_guid', '$entry_link', '$entry_timestamp', '$entry_content', '$feed', - '$content_md5')"; + '$content_md5', $no_orig_date)"; $result = pg_query($link, $query); @@ -108,33 +121,41 @@ $unread = pg_fetch_result($result, 0, "unread"); $md5_hash = pg_fetch_result($result, 0, "md5_hash"); - -// if ($md5_hash != $content_md5 && CONTENT_CHECK_MD5) -// $unread = "true"; + $no_orig_date = pg_fetch_result($result, 0, "no_orig_date"); + $orig_title = pg_fetch_result($result, 0, "title"); - if (!$last_read || $md5_hash != $content_md5) { - $last_read = 'null'; + // disable update detection for posts which didn't have correct + // publishment date, because they will always register as updated + // sadly this doesn't catch feed generators which input current date + // in posts all the time (some planets do this) + + if ($no_orig_date != 't' && (!$last_read || $md5_hash != $content_md5)) { + $last_read_qpart = 'last_read = null,'; } else { - $last_read = "'$last_read'"; + $last_read_qpart = ''; } -// if ($unread || !CONTENT_CHECK_MD5) { -// $updated_query_part = "updated = '$entry_timestamp',"; -// } + // mark post as updated on title change + // maybe we should mark it as unread instead? -// if ($updated_timestamp > $entry_timestamp) { -// $unread = "true"; -// print "$updated_timestamp : $entry_timestamp
"; -// } + if ($orig_title != $entry_title) { + $last_read_qpart = 'last_read = null,'; + } + + // don't bother updating timestamps on posts with broken pubDate + + if ($no_orig_date != 't') { + $update_timestamp_qpart = "updated = '$entry_timestamp_fmt',"; + } $query = "UPDATE ttrss_entries SET title ='$entry_title', link = '$entry_link', - updated = '$entry_timestamp_fmt', + $update_timestamp_qpart + $last_read_qpart content = '$entry_content', md5_hash = '$content_md5', - last_read = $last_read, unread = '$unread' WHERE id = '$entry_id'"; @@ -151,6 +172,8 @@ $result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()"); } + pg_query("COMMIT"); + } } -- cgit v1.2.3