summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2005-08-23 11:51:29 +0100
committerAndrew Dolgov <[email protected]>2005-08-23 11:51:29 +0100
commit466001c4fabb43a86b4366b2790169b5377f7d51 (patch)
treec3e668e507c27d92adc445fafdeec8591be77568
parentcb0bd8bd0fc516a51867041a20909a56f631e20e (diff)
cleaned up headline update process, fixed bug mentioned in previous patch
-rw-r--r--functions.php123
-rw-r--r--ttrss_schema.sql4
2 files changed, 68 insertions, 59 deletions
diff --git a/functions.php b/functions.php
index 266e38846..f97e958df 100644
--- a/functions.php
+++ b/functions.php
@@ -17,10 +17,8 @@
$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds");
}
- $num_unread = 0;
-
while ($line = pg_fetch_assoc($result)) {
- $num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]);
+ update_rss_feed($link, $line["feed_url"], $line["id"]);
}
pg_query("COMMIT");
@@ -33,7 +31,6 @@
$rss = fetch_rss($feed_url);
error_reporting (E_ERROR | E_WARNING | E_PARSE);
- $num_unread = 0;
if ($rss) {
@@ -46,14 +43,14 @@
pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
}
- pg_query("BEGIN");
-
foreach ($rss->items as $item) {
$entry_guid = $item["id"];
if (!$entry_guid) $entry_guid = $item["guid"];
if (!$entry_guid) $entry_guid = $item["link"];
+
+ if (!$entry_guid) continue;
$entry_timestamp = "";
@@ -61,19 +58,18 @@
$rss_1_date = $item['dc']['date'];
$atom_date = $item['issued'];
- $no_orig_date = 'false';
-
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
-// if ($rss_3_date != "") $entry_timestamp = strtotime($rss_3_date);
if ($entry_timestamp == "") {
$entry_timestamp = time();
$no_orig_date = 'true';
+ } else {
+ $no_orig_date = 'false';
}
- if (!$entry_timestamp) continue;
+ $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
$entry_title = $item["title"];
$entry_link = $item["link"];
@@ -82,91 +78,102 @@
if (!$entry_link) continue;
$entry_content = $item["description"];
+ if (!$entry_content) $entry_content = $item["content:escaped"];
if (!$entry_content) $entry_content = $item["content"];
if (!$entry_content) continue;
- $entry_content = pg_escape_string($entry_content);
- $entry_title = pg_escape_string($entry_title);
-
- $content_md5 = md5(strip_tags($entry_content));
+ $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
$result = pg_query($link, "
SELECT
- id,unread,md5_hash,last_read,no_orig_date,title,
+ id,last_read,no_orig_date,title,feed_id,content_hash,
EXTRACT(EPOCH FROM updated) as updated_timestamp
FROM
ttrss_entries
WHERE
guid = '$entry_guid'");
-
+
if (pg_num_rows($result) == 0) {
-
- $entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
-
- $query = "INSERT INTO ttrss_entries
- (title, guid, link, updated, content, feed_id,
- md5_hash, no_orig_date)
+
+ $entry_content = pg_escape_string($entry_content);
+ $entry_title = pg_escape_string($entry_title);
+ $entry_link = pg_escape_string($entry_link);
+
+ $query = "INSERT
+ INTO ttrss_entries
+ (title,
+ guid,
+ link,
+ updated,
+ content,
+ content_hash,
+ feed_id,
+ no_orig_date)
VALUES
- ('$entry_title', '$entry_guid', '$entry_link',
- '$entry_timestamp', '$entry_content', '$feed',
- '$content_md5', $no_orig_date)";
-
+ ('$entry_title',
+ '$entry_guid',
+ '$entry_link',
+ '$entry_timestamp_fmt',
+ '$entry_content',
+ '$content_hash',
+ '$feed',
+ $no_orig_date)";
+
$result = pg_query($link, $query);
- if ($result) ++$num_unread;
-
} else {
-
- $entry_id = pg_fetch_result($result, 0, "id");
- $updated_timestamp = pg_fetch_result($result, 0, "updated_timestamp");
- $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
- $last_read = pg_fetch_result($result, 0, "last_read");
-
- $unread = pg_fetch_result($result, 0, "unread");
- $md5_hash = pg_fetch_result($result, 0, "md5_hash");
- $no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
+
+ $orig_entry_id = pg_fetch_result($result, 0, "id");
+ $orig_feed_id = pg_fetch_result($result, 0, "feed_id");
+
+ if ($orig_feed_id != $feed) {
+// print "<p>Update from different feed ($orig_feed_id, $feed): $entry_guid [$entry_title]";
+ continue;
+ }
+
+ $orig_timestamp = pg_fetch_result($result, 0, "updated_timestamp");
+ $orig_content_hash = pg_fetch_result($result, 0, "content_hash");
+ $orig_last_read = pg_fetch_result($result, 0, "last_read");
+ $orig_no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
$orig_title = pg_fetch_result($result, 0, "title");
- if ($content_md5 != $md5_hash) {
- $update_md5_qpart = "md5_hash = '$content_md5',";
+ if ($orig_title != $entry_title) {
+ $last_read_qpart = 'last_read = null,';
+ }
+
+ if ($orig_content_hash != $content_hash) {
$last_read_qpart = 'last_read = null,';
- $update_content_qpart = "content = '$entry_content',";
}
- if ($orig_title != $entry_title) {
- print "[$orig_title] : [$entry_title]";
- $entry_title_qpart = "title ='$entry_title',";
+ if ($orig_timestamp < $entry_timestamp) {
+ $last_read_qpart = 'last_read = null,';
}
+ $entry_content = pg_escape_string($entry_content);
+ $entry_title = pg_escape_string($entry_title);
+ $entry_link = pg_escape_string($entry_link);
+
$query = "UPDATE ttrss_entries
SET
- $entry_title_qpart
+ $last_read_qpart
+ title = '$entry_title',
link = '$entry_link',
- $update_timestamp_qpart
- $last_read_qpart
- $update_md5_qpart
- $update_content_qpart
- unread = '$unread'
+ updated = '$entry_timestamp_fmt',
+ content = '$entry_content',
+ content_hash = '$content_hash'
WHERE
- id = '$entry_id'";
-
- print "<pre>".htmlspecialchars($query)."</pre>";
+ id = '$orig_entry_id'";
$result = pg_query($link, $query);
-
- if ($result) ++$num_unread;
-
- }
+ }
}
if ($result) {
$result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
}
- pg_query("COMMIT");
-
}
}
diff --git a/ttrss_schema.sql b/ttrss_schema.sql
index e926ef226..26ab01b27 100644
--- a/ttrss_schema.sql
+++ b/ttrss_schema.sql
@@ -10,6 +10,8 @@ insert into ttrss_feeds (title,feed_url) values ('Footnotes', 'http://gnomedeskt
insert into ttrss_feeds (title,feed_url) values ('Freedesktop.org', 'http://planet.freedesktop.org/rss20.xml');
insert into ttrss_feeds (title,feed_url) values ('Planet Debian', 'http://planet.debian.org/rss20.xml');
insert into ttrss_feeds (title,feed_url) values ('Planet GNOME', 'http://planet.gnome.org/rss20.xml');
+insert into ttrss_feeds (title,feed_url) values ('Planet Ubuntu', 'http://planet.ubuntulinux.org/rss20.xml');
+
insert into ttrss_feeds (title,feed_url) values ('Monologue', 'http://www.go-mono.com/monologue/index.rss');
insert into ttrss_feeds (title,feed_url) values ('Latest Linux Kernel Versions',
@@ -30,8 +32,8 @@ create table ttrss_entries (id serial not null primary key,
title varchar(250) not null,
guid varchar(300) not null unique,
link varchar(300) not null unique,
- md5_hash varchar(200) not null,
content text not null,
+ content_hash varchar(250) not null,
last_read timestamp,
no_orig_date boolean not null default false,
unread boolean not null default true);