Mercurial > prosody-modules
changeset 5571:ca3c2d11823c
mod_pubsub_feeds: Track latest timestamp seen in feeds instead of last poll
This should ensure that an entry that has a publish timestamp after the
previously oldest post, but before the time of the last poll check, is
published to the node.
Previously, an entry would be skipped if it was published at 13:00
with a timestamp of 12:30, when the last poll was at 12:45.
For feeds that lack a timestamp, it now looks for the first post that is
not published, assuming that the feed is in reverse chronological order,
then iterates back up from there.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Sun, 25 Jun 2023 16:27:55 +0200 |
parents | f93b1fc1aa31 |
children | fd1c535dcb92 |
files | mod_pubsub_feeds/mod_pubsub_feeds.lua |
diffstat | 1 files changed, 39 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/mod_pubsub_feeds/mod_pubsub_feeds.lua Sun Jun 25 16:24:12 2023 +0200 +++ b/mod_pubsub_feeds/mod_pubsub_feeds.lua Sun Jun 25 16:27:55 2023 +0200 @@ -98,36 +98,50 @@ end items = {}; end - for i = #entries, 1, -1 do -- Feeds are usually in reverse order + + local start_from = #entries; + for i, entry in ipairs(entries) do + local id = entry:get_child_text("id"); + if not id then + local link = entry:get_child("link"); + if link then + module:log("debug", "Feed %q item %s is missing an id, using <link> instead", feed.url, entry:top_tag()); + id = link and link.attr.href; + else + module:log("debug", "Feed %q item %s is missing an id, using a HMAC of the item instead", feed.url, entry:top_tag()); + id = feed.url .. "#" .. hmac_sha1(feed.url, tostring(entry), true) .. "@" .. dt_datetime(timestamp); + end + entry:text_tag("id", id); + end + + if items[id] then + -- This should be the first item that we already have. + start_from = i-1; + break + end + end + + for i = start_from, 1, -1 do -- Feeds are usually in reverse order local entry = entries[i]; entry.attr.xmlns = xmlns_atom; - local e_published = entry:get_child_text("published"); - e_published = e_published and dt_parse(e_published); - local e_updated = entry:get_child_text("updated"); - e_updated = e_updated and dt_parse(e_updated); + local id = entry:get_child_text("id"); - local timestamp = e_updated or e_published or nil; - --module:log("debug", "timestamp is %s, item.last_update is %s", tostring(timestamp), tostring(item.last_update)); + local timestamp = dt_parse(entry:get_child_text("published")); + if not timestamp then + timestamp = time(); + entry:text_tag("published", dt_datetime(timestamp)); + end + if not timestamp or not item.last_update or timestamp > item.last_update then - local id = entry:get_child_text("id"); - if not id then - local link = entry:get_child("link"); - id = link and link.attr.href; - end - if not id then - -- Sigh, no link? - id = feed.url .. "#" .. 
hmac_sha1(feed.url, tostring(entry), true) .. "@" .. dt_datetime(timestamp); - end - if not items[id] then - local xitem = st.stanza("item", { id = id, xmlns = "http://jabber.org/protocol/pubsub" }):add_child(entry); - -- TODO Put data from /feed into item/source + local xitem = st.stanza("item", { id = id, xmlns = "http://jabber.org/protocol/pubsub" }):add_child(entry); + -- TODO Put data from /feed into item/source - --module:log("debug", "publishing to %s, id %s", node, id); - local ok, err = pubsub.service:publish(node, true, id, xitem); - if not ok then - module:log("error", "Publishing to node %s failed: %s", node, err); - end + local ok, err = pubsub.service:publish(node, true, id, xitem); + if not ok then + module:log("error", "Publishing to node %s failed: %s", node, err); + elseif timestamp then + item.last_update = timestamp; end end end @@ -157,12 +171,11 @@ if code == 200 then item.data = data; if callback then callback(item) end - item.last_update = time(); if resp.headers then item.etag = resp.headers.etag end elseif code == 304 then - item.last_update = time(); + module:log("debug", "No updates to %q", item.url); elseif code == 301 and resp.headers.location then module:log("info", "Feed %q has moved to %q", item.url, resp.headers.location); elseif code <= 100 then @@ -271,7 +284,6 @@ end feed.data = body; update_entry(feed); - feed.last_update = time(); return 202; end return 400;