Mercurial > prosody-modules
comparison mod_pubsub_feeds/mod_pubsub_feeds.lua @ 5571:ca3c2d11823c
mod_pubsub_feeds: Track latest timestamp seen in feeds instead of last poll
This should ensure that an entry that has a publish timestamp after the
previously oldest post, but before the time of the last poll check, is
published to the node.
Previously, an entry would be skipped if it was published at 13:00
with a timestamp of 12:30, where the last poll was at 12:45.
For feeds that lack a timestamp, it now looks for the first post that is
not published, assuming that the feed is in reverse chronological order,
then iterates back up from there.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Sun, 25 Jun 2023 16:27:55 +0200 |
parents | f93b1fc1aa31 |
children | fd1c535dcb92 |
comparison
equal
deleted
inserted
replaced
5570:f93b1fc1aa31 | 5571:ca3c2d11823c |
---|---|
96 module:log("error", "Could not create node %s: %s", node, err); | 96 module:log("error", "Could not create node %s: %s", node, err); |
97 return; | 97 return; |
98 end | 98 end |
99 items = {}; | 99 items = {}; |
100 end | 100 end |
101 for i = #entries, 1, -1 do -- Feeds are usually in reverse order | 101 |
102 local start_from = #entries; | |
103 for i, entry in ipairs(entries) do | |
104 local id = entry:get_child_text("id"); | |
105 if not id then | |
106 local link = entry:get_child("link"); | |
107 if link then | |
108 module:log("debug", "Feed %q item %s is missing an id, using <link> instead", feed.url, entry:top_tag()); | |
109 id = link and link.attr.href; | |
110 else | |
111 module:log("debug", "Feed %q item %s is missing an id, using a HMAC of the item instead", feed.url, entry:top_tag()); | |
112 id = feed.url .. "#" .. hmac_sha1(feed.url, tostring(entry), true) .. "@" .. dt_datetime(timestamp); | |
113 end | |
114 entry:text_tag("id", id); | |
115 end | |
116 | |
117 if items[id] then | |
118 -- This should be the first item that we already have. | |
119 start_from = i-1; | |
120 break | |
121 end | |
122 end | |
123 | |
124 for i = start_from, 1, -1 do -- Feeds are usually in reverse order | |
102 local entry = entries[i]; | 125 local entry = entries[i]; |
103 entry.attr.xmlns = xmlns_atom; | 126 entry.attr.xmlns = xmlns_atom; |
104 | 127 |
105 local e_published = entry:get_child_text("published"); | 128 local id = entry:get_child_text("id"); |
106 e_published = e_published and dt_parse(e_published); | 129 |
107 local e_updated = entry:get_child_text("updated"); | 130 local timestamp = dt_parse(entry:get_child_text("published")); |
108 e_updated = e_updated and dt_parse(e_updated); | 131 if not timestamp then |
109 | 132 timestamp = time(); |
110 local timestamp = e_updated or e_published or nil; | 133 entry:text_tag("published", dt_datetime(timestamp)); |
111 --module:log("debug", "timestamp is %s, item.last_update is %s", tostring(timestamp), tostring(item.last_update)); | 134 end |
135 | |
112 if not timestamp or not item.last_update or timestamp > item.last_update then | 136 if not timestamp or not item.last_update or timestamp > item.last_update then |
113 local id = entry:get_child_text("id"); | 137 local xitem = st.stanza("item", { id = id, xmlns = "http://jabber.org/protocol/pubsub" }):add_child(entry); |
114 if not id then | 138 -- TODO Put data from /feed into item/source |
115 local link = entry:get_child("link"); | 139 |
116 id = link and link.attr.href; | 140 local ok, err = pubsub.service:publish(node, true, id, xitem); |
117 end | 141 if not ok then |
118 if not id then | 142 module:log("error", "Publishing to node %s failed: %s", node, err); |
119 -- Sigh, no link? | 143 elseif timestamp then |
120 id = feed.url .. "#" .. hmac_sha1(feed.url, tostring(entry), true) .. "@" .. dt_datetime(timestamp); | 144 item.last_update = timestamp; |
121 end | |
122 if not items[id] then | |
123 local xitem = st.stanza("item", { id = id, xmlns = "http://jabber.org/protocol/pubsub" }):add_child(entry); | |
124 -- TODO Put data from /feed into item/source | |
125 | |
126 --module:log("debug", "publishing to %s, id %s", node, id); | |
127 local ok, err = pubsub.service:publish(node, true, id, xitem); | |
128 if not ok then | |
129 module:log("error", "Publishing to node %s failed: %s", node, err); | |
130 end | |
131 end | 145 end |
132 end | 146 end |
133 end | 147 end |
134 | 148 |
135 if item.lease_expires and item.lease_expires > time() then | 149 if item.lease_expires and item.lease_expires > time() then |
155 end | 169 end |
156 http.request(item.url, { headers = headers }, function(data, code, resp) | 170 http.request(item.url, { headers = headers }, function(data, code, resp) |
157 if code == 200 then | 171 if code == 200 then |
158 item.data = data; | 172 item.data = data; |
159 if callback then callback(item) end | 173 if callback then callback(item) end |
160 item.last_update = time(); | |
161 if resp.headers then | 174 if resp.headers then |
162 item.etag = resp.headers.etag | 175 item.etag = resp.headers.etag |
163 end | 176 end |
164 elseif code == 304 then | 177 elseif code == 304 then |
165 item.last_update = time(); | 178 module:log("debug", "No updates to %q", item.url); |
166 elseif code == 301 and resp.headers.location then | 179 elseif code == 301 and resp.headers.location then |
167 module:log("info", "Feed %q has moved to %q", item.url, resp.headers.location); | 180 module:log("info", "Feed %q has moved to %q", item.url, resp.headers.location); |
168 elseif code <= 100 then | 181 elseif code <= 100 then |
169 module:log("error", "Error fetching %q: %q[%d]", item.url, data, code); | 182 module:log("error", "Error fetching %q: %q[%d]", item.url, data, code); |
170 else | 183 else |
269 end | 282 end |
270 module:log("debug", "Valid signature"); | 283 module:log("debug", "Valid signature"); |
271 end | 284 end |
272 feed.data = body; | 285 feed.data = body; |
273 update_entry(feed); | 286 update_entry(feed); |
274 feed.last_update = time(); | |
275 return 202; | 287 return 202; |
276 end | 288 end |
277 return 400; | 289 return 400; |
278 end | 290 end |
279 return 501; | 291 return 501; |