annotate mod_pubsub_feed/mod_pubsub_feed.lua @ 299:801066bf5793

mod_pubsub_feed: Fix detection of updated posts
author Kim Alvefur <zash@zash.se>
date Sun, 26 Dec 2010 18:59:13 +0100
parents aa0df3db4901
children b81e4f86a231
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
1 -- Fetches Atom feeds and publishes to PubSub nodes
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
2 --
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
3 -- Config:
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
4 -- Component "pubsub.example.com" "pubsub"
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
5 -- modules_enabled = {
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
6 -- "pubsub_feed";
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
7 -- }
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
8 -- feeds = { -- node -> url
279
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
9 -- prosody_blog = "http://blog.prosody.im/feed/atom.xml";
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
10 -- }
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
11 -- feed_pull_interval = 20 -- minutes
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
12
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
-- Table of modules loaded on this host; the pubsub service used for
-- publishing is reached through it (modules.pubsub.service).
local this_host = hosts[module.host];
local modules = this_host.modules;

-- Only a warning is logged here: loading continues even without pubsub.
if not modules.pubsub then
	module:log("warn", "Pubsub needs to be loaded on this host");
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
17 local add_task = require "util.timer".add_task;
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
18 local date, time = os.date, os.time;
279
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
19 local dt_parse, dt_datetime = require "util.datetime".parse, require "util.datetime".datetime;
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
20 local http = require "net.http";
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
21 local parse_feed = require "feeds".feed_from_string;
279
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
22 local st = require "util.stanza";
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
23
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
-- Feeds to poll, as a table mapping pubsub node name -> feed URL.
-- When the "feeds" option is unset, two example feeds are used instead.
local default_feeds = {
	planet_jabber = "http://planet.jabber.org/atom.xml";
	prosody_blog = "http://blog.prosody.im/feed/atom.xml";
};
local config = module:get_option("feeds") or default_feeds;

-- "feed_pull_interval" is multiplied by 60 (the header comment documents
-- the option as minutes); defaults to 15.
local pull_interval = module:get_option("feed_pull_interval") or 15;
local refresh_interval = pull_interval * 60;

-- Per-feed state keyed by node name. Each record starts with only `url`;
-- `data` and `last_update` are filled in by update().
local feed_list = {};
for node_name, feed_url in pairs(config) do
	feed_list[node_name] = { url = feed_url };
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
33
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
-- Fetch one feed over HTTP and hand it to `callback` when a body is returned.
--
-- item:     per-feed state table. Reads item.url, item.data and
--           item.last_update; writes item.data and item.last_update.
-- callback: function(item), invoked only for a 200 response, after
--           item.data has been replaced with the response body.
--
-- Other response codes (apart from 304) leave the item untouched.
local function update(item, callback)
	local headers = { };
	if item.data and item.last_update then
		-- HTTP-date needs the time in GMT with a literal "GMT" suffix.
		-- The fields are spelled out instead of using %T / %Z: %T is not a
		-- C89 strftime specifier, and %Z may print "UTC" on some platforms.
		headers["If-Modified-Since"] = date("!%a, %d %b %Y %H:%M:%S GMT", item.last_update);
	end
	http.request(item.url, { headers = headers }, function(data, code)
		if code == 200 then
			item.data = data;
			-- The callback runs before last_update is bumped, so it still
			-- sees the time of the previous fetch.
			callback(item);
			item.last_update = time();
		elseif code == 304 then
			-- Not modified: item.data is kept, only the check time moves.
			item.last_update = time();
		end
	end);
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
50
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
-- Identity passed to the pubsub service when publishing: "<host>/<module name>".
local actor = module.host.."/"..module.name;

-- Return the parsed value of the date child element `name` of `entry`,
-- or nil when the element or its text is missing.
local function entry_time(entry, name)
	local child = entry:get_child(name);
	local text = child and child[1];
	return text and dt_parse(text);
end

-- Parse the freshly fetched data of `feed_item` and publish to `node` every
-- entry that is newer than the previous fetch (or all entries when there is
-- no previous fetch or the entry carries no timestamp).
local function publish_entries(node, feed_item)
	local pubsub = modules.pubsub;
	if not pubsub then
		-- Without this guard the publish call below raises inside the
		-- HTTP callback for every entry.
		module:log("warn", "Pubsub needs to be loaded on this host");
		return;
	end

	-- NOTE(review): assumes the parser returns a false value (plus an
	-- optional message) on malformed input -- confirm against "feeds".
	local feed, err = parse_feed(feed_item.data);
	if not feed then
		module:log("warn", "Could not parse feed for node %s: %s", node, tostring(err));
		return;
	end

	module:log("debug", "node: %s", node);
	for _, entry in ipairs(feed) do
		entry.attr.xmlns = "http://www.w3.org/2005/Atom";

		-- Prefer <updated> so edited posts are detected; fall back to <published>.
		local timestamp = entry_time(entry, "updated") or entry_time(entry, "published");
		module:log("debug", "timestamp is %s, item.last_update is %s", tostring(timestamp), tostring(feed_item.last_update));

		if not timestamp or not feed_item.last_update or timestamp > feed_item.last_update then
			-- The <id> element itself may be absent, not just empty; in
			-- either case make an id up from the feed URL and a timestamp.
			local id_tag = entry:get_child("id");
			local id = id_tag and id_tag[1] or feed_item.url.."#"..dt_datetime(timestamp);
			local payload = st.stanza("item", { id = id }):add_child(entry);

			module:log("debug", "publishing to %s, id %s", node, id);
			pubsub.service:publish(node, actor, id, payload);
		end
	end
end

-- Start a fetch for every configured feed and publish what comes back.
-- Returns refresh_interval, the configured pull interval.
local function refresh_feeds()
	for node, item in pairs(feed_list) do
		update(item, function(fetched)
			publish_entries(node, fetched);
		end);
	end
	return refresh_interval;
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
83
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
-- Start feed polling by handing refresh_feeds to the timer with no delay.
-- refresh_feeds returns refresh_interval; presumably util.timer uses that
-- return value to schedule the next run (confirm against util.timer).
function init()
	local first_delay = 0;
	add_task(first_delay, refresh_feeds);
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
87
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
-- Start polling now when the server is already running (prosody.start_time
-- is set); otherwise wait for the "server-started" event.
if not prosody.start_time then
	prosody.events.add_handler("server-started", init);
else -- already started
	init();
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
93