changeset 2132:b149ea428b81

mod_pubsub_feeds: Switch to use util.xml for parsing feeds and include RSS to Atom translation code from lua-feeds
author Kim Alvefur <zash@zash.se>
date Sun, 20 Mar 2016 12:32:45 +0100
parents ba42c8882026
children 85762420a2c0
files mod_pubsub_feeds/feeds.lib.lua mod_pubsub_feeds/mod_pubsub_feeds.lua
diffstat 2 files changed, 100 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mod_pubsub_feeds/feeds.lib.lua	Sun Mar 20 12:32:45 2016 +0100
@@ -0,0 +1,80 @@
+local st = require "util.stanza";
+-- RSS->Atom translator
+-- http://code.matthewwild.co.uk/lua-feeds/
+
+-- Helpers to translate item child elements.
+-- Each helper takes the stanza being built and one RSS child element, and
+-- appends the equivalent Atom element to that stanza.
+local rss2atom = {
+	title = function (atom_entry, tag)
+		atom_entry:tag("title"):text(tag:get_text()):up();
+	end;
+	link = function (atom_entry, tag)
+		local target = tag:get_text();
+		atom_entry:tag("link", { href = target }):up();
+	end;
+	author = function (atom_entry, tag)
+		local builder = atom_entry:tag("author");
+		builder = builder:tag("email"):text(tag:get_text()):up();
+		builder:up();
+	end;
+	guid = function (atom_entry, tag)
+		atom_entry:tag("id"):text(tag:get_text()):up();
+	end;
+	category = function (atom_entry, tag)
+		local attrs = { term = tag:get_text(), scheme = tag.attr.domain };
+		atom_entry:tag("category", attrs):up();
+	end;
+	description = function (atom_entry, tag)
+		atom_entry:tag("summary"):text(tag:get_text()):up();
+	end;
+};
+
+-- Month abbreviations (lower case) mapped to two-digit month numbers.
+local months = {
+	jan = "01", feb = "02", mar = "03", apr = "04", may = "05", jun = "06";
+	jul = "07", aug = "08", sep = "09", oct = "10", nov = "11", dec = "12";
+};
+
+-- Convert an RSS <pubDate> (RFC 822 style, e.g. "Sun, 20 Mar 2016 12:32:45 +0100")
+-- into an Atom <published> timestamp appended to atom_entry. Dates that are
+-- empty, unparsable or name an unknown month are skipped instead of raising.
+function rss2atom.pubDate(atom_entry, tag)
+	local pubdate = (tag:get_text() or ""):gsub("^%a+,", ""):gsub("^%s*", "");
+	local date, month, year, hour, minute, second, zone =
+		pubdate:match("^(%d%d?) (%a+) (%d+) (%d+):(%d+):?(%d*) ?(.*)$");
+	month = month and months[month:sub(1,3):lower()];
+	if not month then return; end -- no match, or a month name we do not know
+	if #date == 1 then date = "0"..date; end
+	if #year == 2 then -- two-digit year: 81-99 -> 19xx, 00-80 -> 20xx
+		year = (tonumber(year) > 80 and "19" or "20")..year;
+	end
+	if zone == "UT" or zone == "GMT" then
+		zone = "Z";
+	else -- "+hhmm" -> "+hh:mm", the offset form Atom timestamps use
+		zone = zone:gsub("^([+%-]%d%d)(%d%d)$", "%1:%2");
+	end
+	if #second == 0 then second = "00"; end
+	local date_string = string.format("%s-%s-%sT%s:%s:%s%s", year, month, date, hour, minute, second, zone);
+	atom_entry:tag("published"):text(date_string):up();
+end
+
+-- Translate an RSS document into an Atom feed, one <entry> per RSS <item>.
+local function translate_rss(rss_feed)
+	local feed = st.stanza("feed", { xmlns = "http://www.w3.org/2005/Atom" });
+	local channel = rss_feed:get_child("channel");
+	-- TODO channel properties
+	if not channel then return feed; end -- no <channel>: nothing to translate
+	for item in channel:childtags("item") do
+		feed:tag("entry"); -- open a fresh entry for every item
+		for tag in item:childtags() do
+			local translator = rss2atom[tag.name];
+			if translator then translator(feed, tag); end
+		end
+		feed:up(); -- close the entry so the next one becomes a sibling
+	end
+	feed:reset();
+	return feed;
+end
+
+return { translate_rss = translate_rss } -- the only export of this library
--- a/mod_pubsub_feeds/mod_pubsub_feeds.lua	Fri Mar 18 09:59:42 2016 +0000
+++ b/mod_pubsub_feeds/mod_pubsub_feeds.lua	Sun Mar 20 12:32:45 2016 +0100
@@ -1,7 +1,5 @@
 -- Fetches Atom feeds and publishes to PubSub nodes
 --
--- Depends: http://code.matthewwild.co.uk/lua-feeds
---
 -- Config:
 -- Component "pubsub.example.com" "pubsub"
 -- modules_enabled = {
@@ -21,12 +19,23 @@
 local dt_parse, dt_datetime = require "util.datetime".parse, require "util.datetime".datetime;
 local uuid = require "util.uuid".generate;
 local hmac_sha1 = require "util.hashes".hmac_sha1;
-local parse_feed = require "feeds".feed_from_string;
+local parse_xml = require "util.xml".parse; -- parses an XML string into a stanza tree
 local st = require "util.stanza";
---local dump = require"util.serialization".serialize;
+local translate_rss = module:require("feeds").translate_rss;
 
 local xmlns_atom = "http://www.w3.org/2005/Atom";
 
+-- Parse feed XML; Atom is returned as-is, un-namespaced RSS is translated to Atom.
+local function parse_feed(data)
+	local doc, parse_err = parse_xml(data);
+	if not doc then return doc, parse_err; end
+	local ns = doc.attr.xmlns;
+	if ns == xmlns_atom then return doc; end
+	if ns == nil and doc.name == "rss" then return translate_rss(doc); end
+	-- Anything else is a format this module cannot publish.
+	return nil, "unsupported-format";
+end
+
 local use_pubsubhubub = module:get_option_boolean("use_pubsubhubub", true);
 if use_pubsubhubub then
 	module:depends"http";
@@ -75,7 +84,7 @@
 	local node = item.node;
 	module:log("debug", "parsing %d bytes of data in node %s", #item.data or 0, node)
 	local feed = parse_feed(item.data);
-	for _, entry in ipairs(feed) do
+	for entry in feed:childtags("entry") do
 		entry.attr.xmlns = xmlns_atom;
 
 		local e_published = entry:get_child_text("published");
@@ -119,11 +128,12 @@
 	end
 	if use_pubsubhubub and not item.subscription then
 		--module:log("debug", "check if %s has a hub", item.node);
-		local hub = item.hub or feed.links and feed.links.hub;
-		if hub then
-			item.hub = hub;
-			module:log("debug", "%s has a hub: %s", item.node, item.hub);
-			subscribe(item);
+		for link in feed:childtags("link") do
+			if link.attr.rel == "hub" then
+				item.hub = link.attr.href;
+				module:log("debug", "Node %s has a hub: %s", item.node, item.hub);
+				return subscribe(item);
+			end
 		end
 	end
 end