annotate mod_pubsub_summary/mod_pubsub_summary.lua @ 4426:3fe2c264aac4

mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds. This module is meant for use with mod_pubsub_feeds and tries to improve on mod_pubsub's built-in Atom summary generator.
author Kim Alvefur <zash@zash.se>
date Thu, 04 Feb 2021 01:12:41 +0100
parents
children a620bf249e63
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
1 -- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds.
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
2 --
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
3 -- Compose a textual representation of Atom payloads
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
-- Replacement text for the named character entities decoded by html_to_text().
-- Entity names that are not keys of this table are left in the text unchanged
-- (string.gsub keeps the original match when the table lookup yields nil).
local html_entities = {
	apos = "'";
	quot = '"';
	lt = "<";
	gt = ">";
	amp = "&";
	-- The utf8 library is not available on every Lua version; fall back to a
	-- plain space when it is missing.
	nbsp = utf8 and utf8.char(0xa0) or " ";
};

-- Best-effort reduction of an HTML fragment to plain text.
-- This is a fixed sequence of pattern substitutions, not an HTML parser; the
-- order matters (specific tags are rewritten before the catch-all tag strip,
-- and entities are decoded last so decoded "<" / ">" are never treated as tags).
--
-- content: string containing the HTML fragment
-- returns: string with the textual approximation
local function html_to_text(content)
	-- Paragraphs become blocks separated by a blank line
	content = content:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
	-- List items become "* item" bullet lines
	content = content:gsub("<li>(.-)</li>\n", "* %1\n");
	-- Links become "text <url>"
	content = content:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "%2 <%1>");
	-- Bold and italic markup both collapse to *emphasis*
	content = content:gsub("<b>(.-)</b>", "*%1*");
	content = content:gsub("<strong>(.-)</strong>", "*%1*");
	content = content:gsub("<em>(.-)</em>", "*%1*");
	content = content:gsub("<i>(.-)</i>", "*%1*");
	-- Images are replaced by their source URL
	content = content:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
	content = content:gsub("<br[^>]*>", "\n");
	-- Drop whatever markup is left
	content = content:gsub("<[^>]+>", "");
	-- Trim surrounding whitespace and collapse runs of blank lines
	content = content:gsub("^%s*", ""):gsub("%s*$", "");
	content = content:gsub("\n\n\n+", "\n\n");
	-- Decode the known named entities
	content = content:gsub("&(%w+);", html_entities);
	return content;
end

-- Compose a textual representation of Atom payloads.
--
-- Reads the entry's <title>, its <content> (or, failing that, <summary>) and
-- its first <link>. The result is the title, a blank line and the content
-- (or whichever of the two exists), followed on a new line by the link's href
-- unless the href is identical to the content. Returns nil when none of these
-- are present.
module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
	local payload = event.payload;
	local title = payload:get_child_text("title");
	local content_tag = payload:get_child("content") or payload:get_child("summary");
	-- An entry may carry neither <content> nor <summary>; do not index nil then.
	local content = content_tag and content_tag:get_text();
	-- Only mangle when there actually is text to work on.
	if content and content_tag.attr.type == "html" then
		content = html_to_text(content);
	end
	local link = payload:get_child("link");
	local summary;
	if title and content then
		summary = title .. "\n\n" .. content;
	elseif title or content then
		summary = content or title;
	end
	-- Skip the link when it would merely repeat the content
	if link and link.attr.href and link.attr.href ~= content then
		summary = (summary and summary .. "\n" or "") .. link.attr.href;
	end
	return summary;
end, 1);