annotate mod_pubsub_summary/mod_pubsub_summary.lua @ 5491:7842502c1157

mod_http_debug: Log some extended info about requests If you point something external at this module, you don't get the response body back, hence it can be useful to see some details in the log as well.
author Kim Alvefur <zash@zash.se>
date Fri, 26 May 2023 15:37:15 +0200
parents 35085e0d52ad
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
1 -- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds.
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
2 --
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
3 -- Compose a textual representation of Atom payloads
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
-- Handler for the "pubsub-summary/http://www.w3.org/2005/Atom" event.
--
-- Builds a plain-text summary from event.payload (an Atom entry element):
--   * the whitespace-trimmed <title>, emphasised with *...*, unless the body
--     text already starts with it
--   * the text of <content> (preferred) or <summary>; when its type attribute
--     is "html" the markup is reduced to lightweight text markup
--   * the href of every <link> child (plus its rel unless that is "alternate")
--   * a geo: URI for every OASIS CAP 1.2 <area> child that has a <circle>
--
-- Returns the summary string, or nil when nothing usable was found.
module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
	local payload = event.payload;
	local title = payload:get_child_text("title");
	if title then title = title:gsub("^%s+", ""):gsub("%s+$", ""); end
	-- Note: This prefers content over summary, it was made for a news feed where
	-- the interesting stuff was in the content and the summary was .. meh.
	local content_tag = payload:get_child("content") or payload:get_child("summary");
	local content = content_tag and content_tag:get_text();
	if content and content_tag.attr.type == "html" then
		-- Paragraphs become blocks separated by a blank line.
		content = content:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
		-- List items become "* item" lines.
		content = content:gsub("<li>(.-)</li>\n", "* %1\n");
		-- Park links behind control-character markers (\1 href \2 text \3) so
		-- that the generic tag stripping below does not eat them.
		content = content:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "\1%1\2%2\3");
		-- Bold and emphasis, using *strong* and _emphasis_ markers.
		content = content:gsub("<b>(.-)</b>", "*%1*");
		content = content:gsub("<strong>(.-)</strong>", "*%1*");
		content = content:gsub("<em>(.-)</em>", "_%1_");
		content = content:gsub("<i>(.-)</i>", "_%1_");
		-- Images are replaced by their source URL.
		content = content:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
		content = content:gsub("<br[^>]*>", "\n");
		-- Drop every remaining tag.
		content = content:gsub("<[^>]+>", "");
		-- Restore the parked links as "text <href>".
		content = content:gsub("\1(.-)\2(.-)\3", "%2 <%1>");
		-- Trim, then collapse runs of three or more newlines into one blank line.
		content = content:gsub("^%s*", ""):gsub("%s*$", "");
		content = content:gsub("\n\n\n+", "\n\n");
		-- Decode a handful of named entities; names missing from the table
		-- are left untouched by gsub.
		content = content:gsub("&(%w+);", {
			apos = "'";
			quot = '"';
			lt = "<";
			gt = ">";
			amp = "&";
			nbsp = "\194\160"; -- U+00A0
		});
	end
	local summary;
	if title and content and content:sub(1, #title) ~= title then
		summary = "*" .. title .. "*\n\n" .. content;
	elseif title or content then
		summary = content or title;
	end
	for link in payload:childtags("link") do
		if link and link.attr.href and link.attr.href ~= content then
			summary = (summary and summary .. "\n" or "") .. link.attr.href;
			if link.attr.rel and link.attr.rel ~= "alternate" then summary = summary .. " [" .. link.attr.rel .. "]" end
		end
	end
	for area in payload:childtags("area", "urn:oasis:names:tc:emergency:cap:1.2") do
		local circle = area:get_child_text("circle");
		-- A CAP circle is "lat,lon radius"; both coordinates may be negative,
		-- so the sign has to be part of the match. Skip anything that does not
		-- contain a coordinate pair instead of raising on a nil match.
		local coords = circle and circle:match("%-?[%d.]+,%-?[%d.]+");
		if coords then
			-- summary is still nil when the entry had no title, content or links.
			summary = (summary and summary .. "\n" or "") .. "geo:" .. coords;
		end
	end
	return summary;
end, 1);