annotate mod_pubsub_summary/mod_pubsub_summary.lua @ 5669:30f91daa40b4

mod_storage_s3: Beginnings of an experimental S3 storage driver Tested against MinIO
author Kim Alvefur <zash@zash.se>
date Sat, 14 Oct 2023 17:31:06 +0200
parents 35085e0d52ad
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
1 -- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds.
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
2 --
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
3 -- Compose a textual representation of Atom payloads
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
4 module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
5 local payload = event.payload;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
6 local title = payload:get_child_text("title");
5135
35085e0d52ad mod_pubsub_summary: Trim preceding and trailing whitespace from title
Kim Alvefur <zash@zash.se>
parents: 5131
diff changeset
7 if title then title = title:gsub("^%s+", ""):gsub("%s+$", ""); end
4435
a620bf249e63 mod_pubsub_summary: Explain why it picks content or summary in a comment
Kim Alvefur <zash@zash.se>
parents: 4426
diff changeset
8 -- Note: This prefers content over summary, it was made for a news feed where
a620bf249e63 mod_pubsub_summary: Explain why it picks content or summary in a comment
Kim Alvefur <zash@zash.se>
parents: 4426
diff changeset
9 -- the interesting stuff was in the content and the summary was .. meh.
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
10 local content_tag = payload:get_child("content") or payload:get_child("summary");
4507
86a97e7572b2 mod_pubsub_summary: Fix traceback when <content> not included
Kim Alvefur <zash@zash.se>
parents: 4438
diff changeset
11 local content = content_tag and content_tag:get_text();
86a97e7572b2 mod_pubsub_summary: Fix traceback when <content> not included
Kim Alvefur <zash@zash.se>
parents: 4438
diff changeset
12 if content and content_tag.attr.type == "html" then
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
13 content = content:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
14 content = content:gsub("<li>(.-)</li>\n", "* %1\n");
4513
ade2064160e3 mod_pubsub_summary: Fix to not strip inline links
Kim Alvefur <zash@zash.se>
parents: 4507
diff changeset
15 content = content:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "\1%1\2%2\3");
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
16 content = content:gsub("<b>(.-)</b>", "*%1*");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
17 content = content:gsub("<strong>(.-)</strong>", "*%1*");
4600
98864dffb231 mod_pubsub_summary: Fix conversion of emphasis _like this_ per XEP-0393
Kim Alvefur <zash@zash.se>
parents: 4513
diff changeset
18 content = content:gsub("<em>(.-)</em>", "_%1_");
98864dffb231 mod_pubsub_summary: Fix conversion of emphasis _like this_ per XEP-0393
Kim Alvefur <zash@zash.se>
parents: 4513
diff changeset
19 content = content:gsub("<i>(.-)</i>", "_%1_");
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
20 content = content:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
21 content = content:gsub("<br[^>]*>", "\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
22 content = content:gsub("<[^>]+>", "");
4513
ade2064160e3 mod_pubsub_summary: Fix to not strip inline links
Kim Alvefur <zash@zash.se>
parents: 4507
diff changeset
23 content = content:gsub("\1(.-)\2(.-)\3", "%2 <%1>");
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
24 content = content:gsub("^%s*", ""):gsub("%s*$", "");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
25 content = content:gsub("\n\n\n+", "\n\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
26 content = content:gsub("&(%w+);", {
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
27 apos = "'";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
28 quot = '"';
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
29 lt = "<";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
30 gt = ">";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
31 amp = "&";
4609
fcfe691d6322 mod_pubsub_summary: Use pre-escaped UTF-8 sequence for compat
Kim Alvefur <zash@zash.se>
parents: 4600
diff changeset
32 nbsp = "\194\160"; -- U+00A0
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
33 });
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
34 end
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
35 local summary;
4437
09657f758f53 mod_pubsub_summary: Skip adding title if already part of summary
Kim Alvefur <zash@zash.se>
parents: 4436
diff changeset
36 if title and content and content:sub(1, #title) ~= title then
4438
2bb11055e4bb mod_pubsub_summary: Make titles *bold* to stand out more
Kim Alvefur <zash@zash.se>
parents: 4437
diff changeset
37 summary = "*" .. title .. "*\n\n" .. content;
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
38 elseif title or content then
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
39 summary = content or title;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
40 end
4436
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
41 for link in payload:childtags("link") do
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
42 if link and link.attr.href and link.attr.href ~= content then
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
43 summary = (summary and summary .. "\n" or "") .. link.attr.href;
5129
cde38b7de04a mod_pubsub_summary: Hide link relation when value is "alternate"
Kim Alvefur <zash@zash.se>
parents: 4609
diff changeset
44 if link.attr.rel and link.attr.rel ~= "alternate" then summary = summary .. " [" .. link.attr.rel .. "]" end
4436
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
45 end
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
46 end
5131
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5129
diff changeset
47 for area in payload:childtags("area", "urn:oasis:names:tc:emergency:cap:1.2") do
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5129
diff changeset
48 local pos = area:get_child_text("circle");
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5129
diff changeset
49 if pos then
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5129
diff changeset
50 summary = summary .. "\n" .. "geo:"..pos:match("[%d.,]+");
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5129
diff changeset
51 end
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5129
diff changeset
52 end
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
53 return summary;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
54 end, 1);