annotate mod_pubsub_summary/mod_pubsub_summary.lua @ 4499:8e644bf36627

mod_rest: Change OOB namespace to the one used in messages Because of it's current popularity as companion to HTTP Upload this is probably more useful than the iq one.
author Kim Alvefur <zash@zash.se>
date Sun, 07 Mar 2021 01:26:20 +0100
parents 2bb11055e4bb
children 86a97e7572b2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
1 -- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds.
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
2 --
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
3 -- Compose a textual representation of Atom payloads
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
4 module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
5 local payload = event.payload;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
6 local title = payload:get_child_text("title");
4435
a620bf249e63 mod_pubsub_summary: Explain why it picks content or summary in a comment
Kim Alvefur <zash@zash.se>
parents: 4426
diff changeset
7 -- Note: This prefers content over summary, it was made for a news feed where
a620bf249e63 mod_pubsub_summary: Explain why it picks content or summary in a comment
Kim Alvefur <zash@zash.se>
parents: 4426
diff changeset
8 -- the interesting stuff was in the content and the summary was .. meh.
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
9 local content_tag = payload:get_child("content") or payload:get_child("summary");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
10 local content = content_tag:get_text();
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
11 if content_tag.attr.type == "html" then
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
12 content = content:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
13 content = content:gsub("<li>(.-)</li>\n", "* %1\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
14 content = content:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "%2 <%1>");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
15 content = content:gsub("<b>(.-)</b>", "*%1*");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
16 content = content:gsub("<strong>(.-)</strong>", "*%1*");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
17 content = content:gsub("<em>(.-)</em>", "*%1*");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
18 content = content:gsub("<i>(.-)</i>", "*%1*");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
19 content = content:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
20 content = content:gsub("<br[^>]*>", "\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
21 content = content:gsub("<[^>]+>", "");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
22 content = content:gsub("^%s*", ""):gsub("%s*$", "");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
23 content = content:gsub("\n\n\n+", "\n\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
24 content = content:gsub("&(%w+);", {
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
25 apos = "'";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
26 quot = '"';
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
27 lt = "<";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
28 gt = ">";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
29 amp = "&";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
30 nbsp = utf8 and utf8.char(0xa0) or " ";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
31 });
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
32 end
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
33 local summary;
4437
09657f758f53 mod_pubsub_summary: Skip adding title if already part of summary
Kim Alvefur <zash@zash.se>
parents: 4436
diff changeset
34 if title and content and content:sub(1, #title) ~= title then
4438
2bb11055e4bb mod_pubsub_summary: Make titles *bold* to stand out more
Kim Alvefur <zash@zash.se>
parents: 4437
diff changeset
35 summary = "*" .. title .. "*\n\n" .. content;
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
36 elseif title or content then
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
37 summary = content or title;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
38 end
4436
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
39 for link in payload:childtags("link") do
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
40 if link and link.attr.href and link.attr.href ~= content then
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
41 summary = (summary and summary .. "\n" or "") .. link.attr.href;
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
42 if link.attr.rel then summary = summary .. " [" .. link.attr.rel .. "]" end
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4435
diff changeset
43 end
4426
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
44 end
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
45 return summary;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
46 end, 1);