Mercurial > prosody-modules
annotate mod_pubsub_summary/mod_pubsub_summary.lua @ 5491:7842502c1157
mod_http_debug: Log some extended info about requests
If you point something external at this module, you don't get the
response body back, hence it can be useful to see some details in the
log as well.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Fri, 26 May 2023 15:37:15 +0200 |
parents | 35085e0d52ad |
children |
rev | line source |
---|---|
-- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds.
--
-- Compose a textual representation of Atom payloads

-- Named character references decoded in HTML content. References that are not
-- listed here (including numeric ones) are left in the text unchanged.
local html_entities = {
	apos = "'";
	quot = '"';
	lt = "<";
	gt = ">";
	amp = "&";
	nbsp = "\194\160"; -- U+00A0
};

-- Remove leading and trailing whitespace from a string.
local function trim(s)
	-- Outer parentheses drop gsub's second return value (the match count).
	return (s:gsub("^%s+", ""):gsub("%s+$", ""));
end

-- Turn an HTML fragment into plain text using pattern substitutions only.
-- Bold and emphasis are rendered with XEP-0393 style markers (*bold*, _emphasis_),
-- links become "text <url>", images are replaced by their src URL and any
-- remaining tags are dropped.
local function html_to_text(html)
	local text = html;
	text = text:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
	text = text:gsub("<li>(.-)</li>\n", "* %1\n");
	-- Park links behind the control characters \1 \2 \3 so that the generic
	-- tag stripping below does not eat them; they are restored afterwards.
	text = text:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "\1%1\2%2\3");
	text = text:gsub("<b>(.-)</b>", "*%1*");
	text = text:gsub("<strong>(.-)</strong>", "*%1*");
	text = text:gsub("<em>(.-)</em>", "_%1_");
	text = text:gsub("<i>(.-)</i>", "_%1_");
	text = text:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
	text = text:gsub("<br[^>]*>", "\n");
	text = text:gsub("<[^>]+>", "");
	text = text:gsub("\1(.-)\2(.-)\3", "%2 <%1>");
	text = trim(text);
	text = text:gsub("\n\n\n+", "\n\n");
	-- Entities are decoded last so that an escaped "&lt;" is not mistaken for a tag.
	text = text:gsub("&(%w+);", html_entities);
	return text;
end

-- Build a "geo:lat,lon" URI from the text of a CAP <circle> element, which has
-- the form "lat,lon radius". Signed coordinates are preserved and the radius
-- is ignored. Returns nil when no coordinate pair can be found.
local function circle_to_geo_uri(circle)
	local lat, lon = circle:match("(%-?[%d.]+)%s*,%s*(%-?[%d.]+)");
	if lat then
		return "geo:" .. lat .. "," .. lon;
	end
end

-- Append a line to the summary, starting a new summary if there is none yet.
local function append_line(summary, line)
	return (summary and summary .. "\n" or "") .. line;
end

-- Handler producing a plain text summary of an Atom entry.
-- Reads event.payload and returns the summary string, or nil when the entry
-- has no title, content, summary, link or usable CAP circle.
module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
	local payload = event.payload;

	local title = payload:get_child_text("title");
	if title then title = trim(title); end

	-- Note: This prefers content over summary, it was made for a news feed where
	-- the interesting stuff was in the content and the summary was .. meh.
	local content_tag = payload:get_child("content") or payload:get_child("summary");
	local content = content_tag and content_tag:get_text();
	if content and content_tag.attr.type == "html" then
		content = html_to_text(content);
	end

	local summary;
	if title and content and content:sub(1, #title) ~= title then
		-- Only prepend the title when the content does not already start with it.
		summary = "*" .. title .. "*\n\n" .. content;
	elseif title or content then
		summary = content or title;
	end

	for link in payload:childtags("link") do
		local href = link.attr.href;
		if href and href ~= content then
			summary = append_line(summary, href);
			local rel = link.attr.rel;
			-- "alternate" is not shown since it adds no information.
			if rel and rel ~= "alternate" then summary = summary .. " [" .. rel .. "]" end
		end
	end

	for area in payload:childtags("area", "urn:oasis:names:tc:emergency:cap:1.2") do
		local pos = area:get_child_text("circle");
		local geo = pos and circle_to_geo_uri(pos);
		if geo then
			-- The summary may still be nil here, e.g. for an entry that only
			-- carries an <area>, so it must not be concatenated directly.
			summary = append_line(summary, geo);
		end
	end

	return summary;
end, 1);