Mercurial > prosody-modules
annotate mod_pubsub_summary/mod_pubsub_summary.lua @ 4895:2542fd80cd15
mod_turn_external: Fix type of config option (thanks mirux)
There was a separate boolean option to enable TLS before, but it was
merged with the port number option and it seems the typed API interface
got confused.
Backport of trunk rev aa7a8aa64d3f
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Tue, 15 Feb 2022 21:51:52 +0100 |
parents | fcfe691d6322 |
children | cde38b7de04a |
rev | line source |
---|---|
4426
3fe2c264aac4
mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff
changeset
|
-- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds.
--

local s_char, m_floor = string.char, math.floor;

-- Named character references replaced during HTML clean-up.
local named_entities = {
	apos = "'";
	quot = '"';
	lt = "<";
	gt = ">";
	amp = "&";
	nbsp = "\194\160"; -- U+00A0, written as raw UTF-8 bytes for compat
};

-- Encode a single code point as UTF-8 by hand, so that no Lua 5.3+ only
-- facility (utf8 library, "\u{...}" escapes) is required.
local function utf8_encode(cp)
	if cp < 0x80 then
		return s_char(cp);
	elseif cp < 0x800 then
		return s_char(0xC0 + m_floor(cp / 0x40), 0x80 + cp % 0x40);
	elseif cp < 0x10000 then
		return s_char(0xE0 + m_floor(cp / 0x1000), 0x80 + m_floor(cp / 0x40) % 0x40, 0x80 + cp % 0x40);
	end
	return s_char(0xF0 + m_floor(cp / 0x40000), 0x80 + m_floor(cp / 0x1000) % 0x40,
		0x80 + m_floor(cp / 0x40) % 0x40, 0x80 + cp % 0x40);
end

-- True for code points allowed as XML 1.0 character data. The decoded text
-- is returned as a summary string, so control characters (including the
-- \1 \2 \3 bytes used below as link placeholders), surrogates and
-- U+FFFE/U+FFFF must never be produced from a numeric reference.
local function is_xml_char(cp)
	return cp == 0x9 or cp == 0xA or cp == 0xD
		or (cp >= 0x20 and cp <= 0xD7FF)
		or (cp >= 0xE000 and cp <= 0xFFFD)
		or (cp >= 0x10000 and cp <= 0x10FFFF);
end

-- gsub callback for "&name;", "&#123;" and "&#x7B;".
-- Receives the text between "&" and ";". Returns the replacement, or nil to
-- make gsub keep the original text (unknown name, malformed or disallowed
-- numeric reference).
local function decode_entity(name)
	local named = named_entities[name];
	if named then
		return named;
	end
	local base = 16;
	local digits = name:match("^#[xX]0*(%x+)$");
	if not digits then
		base = 10;
		digits = name:match("^#0*(%d+)$");
	end
	-- The length cap keeps tonumber() far away from integer wrap-around;
	-- anything longer than 7 significant digits is above U+10FFFF anyway.
	if not digits or #digits > 7 then
		return nil;
	end
	local cp = tonumber(digits, base);
	if cp and is_xml_char(cp) then
		return utf8_encode(cp);
	end
	return nil;
end

-- Reduce an HTML fragment to plain text with light XEP-0393 style markup.
-- The order of the substitutions matters: links are parked behind the
-- \1 \2 \3 placeholders so that the generic tag stripper does not eat the
-- "<url>" that is emitted for them afterwards, and entities are decoded last
-- so that escaped markup such as "&lt;b&gt;" survives as literal text.
local function html_to_text(html)
	local text = html;
	text = text:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
	text = text:gsub("<li>(.-)</li>\n", "* %1\n");
	text = text:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "\1%1\2%2\3");
	text = text:gsub("<b>(.-)</b>", "*%1*");
	text = text:gsub("<strong>(.-)</strong>", "*%1*");
	text = text:gsub("<em>(.-)</em>", "_%1_");
	text = text:gsub("<i>(.-)</i>", "_%1_");
	text = text:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
	text = text:gsub("<br[^>]*>", "\n");
	text = text:gsub("<[^>]+>", "");
	text = text:gsub("\1(.-)\2(.-)\3", "%2 <%1>");
	text = text:gsub("^%s*", ""):gsub("%s*$", "");
	text = text:gsub("\n\n\n+", "\n\n");
	text = text:gsub("&(#?%w+);", decode_entity);
	return text;
end

-- Compose a textual representation of Atom payloads
--
-- Handler for the "pubsub-summary/http://www.w3.org/2005/Atom" event.
-- Reads event.payload and returns a plain-text summary string built from the
-- entry's title, its content (or summary) and its links, or nil when none of
-- those are present.
module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
	local payload = event.payload;
	local title = payload:get_child_text("title");
	-- Note: This prefers content over summary, it was made for a news feed where
	-- the interesting stuff was in the content and the summary was .. meh.
	local content_tag = payload:get_child("content") or payload:get_child("summary");
	local content = content_tag and content_tag:get_text();
	if content and content_tag.attr.type == "html" then
		content = html_to_text(content);
	end
	if content and not content:find("%S") then
		-- Blank content (e.g. an empty element that only points elsewhere)
		-- would otherwise leave dangling blank lines after the title or a
		-- leading newline before the first link; treat it as missing.
		content = nil;
	end
	local summary;
	if title and content and content:sub(1, #title) ~= title then
		-- Title is not already the start of the content: put it on top, in bold.
		summary = "*" .. title .. "*\n\n" .. content;
	elseif title or content then
		summary = content or title;
	end
	for link in payload:childtags("link") do
		-- Skip links that merely repeat what the content already is.
		if link and link.attr.href and link.attr.href ~= content then
			summary = (summary and summary .. "\n" or "") .. link.attr.href;
			if link.attr.rel then summary = summary .. " [" .. link.attr.rel .. "]" end
		end
	end
	return summary;
end, 1);