annotate mod_ogp/mod_ogp.lua @ 4513:ade2064160e3

mod_pubsub_summary: Fix to not strip inline links Links were turned into `label <url>` and then a later pass removed the `<url>` part leaving only the label. This should avoid that. Escaping is hard.
author Kim Alvefur <zash@zash.se>
date Wed, 17 Mar 2021 15:52:12 +0100
parents 0136c98f574c
children 09f0911c735d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
1 local mod_muc = module:depends("muc")
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
2 local http = require "net.http"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
3 local st = require "util.stanza"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
4 local url_pattern = [[https?://%S+]]
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
5 local domain_pattern = '^%w+://([^/]+)'
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
6 local xmlns_fasten = "urn:xmpp:fasten:0"
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
7 local xmlns_xhtml = "http://www.w3.org/1999/xhtml"
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
8 local whitelist = module:get_option_set("ogp_domain_whitelist", {})
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
9
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
10
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
11 local function is_whitelisted(url)
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
12 if whitelist:empty() then
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
13 return true
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
14 end
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
15 local domain = url:match(domain_pattern)
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
16 if whitelist:contains(domain) then
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
17 return true;
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
18 end
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
19 return false
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
20 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
21
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
22
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
23 local function fetch_ogp_data(room, url, origin_id)
4483
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
24 if not url or not is_whitelisted(url) then
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
25 return;
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4482
diff changeset
26 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
27
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
28 http.request(
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
29 url,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
30 nil,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
31 function(response_body, response_code, _)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
32 if response_code ~= 200 then
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
33 module:log("debug", "Call to %s returned code %s and body %s", url, response_code, response_body)
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
34 return
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
35 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
36
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
37 local to = room.jid
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
38 local from = room and room.jid or module.host
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
39 local fastening = st.message({to = to, from = from, type = 'groupchat'}):tag("apply-to", {xmlns = xmlns_fasten, id = origin_id})
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
40 local found_metadata = false
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
41 local message_body = ""
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
42
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
43 local meta_pattern = [[<meta (.-)/?>]]
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
44 for match in response_body:gmatch(meta_pattern) do
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
45 local property = match:match([[property=%s*["']?(og:.-)["']?%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
46 if not property then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
47 property = match:match([[property=["']?(og:.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
48 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
49
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
50 local content = match:match([[content=%s*["'](.-)["']%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
51 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
52 content = match:match([[content=["']?(.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
53 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
54 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
55 content = match:match([[content=(.-) property]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
56 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
57 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
58 content = match:match([[content=(.-)$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
59 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
60
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
61 if property and content then
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
62 module:log("debug", property .. "\t" .. content)
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
63 fastening:tag(
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
64 "meta",
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
65 {
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
66 xmlns = xmlns_xhtml,
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
67 property = property,
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
68 content = content
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
69 }
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
70 ):up()
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
71 found_metadata = true
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
72 message_body = message_body .. property .. "\t" .. content .. "\n"
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
73 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
74 end
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
75
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
76 if found_metadata then
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
77 mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
78 end
4504
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4483
diff changeset
79 module:log("debug", tostring(fastening))
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
80 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
81 )
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
82 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
83
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
84 local function ogp_handler(event)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
85 local room, stanza = event.room, st.clone(event.stanza)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
86 local body = stanza:get_child_text("body")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
87
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
88 if not body then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
89
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
90 local origin_id = stanza:find("{urn:xmpp:sid:0}origin-id@id")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
91 if not origin_id then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
92
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
93 for url in body:gmatch(url_pattern) do
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
94 fetch_ogp_data(room, url, origin_id);
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
95 end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
96 end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
97
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
98 module:hook("muc-occupant-groupchat", ogp_handler)
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
99
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
100
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
101 module:hook("muc-message-is-historic", function (event)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
102 local fastening = event.stanza:get_child('apply-to', xmlns_fasten)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
103 if fastening and fastening:get_child('meta', xmlns_xhtml) then
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
104 return true
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
105 end
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
106 end);