annotate mod_ogp/mod_ogp.lua @ 4482:21698b960bd6

mod_ogp: Add the ability to detect and process multiple URLs in a body
author JC Brand <jc@opkode.com>
date Tue, 02 Mar 2021 12:04:14 +0100
parents dbfda7f5522d
children c4f11a4b5ac7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
1 local mod_muc = module:depends("muc")
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
2 local http = require "net.http"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
3 local st = require "util.stanza"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
4 local url_pattern = [[https?://%S+]]
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
5 local xmlns_fasten = "urn:xmpp:fasten:0";
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
6 local xmlns_xhtml = "http://www.w3.org/1999/xhtml";
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
7
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
8
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
9 local function fetch_ogp_data(room, url, origin_id)
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
10 if not url then return; end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
11
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
12 http.request(
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
13 url,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
14 nil,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
15 function(response_body, response_code, _)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
16 if response_code ~= 200 then
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
17 return
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
18 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
19
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
20 local to = room.jid
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
21 local from = room and room.jid or module.host
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
22 local fastening = st.message({to = to, from = from, type = 'groupchat'}):tag("apply-to", {xmlns = xmlns_fasten, id = origin_id})
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
23 local found_metadata = false
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
24 local message_body = ""
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
25
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
26 local meta_pattern = [[<meta (.-)/?>]]
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
27 for match in response_body:gmatch(meta_pattern) do
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
28 local property = match:match([[property=%s*["']?(og:.-)["']?%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
29 if not property then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
30 property = match:match([[property=["']?(og:.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
31 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
32
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
33 local content = match:match([[content=%s*["'](.-)["']%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
34 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
35 content = match:match([[content=["']?(.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
36 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
37 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
38 content = match:match([[content=(.-) property]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
39 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
40 if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
41 content = match:match([[content=(.-)$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
42 end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
43
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
44 if property and content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
45 module:log("info", property .. "\t" .. content)
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
46 fastening:tag(
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
47 "meta",
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
48 {
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
49 xmlns = xmlns_xhtml,
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
50 property = property,
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
51 content = content
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
52 }
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
53 ):up()
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
54 found_metadata = true
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
55 message_body = message_body .. property .. "\t" .. content .. "\n"
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
56 end
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
57 end
4255
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4254
diff changeset
58
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
59 if found_metadata then
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
60 mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
61 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
62 module:log("info", tostring(fastening))
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
63 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
64 )
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
65 end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
66
4482
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
67 local function ogp_handler(event)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
68 local room, stanza = event.room, st.clone(event.stanza)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
69 local body = stanza:get_child_text("body")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
70
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
71 if not body then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
72
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
73 local origin_id = stanza:find("{urn:xmpp:sid:0}origin-id@id")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
74 if not origin_id then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
75
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
76 for url in body:gmatch(url_pattern) do
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
77 fetch_ogp_data(room, url, origin_id);
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
78 end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
79 end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4459
diff changeset
80
4252
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
81 module:hook("muc-occupant-groupchat", ogp_handler)
4459
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
82
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
83
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
84 module:hook("muc-message-is-historic", function (event)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
85 local fastening = event.stanza:get_child('apply-to', xmlns_fasten)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
86 if fastening and fastening:get_child('meta', xmlns_xhtml) then
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
87 return true
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
88 end
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4255
diff changeset
89 end);