Mercurial > prosody-modules
view mod_tweet_data/mod_tweet_data.lua @ 4651:8231774f5bfd
mod_cloud_notify_encrypted: Ensure body substring remains valid UTF-8
The `body:sub()` call risks splitting the string in the middle of a
multi-byte UTF-8 sequence. This should have been caught by util.stanza
validation, but that would have caused some havoc, at the very least causing
the notification to not be sent.
There have been no reports of this happening. Likely because this module
isn't widely deployed among users with languages that use many longer UTF-8
sequences.
The util.encodings.utf8.valid() function is O(n) where only the last
sequence really needs to be checked, but it's in C and expected to be fast.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Sun, 22 Aug 2021 13:22:59 +0200 |
parents | c858c76d0845 |
children | efdc3e4dc5df |
line wrap: on
line source
local mod_muc = module:depends("muc") local http = require "net.http" local st = require "util.stanza" local json = require "util.json" local url_pattern = [[https://twitter.com/%S+/status/%S+]] local xmlns_fasten = "urn:xmpp:fasten:0" local xmlns_xhtml = "http://www.w3.org/1999/xhtml" local twitter_apiv2_bearer_token = module:get_option_string("twitter_apiv2_bearer_token"); local function fetch_tweet_data(room, url, tweet_id, origin_id) if not url then return; end local options = { method = "GET"; headers = { Authorization = "Bearer "..twitter_apiv2_bearer_token; }; }; http.request( 'https://api.twitter.com/2/tweets/'..tweet_id..'?expansions=author_id&tweet.fields=created_at,text&user.fields=id,name,username,profile_image_url', options, function(response_body, response_code, _) if response_code ~= 200 then module:log("debug", "Call to %s returned code %s and body %s", url, response_code, response_body) return; end local response = json.decode(response_body); if not response then return; end local tweet = response['data']; local author = response['includes']['users'][1]; local to = room.jid local from = room and room.jid or module.host local fastening = st.message({to = to, from = from, type = 'groupchat'}):tag("apply-to", {xmlns = xmlns_fasten, id = origin_id}) fastening:tag( "meta", { xmlns = xmlns_xhtml, property = 'og:article:author', content = author['username'] } ):up() fastening:tag( "meta", { xmlns = xmlns_xhtml, property = 'og:article:published_time', content = tweet['created_at'] } ):up() fastening:tag( "meta", { xmlns = xmlns_xhtml, property = 'og:description', content = tweet['text'] } ):up() fastening:tag( "meta", { xmlns = xmlns_xhtml, property = 'og:image', content = author['profile_image_url'] } ):up() fastening:tag( "meta", { xmlns = xmlns_xhtml, property = 'og:title', content = author['username'] } ):up() fastening:tag( "meta", { xmlns = xmlns_xhtml, property = 'og:type', content = 'tweet' } ):up() fastening:tag( "meta", { xmlns = xmlns_xhtml, property = 'og:url', content = 'https://twitter.com/'..author['username']..'/status/'..tweet['id'] } ):up() mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening) module:log("debug", tostring(fastening)) end ) end local function tweet_handler(event) local room, stanza = event.room, st.clone(event.stanza) local body = stanza:get_child_text("body") if not body then return; end local origin_id = stanza:find("{urn:xmpp:sid:0}origin-id@id") if not origin_id then return; end for url in body:gmatch(url_pattern) do local _, _, _, tweet_id = string.find(url, "https://twitter.com/(%S+)/status/(%S+)"); fetch_tweet_data(room, url, tweet_id, origin_id); end end module:hook("muc-occupant-groupchat", tweet_handler) module:hook("muc-message-is-historic", function (event) local fastening = event.stanza:get_child('apply-to', xmlns_fasten) if fastening and fastening:get_child('meta', xmlns_xhtml) then return true end end);