Mercurial > prosody-modules
diff mod_anti_spam/mod_anti_spam.lua @ 5859:259ffdbf8906
mod_anti_spam: New module for spam filtering (pre-alpha)
author | Matthew Wild <mwild1@gmail.com> |
---|---|
date | Tue, 05 Mar 2024 18:26:29 +0000 |
parents | |
children |
line wrap: on
line diff
-- mod_anti_spam/mod_anti_spam.lua
-- mod_anti_spam: New module for spam filtering (pre-alpha)
--
-- Filters incoming messages and subscription requests against lists of
-- known spam sources (domains, IP subnets, hashed JIDs) that are kept up to
-- date through RTBL subscriptions, plus optional configured content matches.

local ip = require "util.ip";
local jid_bare = require "util.jid".bare;
local jid_split = require "util.jid".split;
local set = require "util.set";
local sha256 = require "util.hashes".sha256;
local st = require "util.stanza";
local is_contact_subscribed = require "core.rostermanager".is_contact_subscribed;
local full_sessions = prosody.full_sessions;

local user_exists = require "core.usermanager".user_exists;

local new_rtbl_subscription = module:require("rtbl").new_rtbl_subscription;
local trie = module:require("trie");

-- Block lists, populated by the RTBL subscriptions set up further below
local spam_source_domains = set.new();
local spam_source_ips = trie.new();
local spam_source_jids = set.new(); -- entries are compared against hex SHA-256 digests of bare JIDs

local count_spam_blocked = module:metric("counter", "anti_spam_blocked", "stanzas", "Stanzas blocked as spam", {"reason"});

-- Handle a stanza that has been classified as spam.
--
-- Records `reason` and `action` on the event and fires "spam-blocked" so
-- that other handlers can inspect it; a handler returning false vetoes the
-- block, in which case nothing is returned and the stanza is left alone.
--
-- event:  the stanza event (uses event.stanza and event.origin)
-- reason: short label, also used as the "reason" label of the metric
-- action: "bounce" to send an error reply to the origin; any other value
--         only logs that the stanza is being discarded
--
-- Returns true when the stanza was blocked, so hook handlers can return the
-- result directly.
function block_spam(event, reason, action)
	event.spam_reason = reason;
	event.spam_action = action;
	if module:fire_event("spam-blocked", event) == false then
		module:log("debug", "Spam allowed by another module");
		return;
	end

	count_spam_blocked:with_labels(reason):add(1);

	if action == "bounce" then
		module:log("debug", "Bouncing likely spam %s from %s (%s)", event.stanza.name, event.stanza.attr.from, reason);
		-- error_reply() builds the reply from the original stanza, which
		-- therefore has to be passed as the first argument
		event.origin.send(st.error_reply(event.stanza, "cancel", "policy-violation", "Rejected as spam"));
	else
		module:log("debug", "Discarding likely spam %s from %s (%s)", event.stanza.name, event.stanza.attr.from, reason);
	end

	return true;
end

-- Decide whether the sender of event.stanza is unknown to the recipient.
--
-- from_jid: bare JID of the sender
-- event:    the stanza event; the recipient is taken from event.stanza.attr.to
--
-- Returns true for a stranger, false when the stanza should pass through
-- (no user part in the recipient, recipient address is a connected full
-- JID, message from the user's own JID, or groupchat traffic to a full JID)
-- and nil when the sender is a subscribed contact.
function is_from_stranger(from_jid, event)
	local stanza = event.stanza;
	local to_user, to_host, to_resource = jid_split(stanza.attr.to);

	if not to_user then return false; end

	local to_session = full_sessions[stanza.attr.to];
	if to_session then return false; end

	if not is_contact_subscribed(to_user, to_host, from_jid) then
		-- Allow all messages from your own jid
		if from_jid == to_user.."@"..to_host then
			return false; -- Pass through
		end
		if to_resource and stanza.attr.type == "groupchat" then
			return false; -- Pass through
		end
		return true; -- Stranger danger
	end
end

-- Check whether a session belongs to a known spam source, either by its
-- remote domain (session.from_host) or by its IP address (session.ip).
-- Returns true on a match, nil otherwise.
function is_spammy_server(session)
	if spam_source_domains:contains(session.from_host) then
		return true;
	end
	-- Only consult the IP list when the session actually has a usable
	-- address, instead of handing nil to the trie lookup
	local origin_ip = session.ip and ip.new(session.ip);
	if origin_ip and spam_source_ips:contains_ip(origin_ip) then
		return true;
	end
end

-- Check whether the hex SHA-256 digest of sender_jid (a JID string) is on
-- the list of known spam JIDs.
function is_spammy_sender(sender_jid)
	return spam_source_jids:contains(sha256(sender_jid, true));
end

local spammy_strings = module:get_option_array("anti_spam_block_strings");
local spammy_patterns = module:get_option_array("anti_spam_block_patterns");

-- Check the body of a message against the configured block lists:
-- "anti_spam_block_strings" entries are matched as plain substrings,
-- "anti_spam_block_patterns" entries as Lua patterns.
-- Returns true on a match, nil otherwise (including for non-message
-- stanzas and for messages without a body).
function is_spammy_content(stanza)
	-- Only support message content
	if stanza.name ~= "message" then return; end
	if not (spammy_strings or spammy_patterns) then return; end

	local body = stanza:get_child_text("body");
	-- Messages without a <body/> have nothing to match against
	if not body then return; end

	if spammy_strings then
		for _, s in ipairs(spammy_strings) do
			if body:find(s, 1, true) then
				return true;
			end
		end
	end
	if spammy_patterns then
		for _, s in ipairs(spammy_patterns) do
			if body:find(s) then
				return true;
			end
		end
	end
end

-- Set up RTBLs

-- Default to an empty list so that the module still loads when no
-- services have been configured
local anti_spam_services = module:get_option_array("anti_spam_services", {});

for _, rtbl_service_jid in ipairs(anti_spam_services) do
	new_rtbl_subscription(rtbl_service_jid, "spam_source_domains", {
		added = function (item)
			spam_source_domains:add(item);
		end;
		removed = function (item)
			spam_source_domains:remove(item);
		end;
	});
	new_rtbl_subscription(rtbl_service_jid, "spam_source_ips", {
		added = function (item)
			spam_source_ips:add_subnet(ip.parse_cidr(item));
		end;
		removed = function (item)
			spam_source_ips:remove_subnet(ip.parse_cidr(item));
		end;
	});
	new_rtbl_subscription(rtbl_service_jid, "spam_source_jids_sha256", {
		added = function (item)
			spam_source_jids:add(item);
		end;
		removed = function (item)
			spam_source_jids:remove(item);
		end;
	});
end

-- Messages addressed to a bare JID: only messages to existing users from
-- senders they do not know are checked against the block lists.
module:hook("message/bare", function (event)
	-- user_exists() takes the username and the host as separate arguments
	local to_user, to_host = jid_split(event.stanza.attr.to);

	if not (to_user and user_exists(to_user, to_host)) then return; end

	local from_bare = jid_bare(event.stanza.attr.from);
	if not is_from_stranger(from_bare, event) then return; end

	if is_spammy_server(event.origin) then
		return block_spam(event, "known-spam-source", "drop");
	end

	if is_spammy_sender(from_bare) then
		return block_spam(event, "known-spam-jid", "drop");
	end

	if is_spammy_content(event.stanza) then
		return block_spam(event, "spam-content", "drop");
	end
end, 500);

-- Subscription requests addressed to a bare JID
module:hook("presence/bare", function (event)
	-- The presence type is a stanza attribute, not a field of the stanza
	if event.stanza.attr.type ~= "subscribe" then
		return;
	end

	if is_spammy_server(event.origin) then
		return block_spam(event, "known-spam-source", "drop");
	end

	-- The JID list holds hashes of bare JID strings, so hash the sender's
	-- bare JID rather than the stanza object
	local from_bare = jid_bare(event.stanza.attr.from);
	if is_spammy_sender(from_bare) then
		return block_spam(event, "known-spam-jid", "drop");
	end
end, 500);