diff mod_anti_spam/mod_anti_spam.lua @ 5859:259ffdbf8906

mod_anti_spam: New module for spam filtering (pre-alpha)
author Matthew Wild <mwild1@gmail.com>
date Tue, 05 Mar 2024 18:26:29 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mod_anti_spam/mod_anti_spam.lua	Tue Mar 05 18:26:29 2024 +0000
@@ -0,0 +1,165 @@
+local ip = require "util.ip";
+local jid_bare = require "util.jid".bare;
+local jid_split = require "util.jid".split;
+local set = require "util.set";
+local sha256 = require "util.hashes".sha256;
+local st = require"util.stanza";
+local is_contact_subscribed = require "core.rostermanager".is_contact_subscribed;
+local full_sessions = prosody.full_sessions;
+
+local user_exists = require "core.usermanager".user_exists;
+
+local new_rtbl_subscription = module:require("rtbl").new_rtbl_subscription;
+local trie = module:require("trie");
+
+-- Blocklists consulted by the is_spammy_* checks in this file. They start out
+-- empty and are filled/emptied by the RTBL subscription callbacks set up below.
+local spam_source_domains = set.new(); -- looked up with session.from_host
+local spam_source_ips = trie.new(); -- subnets; queried with contains_ip()
+local spam_source_jids = set.new(); -- looked up with the sha256 of the sender JID
+
+-- Counter incremented once per blocked stanza, labelled with the block reason
+local count_spam_blocked = module:metric("counter", "anti_spam_blocked", "stanzas", "Stanzas blocked as spam", {"reason"});
+
+-- Record a stanza as spam and apply the requested action to it.
+--
+-- event:  the stanza event being handled (event.stanza and event.origin are used)
+-- reason: short string describing why the stanza was classified as spam; it is
+--         stored on the event and used as the metric label
+-- action: "bounce" to send an error reply to the origin; any other value
+--         results in the stanza being discarded without a reply
+--
+-- Fires "spam-blocked" first; if a handler of that event returns false the
+-- stanza is allowed and this function returns nothing. Otherwise returns true.
+function block_spam(event, reason, action)
+	event.spam_reason = reason;
+	event.spam_action = action;
+	if module:fire_event("spam-blocked", event) == false then
+		module:log("debug", "Spam allowed by another module");
+		return;
+	end
+
+	count_spam_blocked:with_labels(reason):add(1);
+
+	local stanza = event.stanza;
+	-- Error stanzas must never be answered with another error, so those are
+	-- always discarded even when a bounce was requested.
+	if action == "bounce" and stanza.attr.type ~= "error" then
+		module:log("debug", "Bouncing likely spam %s from %s (%s)", stanza.name, stanza.attr.from, reason);
+		-- error_reply() needs the stanza being replied to as its first argument
+		event.origin.send(st.error_reply(stanza, "cancel", "policy-violation", "Rejected as spam"));
+	else
+		module:log("debug", "Discarding likely spam %s from %s (%s)", stanza.name, stanza.attr.from, reason);
+	end
+
+	return true;
+end
+
+-- Decide whether the sender of event.stanza is unknown to the recipient.
+--
+-- from_jid: bare JID of the sender
+-- event:    stanza event; only event.stanza is read
+--
+-- Returns true for a stranger, false when the stanza should pass through, and
+-- nothing when is_contact_subscribed() reports a subscription for the sender.
+function is_from_stranger(from_jid, event)
+	local stanza = event.stanza;
+	local recipient = stanza.attr.to;
+	local to_user, to_host, to_resource = jid_split(recipient);
+
+	-- No username part in the destination: not addressed to a user account
+	if not to_user then
+		return false;
+	end
+
+	-- Destination matches a connected full-JID session
+	if full_sessions[recipient] then
+		return false;
+	end
+
+	if is_contact_subscribed(to_user, to_host, from_jid) then
+		return;
+	end
+
+	-- Messages from the recipient's own account are always allowed
+	local own_jid = to_user.."@"..to_host;
+	if from_jid == own_jid then
+		return false;
+	end
+
+	-- Groupchat messages addressed to a specific resource are let through
+	if to_resource and stanza.attr.type == "groupchat" then
+		return false;
+	end
+
+	return true; -- Stranger danger
+end
+
+-- Check whether a session originates from a known spam source.
+--
+-- session: the origin session of a stanza; session.from_host and session.ip
+--          are consulted, and either may be absent
+--
+-- Returns true when the remote host is in spam_source_domains or the remote
+-- address falls inside a subnet in spam_source_ips; returns nothing otherwise.
+function is_spammy_server(session)
+	if spam_source_domains:contains(session.from_host) then
+		return true;
+	end
+	-- Not every session carries a remote address, and util.ip's constructor
+	-- is new_ip() (it cannot be called with nil).
+	local raw_ip = session.ip;
+	local origin_ip = raw_ip and ip.new_ip(raw_ip);
+	if origin_ip and spam_source_ips:contains_ip(origin_ip) then
+		return true;
+	end
+end
+
+-- Check whether a sender is listed in spam_source_jids.
+-- sender_jid is hashed with sha256 before the lookup, since the set holds
+-- hashes rather than JIDs (it is fed from the "spam_source_jids_sha256" list).
+function is_spammy_sender(sender_jid)
+	local jid_hash = sha256(sender_jid, true);
+	return spam_source_jids:contains(jid_hash);
+end
+
+-- Optional content filters from the module configuration. Either value may be
+-- nil; is_spammy_content() checks for that before using them.
+-- Strings are matched as plain substrings, patterns as Lua patterns.
+local spammy_strings = module:get_option_array("anti_spam_block_strings");
+local spammy_patterns = module:get_option_array("anti_spam_block_patterns");
+
+-- Check a stanza's body text against the configured block strings/patterns.
+--
+-- stanza: the stanza to inspect; only <message> stanzas are examined
+--
+-- Returns true if the body contains any configured string (plain substring
+-- match) or matches any configured Lua pattern; returns nothing otherwise,
+-- including when no filters are configured or the message has no body.
+function is_spammy_content(stanza)
+	-- Only support message content
+	if stanza.name ~= "message" then return; end
+	if not (spammy_strings or spammy_patterns) then return; end
+
+	local body = stanza:get_child_text("body");
+	-- Messages without a <body> (chat states, receipts, ...) have no text to scan
+	if not body then return; end
+
+	if spammy_strings then
+		for _, s in ipairs(spammy_strings) do
+			if body:find(s, 1, true) then
+				return true;
+			end
+		end
+	end
+	if spammy_patterns then
+		for _, s in ipairs(spammy_patterns) do
+			if body:find(s) then
+				return true;
+			end
+		end
+	end
+end
+
+-- Set up RTBLs
+--
+-- For every service JID listed in the "anti_spam_services" option, subscribe to
+-- three lists and mirror their items into the local blocklists:
+--   spam_source_domains     -> spam_source_domains (set)
+--   spam_source_ips         -> spam_source_ips (trie of subnets, items parsed as CIDR)
+--   spam_source_jids_sha256 -> spam_source_jids (set)
+
+-- Default to an empty list so the module still loads when no services are configured
+local anti_spam_services = module:get_option_array("anti_spam_services", {});
+
+for _, rtbl_service_jid in ipairs(anti_spam_services) do
+	new_rtbl_subscription(rtbl_service_jid, "spam_source_domains", {
+		added = function (item)
+			spam_source_domains:add(item);
+		end;
+		removed = function (item)
+			spam_source_domains:remove(item);
+		end;
+	});
+	new_rtbl_subscription(rtbl_service_jid, "spam_source_ips", {
+		added = function (item)
+			-- parse_cidr()'s results are passed straight through as the subnet arguments
+			spam_source_ips:add_subnet(ip.parse_cidr(item));
+		end;
+		removed = function (item)
+			spam_source_ips:remove_subnet(ip.parse_cidr(item));
+		end;
+	});
+	new_rtbl_subscription(rtbl_service_jid, "spam_source_jids_sha256", {
+		added = function (item)
+			spam_source_jids:add(item);
+		end;
+		removed = function (item)
+			spam_source_jids:remove(item);
+		end;
+	});
+end
+
+-- Filter messages addressed to a bare JID.
+-- Only messages to existing local users from senders classified as strangers
+-- are checked. The checks run in order: spam source server, spam sender JID,
+-- spam content. The first match is handed to block_spam() with action "drop"
+-- and its result is returned; otherwise the handler returns nothing.
+module:hook("message/bare", function (event)
+	local stanza = event.stanza;
+
+	-- user_exists() takes the username and host separately, not a bare JID
+	local to_user, to_host = jid_split(stanza.attr.to);
+	if not to_user or not user_exists(to_user, to_host) then return; end
+
+	local from_bare = jid_bare(stanza.attr.from);
+	if not is_from_stranger(from_bare, event) then return; end
+
+	if is_spammy_server(event.origin) then
+		return block_spam(event, "known-spam-source", "drop");
+	end
+
+	if is_spammy_sender(from_bare) then
+		return block_spam(event, "known-spam-jid", "drop");
+	end
+
+	if is_spammy_content(stanza) then
+		return block_spam(event, "spam-content", "drop");
+	end
+end, 500);
+
+-- Filter subscription requests addressed to a bare JID.
+-- Only presence stanzas of type "subscribe" are examined. A request coming
+-- from a known spam server or a known spam JID is handed to block_spam() with
+-- action "drop" and its result is returned; otherwise the handler returns
+-- nothing.
+module:hook("presence/bare", function (event)
+	local stanza = event.stanza;
+
+	-- The stanza type is an attribute; stanza.type is always nil
+	if stanza.attr.type ~= "subscribe" then
+		return;
+	end
+
+	if is_spammy_server(event.origin) then
+		return block_spam(event, "known-spam-source", "drop");
+	end
+
+	-- is_spammy_sender() hashes a JID string, so pass the sender's bare JID
+	-- rather than the stanza object
+	local from_bare = jid_bare(stanza.attr.from);
+	if from_bare and is_spammy_sender(from_bare) then
+		return block_spam(event, "known-spam-jid", "drop");
+	end
+end, 500);