comparison mod_xhtmlim/mod_xhtmlim.lua @ 2865:f6ed4421167d

mod_xhtmlim: Attempts to sanitize XHTML-IM messages
author Kim Alvefur <zash@zash.se>
date Tue, 30 Jan 2018 18:49:09 +0100
parents
children 276f7af8afd1
comparison
equal deleted inserted replaced
2864:6f3859233515 2865:f6ed4421167d
1 -- XEP-0071: XHTML-IM sanitizing
2
3 local assert = assert;
4
5 local st = require "util.stanza";
6 local url = require "socket.url";
7
8 local no_styles = module:get_option_boolean("strip_xhtml_style", false);
9
-- Element/attribute tables copied from XEP-0071.
-- One element per line: the tag, then its permitted attributes.
local xeptables = [[
<body/> class, id, title; style
<head/> profile
<html/> version
<title/>
<abbr/> class, id, title; style
<acronym/> class, id, title; style
<address/> class, id, title; style
<blockquote/> class, id, title; style; cite
<br/> class, id, title; style
<cite/> class, id, title; style
<code/> class, id, title; style
<dfn/> class, id, title; style
<div/> class, id, title; style
<em/> class, id, title; style
<h1/> class, id, title; style
<h2/> class, id, title; style
<h3/> class, id, title; style
<h4/> class, id, title; style
<h5/> class, id, title; style
<h6/> class, id, title; style
<kbd/> class, id, title; style
<p/> class, id, title; style
<pre/> class, id, title; style
<q/> class, id, title; style; cite
<samp/> class, id, title; style
<span/> class, id, title; style
<strong/> class, id, title; style
<var/> class, id, title; style
<a/> class, id, title; style; accesskey, charset, href, hreflang, rel, rev, tabindex, type
<dl/> class, id, title; style
<dt/> class, id, title; style
<dd/> class, id, title; style
<ol/> class, id, title; style
<ul/> class, id, title; style
<li/> class, id, title; style
<img/> class, id, title; style; alt, height, longdesc, src, width
]];

-- Whitelist: tag name -> set of attribute names allowed on that tag
local tags = {}; -- { string : { string : boolean } }

for element, attr_list in string.gmatch(xeptables, "<(%w+)/>([^\n]*)") do
	-- Namespace and language attributes are accepted on every element
	local permitted = { xmlns = true, ["xml:lang"] = true };
	for name in string.gmatch(attr_list, "%w+") do
		-- With style stripping enabled, "style" never enters the set
		if not (no_styles and name == "style") then
			permitted[name] = true;
		end
	end
	tags[element] = permitted;
end
62
63 -- module:log("debug", "tags = %s;", require "util.serialization".serialize(tags));
64
-- TODO: settle whether disallowed tags are bounced or silently dropped.
-- The XEP says to "ignore" such a tag and substitute its text content, but
-- that calls for a different transform, one `maptags` cannot express.
do
	local bounce_invalid = module:get_option_boolean("bounce_invalid_xhtml", false);
	if not bounce_invalid then
		-- Pass-through stand-in: hands back its first argument and never raises
		assert = function (value)
			return value;
		end
	end
end
71
-- URL schemes accepted in `src` and `href` attribute values
local url_schemes = { http = true, https = true, mailto = true, xmpp = true, cid = true };

--- Sanitizes one XHTML-IM element in place; written as a `maptags` callback.
-- Attributes missing from the element's entry in `tags` are removed, and
-- `src`/`href` values whose URL scheme is not in `url_schemes` (or that have
-- no scheme at all) are replaced with a fixed placeholder URL. Child elements
-- are then processed recursively through `tag:maptags`.
-- @param tag element exposing `name`, `attr` and a `maptags` method
-- @return the same element once cleaned, or nil when it is outside the XHTML
--   namespace or not whitelisted (presumably `maptags` drops such elements)
-- Raises via `assert` for a non-whitelisted tag, unless `assert` has been
-- swapped for the pass-through (option `bounce_invalid_xhtml` off).
local function sanitize_xhtml(tag)
	-- module:log("debug", "sanitize_xhtml(<{%s}%s>)", tag.attr.xmlns, tag.name);
	if tag.attr.xmlns ~= "http://www.w3.org/1999/xhtml" then
		-- Not xhtml, probably best to discard it
		return nil;
	end

	local allowed = assert(tags[tag.name], tag.name);
	if not allowed then
		-- Only reachable when `assert` is the non-raising pass-through
		return nil;
	end

	-- Clearing or overwriting existing keys during `pairs` traversal is permitted
	for attr, value in pairs(tag.attr) do
		if not allowed[attr] then
			-- module:log("debug", "Removing disallowed attribute %q from <%s>", attr, tag.name);
			tag.attr[attr] = nil;
		elseif attr == "src" or attr == "href" then
			local urlattr = url.parse(value);
			local scheme = urlattr and urlattr.scheme;
			-- Whitelist check: unknown or missing schemes are neutralised
			if not (scheme and url_schemes[scheme]) then
				tag.attr[attr] = "https://url.was.invalid/";
			end
		end
	end

	-- Check child tags
	tag:maptags(sanitize_xhtml);
	-- This tag is clean!
	return tag;
end
101
--- Event handler: sanitizes the XHTML-IM payload of a message, if present.
-- A stanza without an <html xmlns="http://jabber.org/protocol/xhtml-im"/>
-- child is left alone. Otherwise a clone is sanitized and, on success,
-- stored back into `event.stanza`. If sanitizing raises, the reason is
-- logged, an error reply is sent to `event.origin` (unless the stanza is
-- itself of type "error") and the handler returns true.
-- @param event table with `stanza` and `origin` fields
-- @return true when the message was rejected, nothing otherwise
local function message_handler(event)
	local stanza = event.stanza;
	if not stanza:get_child("html", "http://jabber.org/protocol/xhtml-im") then
		return; -- no XHTML-IM payload, nothing to do
	end

	-- Work on a copy so that a failed attempt leaves event.stanza untouched
	stanza = st.clone(stanza);
	local ok, err = pcall(function ()
		stanza:get_child("html", "http://jabber.org/protocol/xhtml-im"):maptags(sanitize_xhtml);
	end);
	if ok then
		event.stanza = stanza;
		return;
	end

	-- Keep the failure reason visible instead of silently discarding it
	module:log("debug", "Rejecting message with unacceptable XHTML-IM: %s", tostring(err));
	if stanza.attr.type ~= "error" then
		event.origin.send(st.error_reply(stanza, "modify", "not-acceptable", "Stanza contained illegal XHTML-IM tag"));
	end
	return true;
end
119
-- Events on which `message_handler` is registered, in registration order
local hooked_events = {
	-- Stanzas received from clients
	"pre-message/bare",
	"pre-message/full",
	"pre-message/host",
	-- Stanzas about to be delivered to clients
	"message/bare",
	"message/full",
};

-- Every hook uses the same handler and the same priority (71)
for _, event_name in ipairs(hooked_events) do
	module:hook(event_name, message_handler, 71);
end