annotate mod_pastebin/mod_pastebin.lua @ 189:16d367e3c04e

mod_pastebin: Fix to not truncate UTF-8 sequences when creating a snippet summary for XHTML-IM (thanks DaiZW for discovering :) )
author Matthew Wild <mwild1@gmail.com>
date Mon, 28 Jun 2010 14:54:30 +0100
parents 0d37d18ea073
children 7a695ee3884b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
1
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
2 local st = require "util.stanza";
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
3 local httpserver = require "net.httpserver";
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
4 local uuid_new = require "util.uuid".generate;
12
316e8437f233 mod_pastebin: Allow configurable message length threshold
Matthew Wild <mwild1@gmail.com>
parents: 5
diff changeset
5 local os_time = os.time;
24
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
6 local t_insert, t_remove = table.insert, table.remove;
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
7 local add_task = require "util.timer".add_task;
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
8
23
92b1e6592d36 mod_pastebin: Allow per-host pastebin_threshold
Matthew Wild <mwild1@gmail.com>
parents: 21
diff changeset
9 local length_threshold = config.get(module.host, "core", "pastebin_threshold") or 500;
167
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
10 local line_threshold = config.get(module.host, "core", "pastebin_line_threshold") or 4;
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
11
21
4f18696f043a mod_pastebin: Small fix to read the pastebin URL from the config
Matthew Wild <mwild1@gmail.com>
parents: 13
diff changeset
12 local base_url = config.get(module.host, "core", "pastebin_url");
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
13
24
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
14 -- Lifetime of a paste, in seconds (the config value is in hours), default 24 hours
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
15 local expire_after = math.floor((config.get(module.host, "core", "pastebin_expire_after") or 24) * 3600);
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
16
156
b51741b7e86d mod_pastebin: Optionally bin if message starts with a configurable trigger string
Florian Zeitz <florob@babelmonkeys.de>
parents: 76
diff changeset
17 local trigger_string = config.get(module.host, "core", "pastebin_trigger");
167
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
18 trigger_string = (trigger_string and trigger_string .. " ");
156
b51741b7e86d mod_pastebin: Optionally bin if message starts with a configurable trigger string
Florian Zeitz <florob@babelmonkeys.de>
parents: 76
diff changeset
19
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
20 local pastes = {};
76
1fc4e8dc66a6 mod_pastebin: Send Content-Type header to specify plain UTF-8 text
Matthew Wild <mwild1@gmail.com>
parents: 75
diff changeset
21 local default_headers = { ["Content-Type"] = "text/plain; charset=utf-8" };
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
22
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
23 local xmlns_xhtmlim = "http://jabber.org/protocol/xhtml-im";
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
24 local xmlns_xhtml = "http://www.w3.org/1999/xhtml";
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
25
75
3c7189e26848 mod_pastebin: Rename pastebin_message() to pastebin_text() and make it global so it can be called by other plugins
Matthew Wild <mwild1@gmail.com>
parents: 71
diff changeset
26 function pastebin_text(text)
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
27 local uuid = uuid_new();
76
1fc4e8dc66a6 mod_pastebin: Send Content-Type header to specify plain UTF-8 text
Matthew Wild <mwild1@gmail.com>
parents: 75
diff changeset
28 pastes[uuid] = { body = text, time = os_time(), headers = default_headers };
24
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
29 pastes[#pastes+1] = uuid;
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
30 if not pastes[2] then -- No other pastes, give the timer a kick
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
31 add_task(expire_after, expire_pastes);
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
32 end
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
33 return base_url..uuid;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
34 end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
35
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
36 function handle_request(method, body, request)
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
37 local pasteid = request.url.path:match("[^/]+$");
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
38 if not pasteid or not pastes[pasteid] then
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
39 return "Invalid paste id, perhaps it expired?";
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
40 end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
41
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
42 --module:log("debug", "Received request, replying: %s", pastes[pasteid].text);
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
43
76
1fc4e8dc66a6 mod_pastebin: Send Content-Type header to specify plain UTF-8 text
Matthew Wild <mwild1@gmail.com>
parents: 75
diff changeset
44 return pastes[pasteid];
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
45 end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
46
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
47 function check_message(data)
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
48 local origin, stanza = data.origin, data.stanza;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
49
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
50 local body, bodyindex, htmlindex;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
51 for k,v in ipairs(stanza) do
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
52 if v.name == "body" then
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
53 body, bodyindex = v, k;
71
3c18c2d03bc2 mod_pastebin: Fix finding of XHTML content.
Paul Aurich <paul@darkrain42.org>
parents: 25
diff changeset
54 elseif v.name == "html" and v.attr.xmlns == xmlns_xhtmlim then
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
55 htmlindex = k;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
56 end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
57 end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
58
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
59 if not body then return; end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
60 body = body:get_text();
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
61
25
ea59a8d98b03 mod_pastebin: Comment some debug logging on every message
Matthew Wild <mwild1@gmail.com>
parents: 24
diff changeset
62 --module:log("debug", "Body(%s) length: %d", type(body), #(body or ""));
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
63
167
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
64 if body and (
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
65 (#body > length_threshold) or
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
66 (trigger_string and body:find(trigger_string, 1, true) == 1) or
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
67 (select(2, body:gsub("\n", "%0")) >= line_threshold)
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
68 ) then
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
69 if trigger_string then
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
70 body = body:gsub("^" .. trigger_string, "", 1);
0d37d18ea073 mod_pastebin: Fix trigger_string matching when no trigger is set, and add support for counting lines (pastebin_line_threshold, default: 4)
Matthew Wild <mwild1@gmail.com>
parents: 156
diff changeset
71 end
75
3c7189e26848 mod_pastebin: Rename pastebin_message() to pastebin_text() and make it global so it can be called by other plugins
Matthew Wild <mwild1@gmail.com>
parents: 71
diff changeset
72 local url = pastebin_text(body);
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
73 module:log("debug", "Pasted message as %s", url);
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
74 --module:log("debug", " stanza[bodyindex] = %q", tostring( stanza[bodyindex]));
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
75 stanza[bodyindex][1] = url;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
76 local html = st.stanza("html", { xmlns = xmlns_xhtmlim }):tag("body", { xmlns = xmlns_xhtml });
189
16d367e3c04e mod_pastebin: Fix to not truncate UTF-8 sequences when creating a snippet summary for XHTML-IM (thanks DaiZW for discovering :) )
Matthew Wild <mwild1@gmail.com>
parents: 167
diff changeset
77 html:tag("p"):text(body:sub(1,150):gsub("[\128-\255]+$", "")):up();
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
78 html:tag("a", { href = url }):text("[...]"):up();
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
79 stanza[htmlindex or #stanza+1] = html;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
80 end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
81 end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
82
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
83 module:hook("message/bare", check_message);
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
84
24
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
85 function expire_pastes(time)
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
86 time = time or os_time(); -- COMPAT with 0.5
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
87 if pastes[1] then
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
88 pastes[pastes[1]] = nil;
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
89 t_remove(pastes, 1);
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
90 if pastes[1] then
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
91 return (expire_after - (time - pastes[pastes[1]].time)) + 1;
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
92 end
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
93 end
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
94 end
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
95
72bcc0475e2f mod_pastebin: Expire pastes after 'pastebin_expire_after' hours, 24 by default
Matthew Wild <mwild1@gmail.com>
parents: 23
diff changeset
96
5
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
97 local ports = config.get(module.host, "core", "pastebin_ports") or { 5280 };
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
98 for _, options in ipairs(ports) do
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
99 local port, base, ssl, interface = 5280, "pastebin", false, nil;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
100 if type(options) == "number" then
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
101 port = options;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
102 elseif type(options) == "table" then
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
103 port, base, ssl, interface = options.port or 5280, options.path or "pastebin", options.ssl or false, options.interface;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
104 elseif type(options) == "string" then
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
105 base = options;
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
106 end
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
107
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
108 base_url = base_url or ("http://"..module:get_host()..(port ~= 80 and (":"..port) or "").."/"..base.."/");
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
109
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
110 httpserver.new{ port = port, base = base, handler = handle_request, ssl = ssl }
9c1c6c5344dc mod_pastebin: Initial commit
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
111 end