comparison mod_pastebin/mod_pastebin.lua @ 190:7a695ee3884b

mod_pastebin: Better fix for stripping truncated UTF-8 sequences
author Matthew Wild <mwild1@gmail.com>
date Mon, 28 Jun 2010 15:34:28 +0100
parents 16d367e3c04e
children 7f0cdde1e42a
comparison
equal deleted inserted replaced
189:16d367e3c04e 190:7a695ee3884b
3 local httpserver = require "net.httpserver"; 3 local httpserver = require "net.httpserver";
4 local uuid_new = require "util.uuid".generate; 4 local uuid_new = require "util.uuid".generate;
5 local os_time = os.time; 5 local os_time = os.time;
6 local t_insert, t_remove = table.insert, table.remove; 6 local t_insert, t_remove = table.insert, table.remove;
7 local add_task = require "util.timer".add_task; 7 local add_task = require "util.timer".add_task;
8
9 local function drop_invalid_utf8(seq) -- Returns seq unchanged if it is long enough for its lead byte, "" otherwise.
10 local start = seq:byte(); -- numeric value of the first (lead) byte of seq
11 module:log("debug", "utf8: %d, %d", start, #seq); -- log level goes first, as in the other module:log calls; it was missing here
12 if (start <= 223 and #seq < 2) -- treated as a 2-byte lead, but fewer than 2 bytes are present (NOTE: a lone byte below 194 is dropped too; the visible caller's pattern only matches leads in \194-\244)
13 or (start >= 224 and start <= 239 and #seq < 3) -- 3-byte lead with fewer than 3 bytes present
14 or (start >= 240 and start <= 244 and #seq < 4) -- 4-byte lead with fewer than 4 bytes present
15 or (start > 244) then -- bytes above 244 are always rejected
16 return ""; -- drop the truncated or invalid sequence
17 end
18 return seq; -- sequence is long enough for its lead byte: keep it as-is
19 end
8 20
9 local length_threshold = config.get(module.host, "core", "pastebin_threshold") or 500; 21 local length_threshold = config.get(module.host, "core", "pastebin_threshold") or 500;
10 local line_threshold = config.get(module.host, "core", "pastebin_line_threshold") or 4; 22 local line_threshold = config.get(module.host, "core", "pastebin_line_threshold") or 4;
11 23
12 local base_url = config.get(module.host, "core", "pastebin_url"); 24 local base_url = config.get(module.host, "core", "pastebin_url");
72 local url = pastebin_text(body); 84 local url = pastebin_text(body);
73 module:log("debug", "Pasted message as %s", url); 85 module:log("debug", "Pasted message as %s", url);
74 --module:log("debug", " stanza[bodyindex] = %q", tostring( stanza[bodyindex])); 86 --module:log("debug", " stanza[bodyindex] = %q", tostring( stanza[bodyindex]));
75 stanza[bodyindex][1] = url; 87 stanza[bodyindex][1] = url;
76 local html = st.stanza("html", { xmlns = xmlns_xhtmlim }):tag("body", { xmlns = xmlns_xhtml }); 88 local html = st.stanza("html", { xmlns = xmlns_xhtmlim }):tag("body", { xmlns = xmlns_xhtml });
77 html:tag("p"):text(body:sub(1,150):gsub("[\128-\255]+$", "")):up(); 89 html:tag("p"):text(body:sub(1,150):gsub("[\194-\244][\128-\191]*$", drop_invalid_utf8)):up();
78 html:tag("a", { href = url }):text("[...]"):up(); 90 html:tag("a", { href = url }):text("[...]"):up();
79 stanza[htmlindex or #stanza+1] = html; 91 stanza[htmlindex or #stanza+1] = html;
80 end 92 end
81 end 93 end
82 94