comparison src/plugins/plugin_exp_lang_detect.py @ 1965:4c5d8cd35690

plugin exp_lang_detect: language detection plugin, first draft. This plugin tries to autodetect the language of outgoing and incoming messages when it is not specified by the sender.
author Goffi <goffi@goffi.org>
date Mon, 20 Jun 2016 18:44:27 +0200
parents
children d95a6d553bec
comparison
equal deleted inserted replaced
1964:a86e41d9245d 1965:4c5d8cd35690
1 #!/usr/bin/env python2
2 # -*- coding: utf-8 -*-
3
4 # SAT plugin to detect language (experimental)
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 from sat.core.i18n import _
21 from sat.core.log import getLogger
22 log = getLogger(__name__)
23 from sat.core import exceptions
24
try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    # langid is a third-party package: turn the bare ImportError into the
    # project's MissingModule error carrying an installation hint.
    raise exceptions.MissingModule(u'Missing module langid, please download/install it with "pip install langid"')

# Single module-level identifier built from the model shipped with langid and
# shared by every LangDetect call.
# NOTE(review): norm_probs=False presumably skips probability normalisation;
# only the language code of the result is used in this module -- confirm
# against the langid documentation.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)
31
32
# Plugin metadata. "main" names the class defined in this module that
# implements the plugin.
# NOTE(review): "EXP" presumably marks the plugin as experimental, as the
# header comment of this file suggests -- confirm against the plugin loader.
PLUGIN_INFO = {
    "name": "Language detection plugin",
    "import_name": "EXP-LANG-DETECT",
    "type": "EXP",
    # No protocol is declared and no other plugin is required.
    "protocols": [],
    "dependencies": [],
    "main": "LangDetect",
    "handler": "no",
    "description": _("Detect and set message language when unknown"),
}
43
44
class LangDetect(object):
    """Set a detected language on messages that do not specify one.

    Message bodies are read from the ``"message"`` entry of the message data,
    a mapping of language code to body text in which the empty string key is
    used for a body without language.
    """

    def __init__(self, host):
        """Hook language detection on received and sent messages.

        @param host: host object; its ``trigger.add`` method is used to
            register the "MessageReceived" and "messageSend" triggers
        """
        log.info(_(u"Language detection plugin initialization"))
        self.host = host
        host.trigger.add("MessageReceived", self.MessageReceivedTrigger)
        host.trigger.add("messageSend", self.MessageSendTrigger)

    def addLanguage(self, mess_data):
        """Replace the language-less body of a message by a tagged one.

        Nothing is changed unless the message holds exactly one body and that
        body is stored under the empty language key. Empty or whitespace-only
        bodies are left alone too, as there is nothing to detect a language
        from.

        @param mess_data(dict): message data, its "message" entry maps
            language codes to body texts
        @return (dict): the same mess_data object, updated in place when a
            language has been detected
        """
        bodies = mess_data['message']
        # Membership test and key lookup instead of indexing keys()/values():
        # same result, and it does not depend on those methods returning lists.
        if len(bodies) != 1 or '' not in bodies:
            return mess_data
        text = bodies['']
        if not text.strip():
            return mess_data
        # classify() result is indexed at 0 to get the language code
        lang = identifier.classify(text)[0]
        mess_data["message"] = {lang: text}
        return mess_data

    def MessageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection for a received message.

        addLanguage is added as a callback of post_treat, so detection is
        done on the data this callback chain delivers.

        @param client: unused here
        @param message_elt: unused here
        @param post_treat: object with an ``addCallback`` method
            (presumably a Twisted Deferred -- confirm against the caller)
        @return (bool): always True
        """
        post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Detect the language of an outgoing message before it is sent.

        @param client: unused here
        @param data(dict): message data, updated in place by addLanguage
        @param pre_xml_treatments: unused here
        @param post_xml_treatments: unused here
        @return (bool): always True
        """
        self.addLanguage(data)
        return True