annotate src/plugins/plugin_exp_lang_detect.py @ 2182:087eec4c6c07

memory (persistent, sqlite): better private values handling + new LazyPersistentBinaryDict: - merged private values method handling in sqlite, and added a keys argument to get only some keys - LazyPersistentBinaryDict allows getting values from the database only when they are needed, saving memory for big data
author Goffi <goffi@goffi.org>
date Sun, 12 Mar 2017 19:33:17 +0100
parents 33c8c4973743
children 8b37a62336c3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
1 #!/usr/bin/env python2
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
3
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SAT plugin to detect language (experimental)
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
6
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
11
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
16
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
19
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
20 from sat.core.i18n import _, D_
2145
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
21 from sat.core.constants import Const as C
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
22 from sat.core.log import getLogger
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
23 log = getLogger(__name__)
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
24 from sat.core import exceptions
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
25
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
# langid is an optional third-party dependency: turn a plain ImportError into
# the project's MissingModule exception, with an installation hint.
try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    raise exceptions.MissingModule(u'Missing module langid, please download/install it with "pip install langid"')

# Single module-level classifier, built once at import time and used by
# LangDetect.addLanguage.
# NOTE(review): norm_probs=False presumably skips probability normalisation,
# which is not needed since only the language code of the result is used
# -- confirm against the langid documentation.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
32
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
33
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
# Metadata describing this plugin, keyed by the PI_* constants of
# sat.core.constants.Const.
# NOTE(review): presumably consumed by the plugin loader -- confirm.
PLUGIN_INFO = {
    C.PI_NAME: "Language detection plugin",
    C.PI_IMPORT_NAME: "EXP-LANG-DETECT",
    C.PI_TYPE: "EXP",
    C.PI_PROTOCOLS: [],
    C.PI_DEPENDENCIES: [],
    # name of the class defined in this module
    C.PI_MAIN: "LangDetect",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("Detect and set message language when unknown"),
}
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
44
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
# Category, name and label of the boolean parameter which (de)activates the
# language detection; CATEGORY and NAME are also used to read the parameter
# back with getParamA in the LangDetect triggers.
# NOTE(review): D_ presumably only marks the string for later translation
# (the label is translated with _() when PARAMS is built) -- confirm.
CATEGORY = D_(u"Misc")
NAME = u"lang_detect"
LABEL = D_(u"language detection")

# XML declaration of the parameter (individual, boolean, "true" by default),
# handed to host.memory.updateParams in LangDetect.__init__.
PARAMS = """
<params>
<individual>
<category name="{category_name}">
<param name="{name}" label="{label}" type="bool" value="true" />
</category>
</individual>
</params>
""".format(category_name=CATEGORY, name=NAME, label=_(LABEL))
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
60
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
61
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
class LangDetect(object):
    """Plugin setting a detected language on messages whose language is unknown.

    Detection is applied to received and sent messages, and only when the
    per-profile boolean parameter (CATEGORY/NAME) is enabled.
    """

    def __init__(self, host):
        """Declare the plugin parameter and register both message triggers.

        @param host: object giving access to ``memory`` (parameters) and
            ``trigger`` (trigger registration)
        """
        log.info(_(u"Language detection plugin initialization"))
        self.host = host
        host.memory.updateParams(PARAMS)
        host.trigger.add("MessageReceived", self.MessageReceivedTrigger)
        host.trigger.add("sendMessage", self.MessageSendTrigger)

    def addLanguage(self, mess_data):
        """Replace the empty language key of a message by the detected language.

        Nothing is changed unless ``mess_data['message']`` holds exactly one
        entry stored under the empty key ``''``.

        @param mess_data(dict): message data; its ``'message'`` value is a
            dict mapping a language key to the message text
        @return (dict): the same ``mess_data`` object, possibly updated in place
        """
        message = mess_data['message']
        # Direct lookup of the '' key instead of indexing keys()/values(), which
        # only works where those return lists (Python 2).
        if len(message) == 1 and '' in message:
            msg = message['']
            # classify() returns a sequence whose first item is the language code
            lang = identifier.classify(msg)[0]
            mess_data["message"] = {lang: msg}
        return mess_data

    def MessageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection on a received message, if activated.

        @param client: client of the receiving profile (``client.profile`` is
            used to read the parameter)
        @param message_elt: received message element (not used here)
        @param post_treat: object whose ``addCallback`` registers addLanguage
            NOTE(review): presumably a Deferred fired with the parsed message
            data -- confirm against the caller
        @return (bool): always True
            NOTE(review): presumably lets the other triggers run -- confirm
        """
        lang_detect = self.host.memory.getParamA(NAME, CATEGORY, profile_key=client.profile)
        if lang_detect:
            post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Run language detection on a message about to be sent, if activated.

        @param client: client of the sending profile (``client.profile`` is
            used to read the parameter)
        @param data(dict): message data, updated in place by addLanguage
        @param pre_xml_treatments: not used here
        @param post_xml_treatments: not used here
        @return (bool): always True
            NOTE(review): presumably lets the other triggers run -- confirm
        """
        lang_detect = self.host.memory.getParamA(NAME, CATEGORY, profile_key=client.profile)
        if lang_detect:
            self.addLanguage(data)
        return True