annotate sat/plugins/plugin_exp_lang_detect.py @ 2721:4aaa47f62d8d

core (memory/sqlite): fixed v7 update performance issue: the performance issue was due to bad ordering in table dropping; this has been fixed. "infos" messages are also deleted by this update, as they contain only presence data and take a lot of space for barely useful data. A config option may be available in the future to store presence data in logs.
author Goffi <goffi@goffi.org>
date Tue, 11 Dec 2018 23:53:27 +0100
parents 56f94936df1e
children 003b8b4b56a7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
1 #!/usr/bin/env python2
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
3
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SAT plugin to detect language (experimental)
2483
0046283a285d dates update
Goffi <goffi@goffi.org>
parents: 2414
diff changeset
5 # Copyright (C) 2009-2018 Jérôme Poisson (goffi@goffi.org)
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
6
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
11
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
16
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
19
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
20 from sat.core.i18n import _, D_
2145
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
21 from sat.core.constants import Const as C
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
22 from sat.core.log import getLogger
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
23
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
24 log = getLogger(__name__)
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
25 from sat.core import exceptions
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
26
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
# langid is an optional third-party dependency: turn a failed import into the
# project's MissingModule exception with an installation hint.
try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    raise exceptions.MissingModule(
        u'Missing module langid, please download/install it with "pip install langid"'
    )

# Single module-level identifier, built once from the model shipped with
# langid and shared by every classification done in this module.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
35
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
36
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
# Plugin metadata, keyed by the PI_* constants of sat.core.constants.Const.
PLUGIN_INFO = {
    # identification
    C.PI_NAME: "Language detection plugin",
    C.PI_IMPORT_NAME: "EXP-LANG-DETECT",
    C.PI_TYPE: "EXP",
    # no protocol nor plugin dependency is declared
    C.PI_PROTOCOLS: [],
    C.PI_DEPENDENCIES: [],
    # name of the class defined in this module
    C.PI_MAIN: "LangDetect",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("Detect and set message language when unknown"),
}
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
47
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
# Category, name and label of the boolean parameter controlling the detection.
CATEGORY = D_(u"Misc")
NAME = u"lang_detect"
LABEL = D_(u"language detection")

# XML declaration of that parameter (an individual "bool" param whose value
# attribute is "true"), filled in with the constants above.
PARAMS = """
<params>
<individual>
<category name="{category_name}">
<param name="{name}" label="{label}" type="bool" value="true" />
</category>
</individual>
</params>
""".format(category_name=CATEGORY, name=NAME, label=_(LABEL))
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
62
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
63
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
class LangDetect(object):
    """Set the language of messages for which no language is specified.

    The detection is applied to received and sent messages, and only when the
    "lang_detect" parameter is enabled for the profile of the client.
    """

    def __init__(self, host):
        """Declare the plugin parameter and register the message triggers.

        @param host: object giving access to ``memory`` (parameters) and
            ``trigger`` (trigger registration)
        """
        log.info(_(u"Language detection plugin initialization"))
        self.host = host
        host.memory.updateParams(PARAMS)
        host.trigger.add("MessageReceived", self.MessageReceivedTrigger)
        host.trigger.add("sendMessage", self.MessageSendTrigger)

    def addLanguage(self, mess_data):
        """Replace an empty language key with the detected language.

        Nothing is changed unless ``mess_data["message"]`` holds exactly one
        body and that body is stored under the empty string key.

        @param mess_data(dict): message data, its "message" key maps a
            language to a message body
        @return (dict): the same ``mess_data`` object, possibly updated
        """
        message = mess_data["message"]
        if len(message) == 1:
            # keys()/values() are not indexable on Python 3 (dict views), so
            # the single entry is read through an iterator instead
            lang, body = next(iter(message.items()))
            if lang == "":
                # first element of the classification result is the language
                detected = identifier.classify(body)[0]
                mess_data["message"] = {detected: body}
        return mess_data

    def MessageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection on a received message, if enabled.

        @param client: client receiving the message, its ``profile`` is used
            to read the parameter
        @param message_elt: received message element (not used here)
        @param post_treat: object on which ``addLanguage`` is added as a
            callback (presumably a Deferred fired with the message data --
            to be confirmed against the caller)
        @return (bool): always True
        """
        lang_detect = self.host.memory.getParamA(
            NAME, CATEGORY, profile_key=client.profile
        )
        if lang_detect:
            post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Detect the language of a message about to be sent, if enabled.

        @param client: client sending the message, its ``profile`` is used to
            read the parameter
        @param data(dict): message data, updated in place by ``addLanguage``
        @param pre_xml_treatments: not used here
        @param post_xml_treatments: not used here
        @return (bool): always True
        """
        lang_detect = self.host.memory.getParamA(
            NAME, CATEGORY, profile_key=client.profile
        )
        if lang_detect:
            self.addLanguage(data)
        return True