Mercurial > libervia-backend
annotate src/plugins/plugin_exp_lang_detect.py @ 2444:30278ea1ca7c
plugin XEP-0060: added node watching methods to bridge:
new methods psNodeWatchAdd and psNodeWatchRemove allow setting a watch on one node for the duration of the session, so that a signal is emitted when something changes on this node.
This signal (psEventRaw) sends raw data (raw XML), as opposed to psEvent, which is there to send high-level data (e.g. parsed blog data).
Those methods are primarily there to let frontends manage a local cache for pubsub nodes.
author | Goffi <goffi@goffi.org> |
---|---|
date | Sun, 19 Nov 2017 16:51:39 +0100 |
parents | 8b37a62336c3 |
children | 0046283a285d |
rev | line source |
---|---|
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
1 #!/usr/bin/env python2 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SAT plugin to detect language (experimental) |
2414
8b37a62336c3
misc: date update (yes it's a bit late :p )
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
5 # Copyright (C) 2009-2017 Jérôme Poisson (goffi@goffi.org) |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
20 from sat.core.i18n import _, D_ |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
21 from sat.core.constants import Const as C |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 from sat.core.log import getLogger |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
23 log = getLogger(__name__) |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 from sat.core import exceptions |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# langid is an optional third-party dependency: fail early with an explicit
# message when it is not installed.
try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    # the message used to end with a stray, unbalanced ")"
    raise exceptions.MissingModule(u'Missing module langid, please download/install it with "pip install langid"')

# Single identifier instance built once at import time and shared by every
# detection call of this module (probabilities are not normalised).
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
32 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
33 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# Plugin metadata, keyed by the PI_* constants of sat.core.constants.Const.
PLUGIN_INFO = {
    # identification
    C.PI_NAME: "Language detection plugin",
    C.PI_IMPORT_NAME: "EXP-LANG-DETECT",
    C.PI_TYPE: "EXP",
    # no protocol and no dependency are declared
    C.PI_PROTOCOLS: [],
    C.PI_DEPENDENCIES: [],
    # name of the class defined in this module (presumably the one the
    # plugin loader instantiates; confirm against the loader)
    C.PI_MAIN: "LangDetect",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("Detect and set message language when unknown"),
}
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
# Declaration of the plugin parameter: an "individual" boolean, enabled by
# default, read back in the triggers with getParamA(NAME, CATEGORY, ...).
CATEGORY = D_(u"Misc")
NAME = u"lang_detect"
LABEL = D_(u"language detection")
# The template is a unicode literal on purpose: under Python 2, calling
# ``.format`` on a byte string with a unicode argument holding non-ASCII
# characters (e.g. a translated label) raises UnicodeEncodeError.
# NOTE(review): whitespace between the XML elements is assumed to be
# insignificant for the parameters parser -- confirm.
PARAMS = u"""
    <params>
    <individual>
    <category name="{category_name}">
        <param name="{name}" label="{label}" type="bool" value="true" />
    </category>
    </individual>
    </params>
    """.format(
    category_name=CATEGORY,
    name=NAME,
    label=_(LABEL),
)
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
60 |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
61 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
class LangDetect(object):
    """Set the language of messages whose language is unknown.

    Messages are handled on both reception and sending, and only when the
    ``lang_detect`` parameter is enabled for the profile of the client.
    """

    def __init__(self, host):
        """Register the plugin parameter and hook the message triggers.

        @param host: backend instance; only its ``memory`` and ``trigger``
            attributes are used by this plugin
        """
        log.info(_(u"Language detection plugin initialization"))
        self.host = host
        host.memory.updateParams(PARAMS)
        host.trigger.add("MessageReceived", self.MessageReceivedTrigger)
        host.trigger.add("sendMessage", self.MessageSendTrigger)

    def addLanguage(self, mess_data):
        """Detect and set the language of a message when it is unknown.

        Detection only happens when ``mess_data['message']`` holds exactly
        one body, stored under the empty string key (i.e. no language set).
        In any other case the data is left untouched.

        @param mess_data(dict): message data; its 'message' key maps
            language codes to message bodies
        @return (dict): the same ``mess_data`` object, with its 'message'
            entry replaced by ``{detected_language: body}`` when detection
            took place
        """
        message = mess_data['message']
        # Membership test + direct lookup instead of indexing keys()/values():
        # same result, and also valid where those methods return views.
        if len(message) == 1 and '' in message:
            msg = message['']
            # the first item of the classification result is used as the
            # language key
            lang = identifier.classify(msg)[0]
            mess_data["message"] = {lang: msg}
        return mess_data

    def MessageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection for a received message.

        When detection is enabled for the profile of ``client``,
        ``addLanguage`` is added as a callback to ``post_treat``.

        @param client: client receiving the message (its ``profile`` is used
            to read the parameter)
        @param message_elt: received message element (not used here)
        @param post_treat: object exposing ``addCallback`` (presumably a
            Deferred fired with the parsed message data -- confirm)
        @return (bool): always True
        """
        lang_detect = self.host.memory.getParamA(NAME, CATEGORY, profile_key=client.profile)
        if lang_detect:
            post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Detect the language of a message about to be sent.

        When detection is enabled for the profile of ``client``, ``data`` is
        updated in place through ``addLanguage``.

        @param client: client sending the message (its ``profile`` is used
            to read the parameter)
        @param data(dict): message data, see ``addLanguage``
        @param pre_xml_treatments: not used here
        @param post_xml_treatments: not used here
        @return (bool): always True
        """
        lang_detect = self.host.memory.getParamA(NAME, CATEGORY, profile_key=client.profile)
        if lang_detect:
            self.addLanguage(data)
        return True