Mercurial > libervia-backend
annotate sat/plugins/plugin_exp_lang_detect.py @ 4011:74d4c9ff893d
plugin XEP-0277: when publisher is not found, check ancestor in both client and component namespaces
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 16 Mar 2023 16:43:47 +0100 |
parents | be6d91572633 |
children | cdb7de398c85 |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SAT plugin to detect language (experimental) |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
20 from sat.core.i18n import _, D_ |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
21 from sat.core.constants import Const as C |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 from sat.core.log import getLogger |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
23 |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 log = getLogger(__name__) |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 from sat.core import exceptions |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# langid is a third-party dependency of this plugin: when it cannot be
# imported, raise the project's MissingModule error with an install hint
# instead of letting a bare ImportError propagate.
try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    raise exceptions.MissingModule(
        'Missing module langid, please download/install it with "pip install langid"'
    )
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
33 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False) |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
36 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# Plugin metadata; the keys are the PI_* constants exposed by
# sat.core.constants.Const (imported in this file as ``C``).
PLUGIN_INFO = {
    C.PI_NAME: "Language detection plugin",
    C.PI_IMPORT_NAME: "EXP-LANG-DETECT",
    C.PI_TYPE: "EXP",
    # no protocol implemented and no dependency on other plugins
    C.PI_PROTOCOLS: [],
    C.PI_DEPENDENCIES: [],
    # matches the name of the plugin class defined in this module
    C.PI_MAIN: "LangDetect",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("Detect and set message language when unknown"),
}
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
47 |
3028 | 48 CATEGORY = D_("Misc") |
49 NAME = "lang_detect" | |
50 LABEL = D_("language detection") | |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
51 PARAMS = """ |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
52 <params> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
53 <individual> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
54 <category name="{category_name}"> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
55 <param name="{name}" label="{label}" type="bool" value="true" /> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
56 </category> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
57 </individual> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
58 </params> |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
59 """.format( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
60 category_name=CATEGORY, name=NAME, label=_(LABEL) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
61 ) |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
62 |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
63 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
class LangDetect(object):
    """Tag message bodies of unknown language with a detected language code.

    The plugin hooks the "messageReceived" and "sendMessage" triggers; when
    the per-profile parameter NAME/CATEGORY is enabled, a message whose only
    body is stored under the empty-string language key is re-keyed with the
    language returned by the module-level langid ``identifier``.
    """

    def __init__(self, host):
        """Register the plugin parameter and both message triggers.

        ``host`` is the backend object: its ``memory`` attribute receives the
        parameter definition and its ``trigger`` attribute the two hooks.
        """
        log.info(_("Language detection plugin initialization"))
        self.host = host
        host.memory.updateParams(PARAMS)
        host.trigger.add("messageReceived", self.messageReceivedTrigger)
        host.trigger.add("sendMessage", self.MessageSendTrigger)

    def _isEnabled(self, client):
        """Return the language detection parameter value for the client's profile."""
        return self.host.memory.getParamA(
            NAME, CATEGORY, profile_key=client.profile
        )

    def addLanguage(self, mess_data):
        """Set the language of a message which has a single untagged body.

        ``mess_data["message"]`` is a dict mapping a language code to a body.
        Only the case of exactly one body stored under the ``""`` key is
        handled: the body is classified and the dict is replaced by
        ``{detected_language: body}``. Anything else is left untouched.

        The same ``mess_data`` object is returned, so that the method can be
        used as a callback in a processing chain.
        """
        message = mess_data["message"]
        if len(message) == 1 and "" in message:
            msg = message[""]
            # An empty or whitespace-only body carries no information for the
            # classifier: whatever language it would return is meaningless,
            # so the body is kept untagged.
            if msg and msg.strip():
                lang = identifier.classify(msg)[0]
                mess_data["message"] = {lang: msg}
        return mess_data

    def messageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection for an incoming message.

        When detection is enabled for the client's profile, ``addLanguage``
        is added as a callback on ``post_treat`` (presumably a Deferred fired
        with the parsed message data; confirm against the trigger caller).

        Always returns True.
        NOTE(review): True presumably lets the normal message workflow
        continue; confirm against the trigger manager.
        """
        if self._isEnabled(client):
            post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Detect the language of an outgoing message before it is sent.

        When detection is enabled for the client's profile, ``data`` is
        updated in place by ``addLanguage``. ``pre_xml_treatments`` and
        ``post_xml_treatments`` are not used.

        Always returns True.
        NOTE(review): True presumably lets the normal message workflow
        continue; confirm against the trigger manager.
        """
        if self._isEnabled(client):
            self.addLanguage(data)
        return True