Mercurial > libervia-backend
annotate sat/plugins/plugin_exp_lang_detect.py @ 3888:aa7197b67c26
component AP gateway: AP <=> XMPP reactions conversions:
- Pubsub Attachments plugin has been renamed to XEP-0470 following publication
- XEP-0470 has been updated to follow 0.2 changes
- AP reactions (as implemented in Pleroma) are converted to XEP-0470
- XEP-0470 events are converted to AP reactions (again, using "EmojiReact" from Pleroma)
- AP activities related to attachments (like/reactions) are cached in Libervia because
it's not possible to retrieve them from Pleroma instances once they have been emitted
(doing an HTTP get on their ID returns a 404). For now those cache are not flushed, this
should be improved in the future.
- `sharedInbox` is used when available. Pleroma returns a 500 HTTP error when ``to`` or
``cc`` are used in a direct inbox.
- reactions and like are not currently used for direct messages, because they can't be
emitted from Pleroma in this case, thus there is no point in implementing them for the
moment.
rel 371
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 31 Aug 2022 17:07:03 +0200 |
parents | be6d91572633 |
children | cdb7de398c85 |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SAT plugin to detect language (experimental) |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
20 from sat.core.i18n import _, D_ |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
21 from sat.core.constants import Const as C |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 from sat.core.log import getLogger |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
23 |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 log = getLogger(__name__) |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 from sat.core import exceptions |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
27 try: |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
28 from langid.langid import LanguageIdentifier, model |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
29 except ImportError: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
30 raise exceptions.MissingModule( |
3028 | 31 'Missing module langid, please download/install it with "pip install langid")' |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
32 ) |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
33 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False) |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
36 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
37 PLUGIN_INFO = { |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
38 C.PI_NAME: "Language detection plugin", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
39 C.PI_IMPORT_NAME: "EXP-LANG-DETECT", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
40 C.PI_TYPE: "EXP", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
41 C.PI_PROTOCOLS: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
42 C.PI_DEPENDENCIES: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
43 C.PI_MAIN: "LangDetect", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2144
diff
changeset
|
44 C.PI_HANDLER: "no", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
45 C.PI_DESCRIPTION: _("""Detect and set message language when unknown"""), |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
46 } |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
47 |
3028 | 48 CATEGORY = D_("Misc") |
49 NAME = "lang_detect" | |
50 LABEL = D_("language detection") | |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
51 PARAMS = """ |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
52 <params> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
53 <individual> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
54 <category name="{category_name}"> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
55 <param name="{name}" label="{label}" type="bool" value="true" /> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
56 </category> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
57 </individual> |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
58 </params> |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
59 """.format( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
60 category_name=CATEGORY, name=NAME, label=_(LABEL) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
61 ) |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
62 |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
63 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
64 class LangDetect(object): |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
65 def __init__(self, host): |
3028 | 66 log.info(_("Language detection plugin initialization")) |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
67 self.host = host |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
68 host.memory.updateParams(PARAMS) |
3172
dcebc585c29f
core: renamed "MessageReceived" trigger to "messageReceived" for consistency.
Goffi <goffi@goffi.org>
parents:
3137
diff
changeset
|
69 host.trigger.add("messageReceived", self.messageReceivedTrigger) |
2144
1d3f73e065e1
core, jp: component handling + client handling refactoring:
Goffi <goffi@goffi.org>
parents:
2011
diff
changeset
|
70 host.trigger.add("sendMessage", self.MessageSendTrigger) |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
71 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
72 def addLanguage(self, mess_data): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
73 message = mess_data["message"] |
3028 | 74 if len(message) == 1 and list(message.keys())[0] == "": |
75 msg = list(message.values())[0] | |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
76 lang = identifier.classify(msg)[0] |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
77 mess_data["message"] = {lang: msg} |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
78 return mess_data |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
79 |
3172
dcebc585c29f
core: renamed "MessageReceived" trigger to "messageReceived" for consistency.
Goffi <goffi@goffi.org>
parents:
3137
diff
changeset
|
80 def messageReceivedTrigger(self, client, message_elt, post_treat): |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
81 """ Check if source is linked and repeat message, else do nothing """ |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
82 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
83 lang_detect = self.host.memory.getParamA( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
84 NAME, CATEGORY, profile_key=client.profile |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
85 ) |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
86 if lang_detect: |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
87 post_treat.addCallback(self.addLanguage) |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
88 return True |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
89 |
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
90 def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
91 lang_detect = self.host.memory.getParamA( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
92 NAME, CATEGORY, profile_key=client.profile |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
93 ) |
2011
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
94 if lang_detect: |
d95a6d553bec
plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents:
1965
diff
changeset
|
95 self.addLanguage(data) |
1965
4c5d8cd35690
plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
96 return True |