annotate libervia/backend/plugins/plugin_exp_lang_detect.py @ 4293:9447796408f6

tests (unit): add test for XEP-0298 plugin + fix XEP-0167: fix 447
author Goffi <goffi@goffi.org>
date Mon, 29 Jul 2024 03:49:26 +0200
parents 0d7bb4df2343
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
1 #!/usr/bin/env python3
3137
559a625a236b fixed shebangs
Goffi <goffi@goffi.org>
parents: 3136
diff changeset
2
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
3
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SAT plugin to detect language (experimental)
3479
be6d91572633 date update
Goffi <goffi@goffi.org>
parents: 3172
diff changeset
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
6
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
11
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
16
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
19
4071
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4051
diff changeset
20 from libervia.backend.core.i18n import _, D_
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4051
diff changeset
21 from libervia.backend.core.constants import Const as C
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4051
diff changeset
22 from libervia.backend.core.log import getLogger
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
23
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
24 log = getLogger(__name__)
4071
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4051
diff changeset
25 from libervia.backend.core import exceptions
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
26
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
27 try:
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
28 from langid.langid import LanguageIdentifier, model
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
29 except ImportError:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
30 raise exceptions.MissingModule(
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
31 'Missing module langid, please download/install it with "pip install langid")'
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
32 )
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
33
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
34 identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
35
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
36
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
37 PLUGIN_INFO = {
2145
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
38 C.PI_NAME: "Language detection plugin",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
39 C.PI_IMPORT_NAME: "EXP-LANG-DETECT",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
40 C.PI_TYPE: "EXP",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
41 C.PI_PROTOCOLS: [],
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
42 C.PI_DEPENDENCIES: [],
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
43 C.PI_MAIN: "LangDetect",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 2144
diff changeset
44 C.PI_HANDLER: "no",
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
45 C.PI_DESCRIPTION: _("""Detect and set message language when unknown"""),
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
46 }
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
47
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
48 CATEGORY = D_("Misc")
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
49 NAME = "lang_detect"
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
50 LABEL = D_("language detection")
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
51 PARAMS = """
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
52 <params>
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
53 <individual>
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
54 <category name="{category_name}">
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
55 <param name="{name}" label="{label}" type="bool" value="true" />
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
56 </category>
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
57 </individual>
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
58 </params>
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
59 """.format(
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
60 category_name=CATEGORY, name=NAME, label=_(LABEL)
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
61 )
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
62
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
63
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
64 class LangDetect(object):
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
65 def __init__(self, host):
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
66 log.info(_("Language detection plugin initialization"))
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
67 self.host = host
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 4022
diff changeset
68 host.memory.update_params(PARAMS)
4051
c23cad65ae99 core: renamed `messageReceived` trigger to `message_received`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
69 host.trigger.add("message_received", self.message_received_trigger)
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 4022
diff changeset
70 host.trigger.add("sendMessage", self.message_send_trigger)
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
71
4022
cdb7de398c85 plugin lang detect: don't detect the language if the body is empty
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
72 def add_language(self, mess_data):
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
73 message = mess_data["message"]
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
74 if len(message) == 1 and list(message.keys())[0] == "":
4022
cdb7de398c85 plugin lang detect: don't detect the language if the body is empty
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
75 msg = list(message.values())[0].strip()
cdb7de398c85 plugin lang detect: don't detect the language if the body is empty
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
76 if msg:
cdb7de398c85 plugin lang detect: don't detect the language if the body is empty
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
77 lang = identifier.classify(msg)[0]
cdb7de398c85 plugin lang detect: don't detect the language if the body is empty
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
78 mess_data["message"] = {lang: msg}
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
79 return mess_data
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
80
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 4022
diff changeset
81 def message_received_trigger(self, client, message_elt, post_treat):
4270
0d7bb4df2343 Reformatted code base using black.
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
82 """Check if source is linked and repeat message, else do nothing"""
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
83
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 4022
diff changeset
84 lang_detect = self.host.memory.param_get_a(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
85 NAME, CATEGORY, profile_key=client.profile
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
86 )
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
87 if lang_detect:
4022
cdb7de398c85 plugin lang detect: don't detect the language if the body is empty
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
88 post_treat.addCallback(self.add_language)
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
89 return True
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
90
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 4022
diff changeset
91 def message_send_trigger(self, client, data, pre_xml_treatments, post_xml_treatments):
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 4022
diff changeset
92 lang_detect = self.host.memory.param_get_a(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
93 NAME, CATEGORY, profile_key=client.profile
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
94 )
2011
d95a6d553bec plugin lang detect: added a parameter to (de)activate the detection
Goffi <goffi@goffi.org>
parents: 1965
diff changeset
95 if lang_detect:
4022
cdb7de398c85 plugin lang detect: don't detect the language if the body is empty
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
96 self.add_language(data)
1965
4c5d8cd35690 plugin exp_lang_detect: language detection plugin, first draft
Goffi <goffi@goffi.org>
parents:
diff changeset
97 return True