view sat/plugins/plugin_exp_lang_detect.py @ 3922:0ff265725489

plugin XEP-0447: handle attachment and download: - plugin XEP-0447 can now be used in message attachments and to retrieve an attachment - plugin attach: `attachment` being processed is added to `extra` so the handler can inspect it - plugin attach: `size` is added to attachment - plugin download: a whole attachment dict is now used in `download` and `file_download`/`file_download_complete`. `download_uri` can be used as a shortcut when just a URI is used. In addition to URI scheme handlers, whole attachment handlers can now be registered with `register_download_handler` - plugin XEP-0363: `file_http_upload` `XEP-0363_upload_size` triggers have been renamed to `XEP-0363_upload_pre_slot` and are now using a dict with arguments, allowing for the size but also the filename to be modified, which is necessary for encryption (filename may be hidden from URL this way). - plugin XEP-0446: fix wrong element name - plugin XEP-0447: source handler can now be registered (`url-data` is registered by default) - plugin XEP-0447: source parsing has been put in a separate `parse_sources_elt` method, as it may be useful to do it independently (notably with XEP-0448) - plugin XEP-0447: parse received message and complete attachments when suitable - plugin XEP-0447: can now be used with message attachments - plugin XEP-0447: can now be used with attachments download - renamed `options` arguments to `extra` for consistency - some style changes (progressive move from legacy camelCase to PEP8 snake_case) - some typing rel 379
author Goffi <goffi@goffi.org>
date Thu, 06 Oct 2022 16:02:05 +0200
parents be6d91572633
children cdb7de398c85
line wrap: on
line source

#!/usr/bin/env python3


# SAT plugin to detect language (experimental)
# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sat.core.i18n import _, D_
from sat.core.constants import Const as C
from sat.core.log import getLogger

log = getLogger(__name__)
from sat.core import exceptions

try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    # langid is a third-party package; report its absence through the
    # project's own exception type, with an installation hint.
    raise exceptions.MissingModule(
        'Missing module langid, please download/install it with "pip install langid"'
    )

# Built once at import time and shared by every use in this module.
# Only the predicted language code is read from the classification result
# (see LangDetect.addLanguage), so probability normalisation is left disabled.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)


# Plugin metadata, keyed by the ``PI_*`` constants of ``sat.core.constants.Const``.
PLUGIN_INFO = {
    C.PI_NAME: "Language detection plugin",
    C.PI_IMPORT_NAME: "EXP-LANG-DETECT",
    C.PI_TYPE: "EXP",
    C.PI_PROTOCOLS: [],
    C.PI_DEPENDENCIES: [],
    # Name of the plugin class defined in this module.
    C.PI_MAIN: "LangDetect",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("Detect and set message language when unknown"),
}

# Identification of the boolean parameter that switches detection on or off.
CATEGORY = D_("Misc")
NAME = "lang_detect"
LABEL = D_("language detection")

# XML description of the parameter, registered by LangDetect.__init__ through
# ``host.memory.updateParams``. It is declared under <individual> with a
# default value of "true".
PARAMS = f"""
    <params>
    <individual>
    <category name="{CATEGORY}">
        <param name="{NAME}" label="{_(LABEL)}" type="bool" value="true" />
    </category>
    </individual>
    </params>
    """


class LangDetect:
    """Tag messages that carry no language with a detected language code.

    The plugin hooks into the ``messageReceived`` and ``sendMessage`` triggers
    and, when the ``lang_detect`` parameter is enabled for the profile, replaces
    the empty language key of a single-body message with the code returned by
    the module-level langid ``identifier``.
    """

    def __init__(self, host):
        """Register the plugin parameter and both message triggers.

        @param host: application object; must expose ``memory.updateParams``,
            ``memory.getParamA`` and ``trigger.add``
        """
        log.info(_("Language detection plugin initialization"))
        self.host = host
        host.memory.updateParams(PARAMS)
        host.trigger.add("messageReceived", self.messageReceivedTrigger)
        host.trigger.add("sendMessage", self.MessageSendTrigger)

    def _is_enabled(self, client):
        """Return the value of the language detection parameter for ``client``.

        @param client: client whose ``profile`` attribute selects the parameter
        @return: the parameter value as returned by ``getParamA``
        """
        return self.host.memory.getParamA(NAME, CATEGORY, profile_key=client.profile)

    def addLanguage(self, mess_data):
        """Set the language of a message which has a single, untagged body.

        The message is only modified when ``mess_data["message"]`` holds exactly
        one body stored under the empty-string key and that body contains some
        non-whitespace text. Empty or blank bodies are left untouched, because
        classifying them would attach an arbitrary language.

        @param mess_data: message data; ``mess_data["message"]`` is a mapping
            of language code to body text
        @return: the same ``mess_data`` object, updated in place when a
            language has been detected
        """
        message = mess_data["message"]
        if len(message) == 1 and "" in message:
            msg = message[""]
            if msg and msg.strip():
                # classify() returns a (language, score) pair; only the
                # language code is needed here.
                lang = identifier.classify(msg)[0]
                mess_data["message"] = {lang: msg}
        return mess_data

    def messageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection for a received message.

        When detection is enabled for the client's profile, ``addLanguage`` is
        added as a callback to ``post_treat``.

        @param client: client receiving the message
        @param message_elt: received message element (not used here)
        @param post_treat: object exposing ``addCallback`` (presumably a
            Deferred fired with the parsed message data -- confirm with caller)
        @return: always True
        """
        if self._is_enabled(client):
            post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Detect the language of an outgoing message before it is sent.

        When detection is enabled for the client's profile, ``data`` is updated
        in place by ``addLanguage``.

        @param client: client sending the message
        @param data: message data, see ``addLanguage``
        @param pre_xml_treatments: not used here
        @param post_xml_treatments: not used here
        @return: always True
        """
        if self._is_enabled(client):
            self.addLanguage(data)
        return True