view src/plugins/plugin_exp_lang_detect.py @ 2144:1d3f73e065e1

core, jp: component handling + client handling refactoring: - SàT can now handle components - plugins now have a "modes" key in PLUGIN_INFO where they declare whether they can be used with clients and/or components. They default to client only. - components are really similar to clients, but with some changes in behaviour: * a component has an "entry point", which is a special plugin with a componentStart method, which is called just after the component is connected * triggers end with different suffixes (e.g. profileConnected vs profileConnectedComponent), so a plugin which manages both clients and components can have different workflows * for clients, only triggers of plugins handling client mode are launched * for components, only triggers of plugins needed in dependencies are launched. They all must handle component mode. * components have a sendHistory attribute (False by default) which can be set to True to allow saving sent messages into history * for convenience, "client" is still used in methods even if it can now be a component * a new "component" boolean attribute tells if we have a component or a client * components have to add the Message protocol themselves * roster and presence protocols are not added for components * component default port is 5347 (which is Prosody's default port) - asyncCreateProfile has been renamed to profileCreate, both to follow the new naming convention and to prepare the transition to a fully asynchronous bridge - createProfile has a new "component" attribute. 
When used to create a component, it must be set to a component entry point - jp: added --component argument to profile/create - the disconnect bridge method is now asynchronous; this way frontends can know when disconnection is finished - new PI_* constants for PLUGIN_INFO values (not used everywhere yet) - client/component connection workflow has been moved to their classes instead of being host methods - host.messageSend is now client.sendMessage, and the former client.sendMessage is now client.sendMessageData. - identities are now handled in the client.identities list, so it can be updated dynamically by plugins (in the future, frontends should be able to update them too through the bridge) - profileConnecting* profileConnected* profileDisconnected* and getHandler now all use client instead of profile
author Goffi <goffi@goffi.org>
date Sun, 12 Feb 2017 17:55:43 +0100
parents d95a6d553bec
children 33c8c4973743
line wrap: on
line source

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

# SAT plugin to detect language (experimental)
# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sat.core.i18n import _, D_
from sat.core.log import getLogger
log = getLogger(__name__)
from sat.core import exceptions

try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    # Report the missing third-party module through the project's own
    # exception type, with an installation hint for the user.
    raise exceptions.MissingModule(
        u'Missing module langid, please download/install it with "pip install langid"')

# Single identifier instance shared by the whole module, built from the model
# shipped with langid. norm_probs=False leaves the scores unnormalised; only
# the language code (first item of the classify() result) is used below.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)


# Static metadata describing this plugin.
# NOTE(review): presumably read by the plugin loader -- confirm against the host.
# NOTE(review): no "modes" key is declared here; per the changeset description
# such plugins default to client-only use -- confirm.
PLUGIN_INFO = {
    "name": "Language detection plugin",
    "import_name": "EXP-LANG-DETECT",
    "type": "EXP",
    "protocols": [],
    "dependencies": [],
    # name of the plugin class defined in this module
    "main": "LangDetect",
    "handler": "no",
    "description": _("""Detect and set message language when unknown""")
}

# Category and name identifying the plugin parameter; the same pair is used to
# read the value back with getParamA() in the triggers of LangDetect.
CATEGORY = D_(u"Misc")
NAME = u"lang_detect"
LABEL = D_(u"language detection")
# XML declaration of an <individual> boolean parameter whose default value is
# "true".
# The template is a unicode literal on purpose: with a byte-string template,
# Python 2's str.format() has to ASCII-encode its unicode arguments and raises
# UnicodeEncodeError as soon as the translated label holds a non-ASCII
# character.
PARAMS = u"""
    <params>
    <individual>
    <category name="{category_name}">
        <param name="{name}" label="{label}" type="bool" value="true" />
    </category>
    </individual>
    </params>
    """.format(category_name=CATEGORY,
               name=NAME,
               label=_(LABEL),
              )


class LangDetect(object):
    """Experimental plugin setting the language of messages that have none.

    When the "lang_detect" parameter is enabled for a profile, the body of a
    message stored under the empty language key is classified with langid and
    re-keyed with the detected language code.
    """

    def __init__(self, host):
        """Declare the plugin parameter and register both message triggers.

        @param host: host object; its ``memory`` and ``trigger`` attributes
            are used here and ``host`` is kept for later parameter lookups
        """
        log.info(_(u"Language detection plugin initialization"))
        self.host = host
        host.memory.updateParams(PARAMS)
        host.trigger.add("MessageReceived", self.MessageReceivedTrigger)
        host.trigger.add("sendMessage", self.MessageSendTrigger)

    def addLanguage(self, mess_data):
        """Set the detected language on a message whose language is unknown.

        Only a message holding exactly one body, stored under the empty
        language key (''), is modified: its 'message' entry is replaced by a
        new dict mapping the detected language code to that body. Any other
        message is left untouched.

        @param mess_data(dict): message data; the 'message' entry maps
            language codes to bodies
        @return (dict): the very same mess_data object, so this method can be
            used directly as a callback
        """
        message = mess_data['message']
        # Membership test + direct lookup rather than keys()[0]/values()[0]:
        # no temporary lists are built and keys() need not be indexable.
        if len(message) == 1 and '' in message:
            msg = message['']
            # classify() returns a (language, score) pair; only the language
            # code is kept
            lang = identifier.classify(msg)[0]
            mess_data["message"] = {lang: msg}
        return mess_data

    def MessageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection for an incoming message.

        If the "lang_detect" parameter is set for the client's profile,
        addLanguage is appended to the post treatments callback chain.

        @param client: client receiving the message (its ``profile`` is read)
        @param message_elt: received message element (unused here)
        @param post_treat: object on which addCallback() is called
            (presumably a Deferred -- confirm against the trigger caller)
        @return (bool): always True
            NOTE(review): presumably tells the trigger manager to continue
            normal processing -- confirm
        """
        lang_detect = self.host.memory.getParamA(NAME, CATEGORY, profile_key=client.profile)
        if lang_detect:
            post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Detect the language of an outgoing message before it is sent.

        If the "lang_detect" parameter is set for the client's profile,
        ``data`` is updated in place by addLanguage.

        @param client: client sending the message (its ``profile`` is read)
        @param data(dict): message data, modified in place
        @param pre_xml_treatments: unused here
        @param post_xml_treatments: unused here
        @return (bool): always True
            NOTE(review): presumably tells the trigger manager to continue
            normal processing -- confirm
        """
        lang_detect = self.host.memory.getParamA(NAME, CATEGORY, profile_key=client.profile)
        if lang_detect:
            self.addLanguage(data)
        return True