view src/plugins/plugin_exp_lang_detect.py @ 1965:4c5d8cd35690

plugin exp_lang_detect: language detection plugin, first draft. This plugin tries to autodetect the language of outgoing and incoming messages when it is not specified by the sender.
author Goffi <goffi@goffi.org>
date Mon, 20 Jun 2016 18:44:27 +0200
parents
children d95a6d553bec
line wrap: on
line source

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

# SAT plugin to detect language (experimental)
# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sat.core.i18n import _
from sat.core.log import getLogger
log = getLogger(__name__)
from sat.core import exceptions

# langid is a third-party module: when it cannot be imported, raise the
# project's MissingModule error with installation instructions instead of
# letting the bare ImportError propagate.
try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    raise exceptions.MissingModule(u'Missing module langid, please download/install it with "pip install langid"')

# Single identifier instance, built once at import time from the ``model``
# object exported by langid.langid and shared by every call in this module.
# NOTE(review): norm_probs=False presumably leaves the confidence score
# un-normalised; only the language code is read in this file — confirm
# against the langid documentation.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)


# Plugin metadata; "main" names the plugin class defined in this module.
PLUGIN_INFO = {
    "name": "Language detection plugin",
    "import_name": "EXP-LANG-DETECT",
    "type": "EXP",
    "protocols": [],
    "dependencies": [],
    "main": "LangDetect",
    "handler": "no",
    "description": _("Detect and set message language when unknown"),
}


class LangDetect(object):
    """Plugin setting a detected language on messages which have none.

    Two host triggers are registered at construction time, "MessageReceived"
    and "messageSend"; both end up running addLanguage on the message data.
    """

    def __init__(self, host):
        """Keep a reference to the host and register the message triggers.

        @param host: object exposing ``trigger.add(name, callback)``
        """
        log.info(_(u"Language detection plugin initialization"))
        self.host = host
        host.trigger.add("MessageReceived", self.MessageReceivedTrigger)
        host.trigger.add("messageSend", self.MessageSendTrigger)

    def addLanguage(self, mess_data):
        """Replace the unknown language of a message with a detected one.

        Detection only happens when mess_data['message'] holds exactly one
        entry and that entry is keyed by the empty string. In every other
        case (no body, several bodies, language already set) mess_data is
        left untouched.

        @param mess_data(dict): message data; its 'message' value is a dict
            which presumably maps a language code ('' when unknown) to the
            message body
        @return (dict): the same mess_data object, with 'message' replaced
            by {detected_language: body} when detection was done
        """
        bodies = mess_data['message']
        # Membership test + direct lookup rather than keys()[0]/values()[0]:
        # dict views cannot be indexed on Python 3, and this form behaves
        # the same on Python 2.
        if len(bodies) == 1 and '' in bodies:
            body = bodies['']
            # first item of classify()'s result is used as the language
            lang = identifier.classify(body)[0]
            mess_data["message"] = {lang: body}
        return mess_data

    def MessageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection on an incoming message.

        addLanguage is added as a callback of post_treat, so detection is run
        on whatever value that callback chain provides rather than on
        message_elt directly.

        @param client: unused here
        @param message_elt: unused here
        @param post_treat: object with an ``addCallback`` method
            (presumably a Twisted Deferred — confirm against the caller)
        @return (bool): always True
            (presumably lets normal message processing continue — confirm)
        """
        post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Run language detection on an outgoing message, in place.

        @param client: unused here
        @param data(dict): message data, handed to addLanguage and possibly
            modified by it
        @param pre_xml_treatments: unused here
        @param post_xml_treatments: unused here
        @return (bool): always True
            (presumably lets normal message processing continue — confirm)
        """
        self.addLanguage(data)
        return True