# HG changeset patch # User Goffi # Date 1466441067 -7200 # Node ID 4c5d8cd35690ee416ebaf040d8009782ee1516b5 # Parent a86e41d9245dfe77d959e4b8f1d2053407bbe225 plugin exp_lang_detect: language detection plugin, first draft This plugin tries to autodetect the language of outgoing and incoming messages when it is not specified by the sender diff -r a86e41d9245d -r 4c5d8cd35690 src/plugins/plugin_exp_lang_detect.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/plugin_exp_lang_detect.py Mon Jun 20 18:44:27 2016 +0200 @@ -0,0 +1,68 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- + +# SAT plugin to detect language (experimental) +# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.

from sat.core.i18n import _
from sat.core.log import getLogger
log = getLogger(__name__)
from sat.core import exceptions

try:
    from langid.langid import LanguageIdentifier, model
except ImportError:
    raise exceptions.MissingModule(u'Missing module langid, please download/install it with "pip install langid"')

# One identifier is built at import time and shared by every LangDetect
# instance. Only the language code returned by classify() is used below, so
# the score normalisation (norm_probs) is left disabled.
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)


PLUGIN_INFO = {
    "name": "Language detection plugin",
    "import_name": "EXP-LANG-DETECT",
    "type": "EXP",
    "protocols": [],
    "dependencies": [],
    "main": "LangDetect",
    "handler": "no",
    "description": _("""Detect and set message language when unknown""")
}


class LangDetect(object):
    """Guess the language of messages whose language is not specified.

    The plugin registers itself on the "MessageReceived" and "messageSend"
    triggers of the host and rewrites the message mapping so that the body is
    keyed by the detected language code.
    """

    def __init__(self, host):
        """Register the plugin triggers on the host.

        @param host: object exposing a ``trigger.add(name, callback)`` API
        """
        log.info(_(u"Language detection plugin initialization"))
        self.host = host
        host.trigger.add("MessageReceived", self.MessageReceivedTrigger)
        host.trigger.add("messageSend", self.MessageSendTrigger)

    def addLanguage(self, mess_data):
        """Set the detected language on a message with no language specified.

        Detection only happens when ``mess_data['message']`` holds exactly one
        body and that body is stored under the empty-string key; any other
        mapping is left untouched.

        @param mess_data(dict): message data; its 'message' entry maps
            language codes to message bodies and is replaced in place
        @return (dict): the same mess_data object, so that this method can be
            chained as a callback
        """
        message = mess_data['message']
        # With a single entry, "'' in message" is the same test as
        # "the only key is ''", without relying on keys() being indexable
        # (which is only true for Python 2 lists, not for dict views).
        if len(message) == 1 and '' in message:
            msg = message['']
            # classify() returns a (language, score) pair; the score is unused
            lang = identifier.classify(msg)[0]
            mess_data["message"] = {lang: msg}
        return mess_data

    def MessageReceivedTrigger(self, client, message_elt, post_treat):
        """Schedule language detection for an incoming message.

        addLanguage is appended to post_treat's callbacks, so it runs on
        whatever value that chain delivers rather than on message_elt itself.

        @param client: unused here
        @param message_elt: unused here
        @param post_treat: object with an addCallback method (presumably a
            Twisted Deferred -- TODO confirm against the trigger caller)
        @return (bool): always True (presumably lets the host continue its
            normal workflow -- TODO confirm)
        """
        post_treat.addCallback(self.addLanguage)
        return True

    def MessageSendTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Detect the language of an outgoing message, modifying data in place.

        @param client: unused here
        @param data(dict): message data, passed to addLanguage
        @param pre_xml_treatments: unused here
        @param post_xml_treatments: unused here
        @return (bool): always True (presumably lets the host continue its
            normal workflow -- TODO confirm)
        """
        self.addLanguage(data)
        return True