Mercurial > libervia-backend
diff src/plugins/plugin_xep_0277.py @ 1446:e8c8e467964b
plugins xep-0060, xep-0277: code simplification/cleaning/fix:
- plugin xep-0060: moved rsm data to a more general metadata dict, which will contain all data relative to the node/items set. RSM metadata are prefixed with "rsm_"
- plugin xep-0060: minor docstring fixes
- plugin xep-0060: removed cache to simplify code base
- fixed broken getLastMicroblogs
- added _getLastMicroblogs as wrapper to getLastMicroblogs, for bridge
- removed lxml dependecy for this plugin, use native twisted instead
- several improvments/fixes in item2mbdata
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 15 Aug 2015 22:13:27 +0200 |
parents | 16b1ba7ccaaa |
children | 7797dda847ae |
line wrap: on
line diff
--- a/src/plugins/plugin_xep_0277.py Wed Jul 22 11:42:37 2015 +0200 +++ b/src/plugins/plugin_xep_0277.py Sat Aug 15 22:13:27 2015 +0200 @@ -23,20 +23,21 @@ log = getLogger(__name__) from twisted.words.protocols.jabber import jid from twisted.internet import defer +from twisted.python import failure from sat.core import exceptions from sat.tools.xml_tools import ElementParser from wokkel import pubsub from feed import atom, date -from lxml import etree import uuid from time import time import urlparse from cgi import escape NS_MICROBLOG = 'urn:xmpp:microblog:0' +NS_ATOM = 'http://www.w3.org/2005/Atom' NS_XHTML = 'http://www.w3.org/1999/xhtml' -NS_PUBSUB = 'http://jabber.org/protocol/pubsub' +NS_PUBSUB_EVENT = "{}{}".format(pubsub.NS_PUBSUB, "#event") PLUGIN_INFO = { "name": "Microblogging over XMPP Plugin", @@ -60,10 +61,10 @@ def __init__(self, host): log.info(_("Microblogging plugin initialization")) self.host = host - self.host.plugins["XEP-0163"].addPEPEvent("MICROBLOG", NS_MICROBLOG, self.microblogCB, self.sendMicroblog) + self.host.plugins["XEP-0163"].addPEPEvent("MICROBLOG", NS_MICROBLOG, self.microblogCB, self.sendMicroblog, notify=False) host.bridge.addMethod("getLastMicroblogs", ".plugin", - in_sign='sis', out_sign='aa{ss}', - method=self.getLastMicroblogs, + in_sign='sis', out_sign='(aa{ss}a{ss})', + method=self._getLastMicroblogs, async=True, doc={'summary': 'retrieve items', 'param_0': 'jid: publisher of wanted microblog', @@ -87,16 +88,6 @@ ## data/item transformation ## - def _getDomishInnerContent(self, elt): - """Return the inner content of a domish.Element.""" - result = '' - for child in elt.children: - try: - result += child.toXml() # child id a domish.Element - except AttributeError: - result += child # child is unicode - return result - def _removeXHTMLMarkups(self, xhtml): """Remove XHTML markups from the given string. @@ -109,107 +100,131 @@ False) @defer.inlineCallbacks - def item2mbdata(self, item): + def item2mbdata(self, item_elt): """Convert an XML Item to microblog data used in bridge API - @param item: domish.Element of microblog item + @param item_elt: domish.Element of microblog item @return: microblog data (dictionary) """ - - def xpath(elt, path): - """Return the XPATH result of an entry element or its descendance.""" - # XXX: use a wildcard to work with all and even undefined namespaces - return elt.xpath('/'.join(["*[local-name() = '%s']" % tag for tag in path.split('/')])) - - def date2float(elt, path): - """Convert a date string to float without dealing with the date format.""" - return unicode(date.rfc3339.tf_from_timestamp(xpath(elt, path)[0].text)) - - item_elt = etree.fromstring(item.toXml().encode('utf-8')) - item_id = item_elt.get('id', '') - - # XXX: when you raise an exception from inline callbacks, do defer.returnValue(Exception()) - # to make it catchable by an eventual errback. If you do raise Exception, raise Exception() - # or defer.returnValue(Exception), it will explode and then the normal callback is ran. - - if item.uri not in (NS_PUBSUB, NS_PUBSUB + "#event"): - log.error(_(u"Unsupported namespace {ns} in pubsub item {id}").format(ns=item.uri, id=item_id)) - defer.returnValue(exceptions.DataError()) - - try: - entry_elt = xpath(item_elt, 'entry')[0] - except IndexError: - log.error(_(u'No atom entry found in the pubsub item %s') % item_id) - defer.returnValue(exceptions.DataError()) - microblog_data = {} - for key in ['title', 'content']: # process the textual elements - for attr_elt in xpath(entry_elt, key): - # Return the inner content of a lxml.etree.Element. It is not - # trivial because the lxml tostring method would return the full - # content including elt's tag and attributes, and elt.getchildren() - # would skip a text value which is not within an element... - attr_content = self._getDomishInnerContent(ElementParser()(etree.tostring(attr_elt))) - if not attr_content.strip(): - continue # element with empty value - content_type = attr_elt.get('type', 'text').lower() - if content_type == 'xhtml': - # Check for XHTML namespace and decapsulate the content so the user - # who wants to modify an entry will see the text that he entered. Also - # this avoids successive encapsulation with a new <div>...</div> at - # each modification (encapsulation is done in self.data2entry) - elt = ElementParser()(attr_content) - if elt.uri != NS_XHTML: - raise exceptions.DataError(_('Content of type XHTML must declare its namespace!')) - text = self._getDomishInnerContent(elt) - microblog_data['%s_xhtml' % key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(text) - else: - microblog_data[key] = attr_content - if key not in microblog_data and ('%s_xhtml' % key) in microblog_data: - microblog_data[key] = yield self._removeXHTMLMarkups(microblog_data['%s_xhtml' % key]) + def check_conflict(key): + if key in microblog_data: + raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id'])) + return key + + @defer.inlineCallbacks + def parseElement(elem): + """Parse title/content elements and fill microblog_data accordingly""" + type_ = elem.getAttribute('type') + if type_ == 'xhtml': + data_elt = elem.firstChildElement() + if data_elt.uri != NS_XHTML: + raise failure.Failure(exceptions.DataError(_('Content of type XHTML must declare its namespace!'))) + key = check_conflict(u'{}_xhtml'.format(elem.name)) + data = unicode(data_elt) + microblog_data[key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(data) + else: + key = check_conflict(elem.name) + microblog_data[key] = unicode(elem) + + + id_ = item_elt.getAttribute('id', '') # there can be no id for transient nodes + microblog_data['id'] = id_ + if item_elt.uri not in (pubsub.NS_PUBSUB, NS_PUBSUB_EVENT): + msg = u"Unsupported namespace {ns} in pubsub item {id_}".format(ns=item_elt.uri, id_=id_) + log.warning(msg) + raise failure.Failure(exceptions.DataError(msg)) + + try: + entry_elt = item_elt.elements(NS_ATOM, 'entry').next() + except StopIteration: + msg = u'No atom entry found in the pubsub item {}'.format(id_) + raise failure.Failure(exceptions.DataError(msg)) - try: # check for mandatory elements - microblog_data['id'] = xpath(entry_elt, 'id')[0].text - microblog_data['updated'] = date2float(entry_elt, 'updated') - assert('title' in microblog_data) # has been processed already - except IndexError: - log.error(_(u"Atom entry of pubsub item %s misses a required element") % item_id) - defer.returnValue(exceptions.DataError()) + try: + title_elt = entry_elt.elements(NS_ATOM, 'title').next() + except StopIteration: + msg = u'No atom title found in the pubsub item {}'.format(id_) + raise failure.Failure(exceptions.DataError(msg)) + + yield parseElement(title_elt) + + for content_elt in entry_elt.elements(NS_ATOM, 'content'): + yield parseElement(content_elt) - if 'content' not in microblog_data: # use the atom title data as the microblog body content + # we check that text content is present + for key in ('title', 'content'): + if key not in microblog_data and ('{}_xhtml'.format(key)) in microblog_data: + log.warning(u"item {id_} provide a {key}_xhtml data but not a text one".format(id_, key)) + # ... and do the conversion if it's not + microblog_data[key] = yield self.host.plugins["TEXT-SYNTAXES"].\ + convert(microblog_data['{}_xhtml'.format(key)], + self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML, + self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT, + False) + + try: + id_elt = entry_elt.elements(NS_ATOM, 'id').next() + except StopIteration: + msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_) + log.warning(msg) + microblog_data['atom_id'] = "" + else: + microblog_data['atom_id'] = unicode(id_elt) + + try: + updated_elt = entry_elt.elements(NS_ATOM, 'updated').next() + except StopIteration: + msg = u'No atom updated element found in the pubsub item {}'.format(id_) + raise failure.Failure(exceptions.DataError(msg)) + microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt))) + + if 'content' not in microblog_data: + # use the atom title data as the microblog body content microblog_data['content'] = microblog_data['title'] del microblog_data['title'] if 'title_xhtml' in microblog_data: microblog_data['content_xhtml'] = microblog_data['title_xhtml'] del microblog_data['title_xhtml'] - # recommended and optional elements with a fallback value try: - microblog_data['published'] = date2float(entry_elt, 'published') - except IndexError: + published_elt = entry_elt.elements(NS_ATOM, 'published').next() + except StopIteration: microblog_data['published'] = microblog_data['updated'] + else: + microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt))) - # other recommended and optional elements + + for link_elt in entry_elt.elements(NS_ATOM, 'link'): + if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments': + key = check_conflict('comments') + microblog_data[key] = link_elt['href'] + try: + service, node = self.parseCommentUrl(microblog_data[key]) + except: + log.warning(u"Can't parse url {}".format(microblog_data[key])) + del microblog_data[key] + else: + microblog_data['comments_service'] = service.full() + microblog_data['comments_node'] = node + else: + rel = link_elt.getAttribute('rel','') + title = link_elt.getAttribute('title','') + href = link_elt.getAttribute('href','') + log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href)) + try: - link_elt = xpath(entry_elt, "link")[0] + author_elt = entry_elt.elements(NS_ATOM, 'author').next() + except StopIteration: + log.warning("Can't find author element in item {}".format(id_)) + else: try: - assert(link_elt.attrib['title'] == "comments") - microblog_data['comments'] = link_elt.attrib['href'] - service, node = self.parseCommentUrl(microblog_data["comments"]) - microblog_data['comments_service'] = service.full() - microblog_data['comments_node'] = node - except (exceptions.DataError, RuntimeError, KeyError): - log.warning(_(u"Can't parse the link element of atom entry %s") % microblog_data['id']) - except: - pass - try: - microblog_data['author'] = xpath(entry_elt, 'author/name')[0].text - except IndexError: - try: # XXX: workaround for Jappix behaviour - microblog_data['author'] = xpath(entry_elt, 'author/nick')[0].text - except IndexError: - log.warning(_(u"Can't find author element in atom entry %s") % microblog_data['id']) + name_elt = author_elt.elements(NS_ATOM, 'name').next() + except StopIteration: + log.warning("No name element found in author element of item {}".format(id_)) + else: + microblog_data['author'] = unicode(name_elt) defer.returnValue(microblog_data) @@ -220,6 +235,7 @@ @param data: data dict as given by bridge method. @return: deferred which fire domish.Element """ + #TODO: rewrite this directly with twisted (i.e. without atom / reparsing) _uuid = unicode(uuid.uuid1()) _entry = atom.Entry() _entry.title = '' # reset the default value which is not empty @@ -240,7 +256,7 @@ elem = elems[key]((u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted)).encode('utf-8')) elem.attrs['type'] = 'xhtml' if hasattr(_entry, '%s_xhtml' % key): - raise exceptions.DataError(_("Can't have xhtml and rich content at the same time")) + raise failure.Failure(exceptions.DataError(_("Can't have xhtml and rich content at the same time"))) setattr(_entry, '%s_xhtml' % key, elem) else: # raw text only needs to be escaped to get HTML-safe sequence elem = elems[key](escape(data[attr]).encode('utf-8')) @@ -286,34 +302,45 @@ @param data: must include content @param profile: profile which send the mood""" if 'content' not in data: - log.error(_("Microblog data must contain at least 'content' key")) - raise exceptions.DataError('no "content" key found') + log.error("Microblog data must contain at least 'content' key") + raise failure.Failure(exceptions.DataError('no "content" key found')) content = data['content'] if not content: - log.error(_("Microblog data's content value must not be empty")) - raise exceptions.DataError('empty content') + log.error("Microblog data's content value must not be empty") + raise failure.Failure(exceptions.DataError('empty content')) item = yield self.data2entry(data, profile) ret = yield self.host.plugins["XEP-0060"].publish(None, NS_MICROBLOG, [item], profile_key=profile) defer.returnValue(ret) ## get ## + def _getLastMicroblogs(self, pub_jid_s, max_items=10, profile_key=C.PROF_KEY_NONE): + return self.getLastMicroblogs(jid.JID(pub_jid_s), max_items, profile_key) + + @defer.inlineCallbacks def getLastMicroblogs(self, pub_jid, max_items=10, profile_key=C.PROF_KEY_NONE): """Get the last published microblogs - @param pub_jid: jid of the publisher + @param pub_jid(jid.JID): jid of the publisher @param max_items: how many microblogs we want to get @param profile_key: profile key - @return: a deferred couple with the list of items and RSM information. + @return: a deferred couple with the list of items and metadatas. """ - d = self.host.plugins["XEP-0060"].getItems(jid.JID(pub_jid), NS_MICROBLOG, max_items=max_items, profile_key=profile_key) - d.addCallback(lambda res: (defer.DeferredList(map(self.item2mbdata, res[0]), consumeErrors=True), res[1])) - d.addCallback(lambda res: ([value for (success, value) in res[0] if success], res[1])) - return d + items, metadata = yield self.host.plugins["XEP-0060"].getItems(pub_jid, NS_MICROBLOG, max_items=max_items, profile_key=profile_key) + dlist_result = yield defer.DeferredList(map(self.item2mbdata, items), consumeErrors=True) + items_data = [] + for success, value in dlist_result: + if success: + items_data.append(value) + else: + log.warning(u"Error while parsing microblog data: {}".format(value.value)) + defer.returnValue((items_data, metadata)) def parseCommentUrl(self, node_url): - """Determine the fields comments_service and comments_node of a microblog data + """Parse a XMPP URI + + Determine the fields comments_service and comments_node of a microblog data from the href attribute of an entry's link element. For example this input: xmpp:sat-pubsub.libervia.org?node=urn%3Axmpp%3Acomments%3A_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn%3Axmpp%3Agroupblog%3Asouliane%40libervia.org will return (JID(u'sat-pubsub.libervia.org'), 'urn:xmpp:comments:_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn:xmpp:groupblog:souliane@libervia.org') @@ -328,7 +355,7 @@ node = parsed_queries.get('node', [''])[0] if not node: - raise exceptions.DataError('Invalid comments link') + raise failure.Failure(exceptions.DataError('Invalid comments link')) return (service, node)