Mercurial > libervia-backend
changeset 1453:d5e72362ee91
plugin XEP-0277: better parsing of atom:author element + item2mbdata minor reorganisation for better readability
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 15 Aug 2015 22:22:36 +0200 |
parents | 5116d70ddd1c |
children | 4e2fab4de195 |
files | src/plugins/plugin_xep_0277.py |
diffstat | 1 files changed, 66 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/plugin_xep_0277.py Sat Aug 15 22:22:34 2015 +0200 +++ b/src/plugins/plugin_xep_0277.py Sat Aug 15 22:22:36 2015 +0200 @@ -115,9 +115,26 @@ """ microblog_data = {} - def check_conflict(key): + def check_conflict(key, increment=False): + """Check if key is already in microblog data + + @param key(unicode): key to check + @param increment(bool): if suffix the key with an increment + instead of raising an exception + @raise exceptions.DataError: the key already exists + (not raised if increment is True) + """ if key in microblog_data: - raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id'])) + if not increment: + raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id'])) + else: + idx=1 # the idx 0 is the key without suffix + fmt = "{}#{}" + new_key = fmt.format(key, idx) + while new_key in microblog_data: + idx+=1 + new_key = fmt.format(key, idx) + key = new_key return key @defer.inlineCallbacks @@ -149,6 +166,17 @@ msg = u'No atom entry found in the pubsub item {}'.format(id_) raise failure.Failure(exceptions.DataError(msg)) + # atom:id + try: + id_elt = entry_elt.elements(NS_ATOM, 'id').next() + except StopIteration: + msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_) + log.warning(msg) + microblog_data['atom_id'] = "" + else: + microblog_data['atom_id'] = unicode(id_elt) + + # title/content(s) try: title_elt = entry_elt.elements(NS_ATOM, 'title').next() except StopIteration: @@ -171,22 +199,6 @@ self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT, False) - try: - id_elt = entry_elt.elements(NS_ATOM, 'id').next() - except StopIteration: - msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_) - log.warning(msg) - microblog_data['atom_id'] = "" - else: - microblog_data['atom_id'] = unicode(id_elt) - - try: - updated_elt = entry_elt.elements(NS_ATOM, 'updated').next() - except StopIteration: - msg = u'No atom updated element found in the pubsub item {}'.format(id_) - raise failure.Failure(exceptions.DataError(msg)) - microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt))) - if 'content' not in microblog_data: # use the atom title data as the microblog body content microblog_data['content'] = microblog_data['title'] @@ -195,6 +207,13 @@ microblog_data['content_xhtml'] = microblog_data['title_xhtml'] del microblog_data['title_xhtml'] + # published/updated dates + try: + updated_elt = entry_elt.elements(NS_ATOM, 'updated').next() + except StopIteration: + msg = u'No atom updated element found in the pubsub item {}'.format(id_) + raise failure.Failure(exceptions.DataError(msg)) + microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt))) try: published_elt = entry_elt.elements(NS_ATOM, 'published').next() except StopIteration: @@ -202,10 +221,10 @@ else: microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt))) - + # links for link_elt in entry_elt.elements(NS_ATOM, 'link'): if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments': - key = check_conflict('comments') + key = check_conflict('comments', True) microblog_data[key] = link_elt['href'] try: service, node = self.parseCommentUrl(microblog_data[key]) @@ -213,25 +232,49 @@ log.warning(u"Can't parse url {}".format(microblog_data[key])) del microblog_data[key] else: - microblog_data['comments_service'] = service.full() - microblog_data['comments_node'] = node + microblog_data['{}_service'.format(key)] = service.full() + microblog_data['{}_node'.format(key)] = node else: rel = link_elt.getAttribute('rel','') title = link_elt.getAttribute('title','') href = link_elt.getAttribute('href','') log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href)) + # author try: author_elt = entry_elt.elements(NS_ATOM, 'author').next() except StopIteration: - log.warning("Can't find author element in item {}".format(id_)) + log.debug("Can't find author element in item {}".format(id_)) else: + # name try: name_elt = author_elt.elements(NS_ATOM, 'name').next() except StopIteration: log.warning("No name element found in author element of item {}".format(id_)) else: microblog_data['author'] = unicode(name_elt) + # uri + try: + uri_elt = author_elt.elements(NS_ATOM, 'uri').next() + except StopIteration: + log.debug("No uri element found in author element of item {}".format(id_)) + else: + uri = unicode(uri_elt) + if uri.startswith("xmpp:"): + uri = uri[5:] + microblog_data['author_uri'] = uri + if item_elt.getAttribute("publisher") == uri: + microblog_data['author_uri_verified'] = C.BOOL_TRUE + else: + log.warning("item atom:uri differ from publisher attribute, spoofing attempt ? atom:uri = {} publisher = {}".format(uri, item_elt.getAttribute("publisher"))) + microblog_data['author_uri_verified'] = C.BOOL_FALSE + # email + try: + email_elt = author_elt.elements(NS_ATOM, 'email').next() + except StopIteration: + pass + else: + microblog_data['author_email'] = unicode(email_elt) defer.returnValue(microblog_data)