Mercurial > libervia-backend
diff src/plugins/plugin_xep_0277.py @ 832:c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
The implementation should follow this formal specification:
"title" and "content" data can be passed in raw, xhtml or rich format.
When we receive from a frontend a new/updated microblog item:
- keys "title" or "content" have to be escaped (to disable HTML tags)
- keys "title_rich" or "content_rich" have to be converted from the current syntax to XHTML
- keys "title_xhtml" or "content_xhtml" have to be cleaned of unwanted XHTML content
Rules to deal with concurrent keys:
- existence of both "*_xhtml" and "*_rich" keys must raise an exception
- existence of both raw and ("*_xhtml" or "*_rich") is OK
As the storage always needs raw data, if it is not given by the user it can be
extracted from the "*_rich" or "*_xhtml" data (by removing the XHTML tags).
When a frontend wants to edit a blog post that contains an XHTML title or content,
the conversion is made from XHTML to the current user-defined syntax.
- plugin text_syntaxes: added "text" syntax (using lxml)
author | souliane <souliane@mailoo.org> |
---|---|
date | Wed, 05 Feb 2014 16:36:51 +0100 |
parents | 1fe00f0c9a91 |
children | eff944ff3e13 |
line wrap: on
line diff
--- a/src/plugins/plugin_xep_0277.py Wed Jan 22 17:10:28 2014 +0100 +++ b/src/plugins/plugin_xep_0277.py Wed Feb 05 16:36:51 2014 +0100 @@ -84,6 +84,17 @@ return (service, node) + def __removeXHTMLMarkups(self, xhtml): + """ + Remove XHTML markups from the given string. + @param xhtml: the XHTML string to be cleaned + @return: a Deferred instance for the cleaned string + """ + return self.host.plugins["TEXT-SYNTAXES"].convert(xhtml, + self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML, + self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT, + False) + @defer.inlineCallbacks def item2mbdata(self, item): """Convert an XML Item to microblog data used in bridge API @@ -96,17 +107,36 @@ raise exceptions.DataError('no entry found') _entry = atom.Entry().import_xml(entry_elt.toXml().encode('utf-8')) microblog_data = {} + + for key in ['title', 'content']: + for type_ in ['', 'xhtml']: + try: + attr = getattr(_entry, "%s_%s" % (key, type_) if type_ else key) + except AttributeError: + continue + if not attr.text: + continue + try: + content_type = attr.attrs['type'].lower() + except KeyError: + content_type = 'text' + if content_type == 'xhtml': + # TODO: proper check of body namespace + microblog_data['%s_xhtml' % key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(attr.text) + else: + microblog_data[key] = attr.text + if key not in microblog_data and ('%s_xhtml' % key) in microblog_data: + microblog_data[key] = yield self.__removeXHTMLMarkups(microblog_data['%s_xhtml' % key]) + if 'title' not in microblog_data: + raise exceptions.DataError(_("Atom entry misses a title element")) + if 'content' not in microblog_data: + microblog_data['content'] = microblog_data['title'] + del microblog_data['title'] + if 'title_xhtml' in microblog_data: + microblog_data['content_xhtml'] = microblog_data['title_xhtml'] + del microblog_data['title_xhtml'] + try: - try: - content_type =_entry.title.attrs['type'].lower() - except KeyError: - content_type = 'text' - if content_type == 'xhtml': 
- # TODO: proper check of body namespace - microblog_data['xhtml'] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(_entry.title.text) - microblog_data['content'] = _entry.title.text # FIXME: must use text version of the microblog, or convert XHTML to text if not available - else: - microblog_data['content'] = _entry.title.text if len(_entry.authors): microblog_data['author'] = _entry.authors[0].name.text microblog_data['updated'] = str(int(_entry.updated.tf)) @@ -126,7 +156,6 @@ except (KeyError, exceptions.DataError, RuntimeError): warning("Can't parse link") continue - except (AttributeError, KeyError): error(_('Error while parsing atom entry for microblogging event')) raise exceptions.DataError @@ -160,20 +189,48 @@ @defer.inlineCallbacks def data2entry(self, data, profile): """Convert a data dict to en entry usable to create an item - @param data: data dict as given by bridge method + @param data: data dict as given by bridge method. @return: deferred which fire domish.Element""" _uuid = unicode(uuid.uuid1()) _entry = atom.Entry() + _entry.title = '' # reset the default value which is not empty - if "rich" in data: - synt = self.host.plugins["TEXT-SYNTAXES"] - converted = yield synt.convert(data['rich'], synt.getCurrentSyntax(profile), "XHTML") - content = u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted) - _entry.title.attrs['type'] = 'xhtml' - else: - content = escape(data['content']) - _entry.title.attrs['type'] = 'text' - _entry.title = unicode(content).encode('utf-8') + elems = {'title': atom.Title, 'content': atom.Content} + synt = self.host.plugins["TEXT-SYNTAXES"] + + # loop on ('title', 'title_rich', 'title_xhtml', 'content', 'content_rich', 'content_xhtml') + for key in elems.keys(): + for type_ in ['', 'rich', 'xhtml']: + attr = "%s_%s" % (key, type_) if type_ else key + if attr in data: + if type_: + if type_ == 'rich': # convert input from current syntax to XHTML + converted = yield synt.convert(data[attr], synt.getCurrentSyntax(profile), 
"XHTML") + else: # clean the XHTML input + converted = yield synt.clean_xhtml(data[attr]) + elem = elems[key](u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted)) + elem.attrs['type'] = 'xhtml' + if hasattr(_entry, '%s_xhtml' % key): + raise exceptions.DataError(_("Can't have xhtml and rich content at the same time")) + setattr(_entry, '%s_xhtml' % key, elem) + else: # raw text only needs to be escaped to get HTML-safe sequence + elem = elems[key](escape(data[attr])) + elem.attrs['type'] = 'text' + setattr(_entry, key, elem) + if not getattr(_entry, key).text: + if hasattr(_entry, '%s_xhtml' % key): + text = yield self.__removeXHTMLMarkups(getattr(_entry, '%s_xhtml' % key).text) + setattr(_entry, key, text) + if not _entry.title.text: # eventually move the data from content to title + _entry.title = _entry.content.text + _entry.title.attrs['type'] = _entry.content.attrs['type'] + _entry.content.text = '' + _entry.content.attrs['type'] = '' + if hasattr(_entry, 'content_xhtml'): + _entry.title_xhtml = atom.Title(_entry.content_xhtml.text) + _entry.title_xhtml.attrs['type'] = _entry.content_xhtml.attrs['type'] + _entry.content_xhtml.text = '' + _entry.content_xhtml.attrs['type'] = '' _entry.author = atom.Author() _entry.author.name = data.get('author', self.host.getJidNStream(profile)[0].userhost()).encode('utf-8')