diff src/plugins/plugin_xep_0277.py @ 832:c4b22aedb7d7

plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title: The implementation should follow this formal specification: "title" and "content" data can be passed in raw, xhtml or rich format. When we receive from a frontend a new/updated microblog item: - keys "title" or "content" have to be escaped (disable HTML tags) - keys "title_rich" or "content_rich" have to be converted from the current syntax to XHTML - keys "title_xhtml" or "content_xhtml" have to be cleaned from unwanted XHTML content Rules to deal with concurrent keys: - existence of both "*_xhtml" and "*_rich" keys must raise an exception - existence of both raw and ("*_xhtml" or "*_rich") is OK As the storage always needs raw data, if it is not given by the user, it can be extracted from the "*_rich" or "*_xhtml" data (remove the XHTML tags). When a frontend wants to edit a blog post that contains XHTML title or content, the conversion is made from XHTML to the current user-defined syntax. - plugin text_syntaxes: added "text" syntax (using lxml)
author souliane <souliane@mailoo.org>
date Wed, 05 Feb 2014 16:36:51 +0100
parents 1fe00f0c9a91
children eff944ff3e13
line wrap: on
line diff
--- a/src/plugins/plugin_xep_0277.py	Wed Jan 22 17:10:28 2014 +0100
+++ b/src/plugins/plugin_xep_0277.py	Wed Feb 05 16:36:51 2014 +0100
@@ -84,6 +84,17 @@
 
         return (service, node)
 
+    def __removeXHTMLMarkups(self, xhtml):
+        """
+        Remove XHTML markups from the given string.
+        @param xhtml: the XHTML string to be cleaned
+        @return: a Deferred instance for the cleaned string
+        """
+        return self.host.plugins["TEXT-SYNTAXES"].convert(xhtml,
+                                                          self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML,
+                                                          self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
+                                                          False)
+
     @defer.inlineCallbacks
     def item2mbdata(self, item):
         """Convert an XML Item to microblog data used in bridge API
@@ -96,17 +107,36 @@
             raise exceptions.DataError('no entry found')
         _entry = atom.Entry().import_xml(entry_elt.toXml().encode('utf-8'))
         microblog_data = {}
+
+        for key in ['title', 'content']:
+            for type_ in ['', 'xhtml']:
+                try:
+                    attr = getattr(_entry, "%s_%s" % (key, type_) if type_ else key)
+                except AttributeError:
+                    continue
+                if not attr.text:
+                    continue
+                try:
+                    content_type = attr.attrs['type'].lower()
+                except KeyError:
+                    content_type = 'text'
+                if content_type == 'xhtml':
+                    # TODO: proper check of body namespace
+                    microblog_data['%s_xhtml' % key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(attr.text)
+                else:
+                    microblog_data[key] = attr.text
+            if key not in microblog_data and ('%s_xhtml' % key) in microblog_data:
+                microblog_data[key] = yield self.__removeXHTMLMarkups(microblog_data['%s_xhtml' % key])
+        if 'title' not in microblog_data:
+            raise exceptions.DataError(_("Atom entry misses a title element"))
+        if 'content' not in microblog_data:
+            microblog_data['content'] = microblog_data['title']
+            del microblog_data['title']
+            if 'title_xhtml' in microblog_data:
+                microblog_data['content_xhtml'] = microblog_data['title_xhtml']
+                del microblog_data['title_xhtml']
+
         try:
-            try:
-                content_type =_entry.title.attrs['type'].lower()
-            except KeyError:
-                content_type = 'text'
-            if content_type == 'xhtml':
-                # TODO: proper check of body namespace
-                microblog_data['xhtml'] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(_entry.title.text)
-                microblog_data['content'] = _entry.title.text # FIXME: must use text version of the microblog, or convert XHTML to text if not available
-            else:
-                microblog_data['content'] = _entry.title.text
             if len(_entry.authors):
                 microblog_data['author'] = _entry.authors[0].name.text
             microblog_data['updated'] = str(int(_entry.updated.tf))
@@ -126,7 +156,6 @@
                 except (KeyError, exceptions.DataError, RuntimeError):
                     warning("Can't parse link")
                     continue
-
         except (AttributeError, KeyError):
             error(_('Error while parsing atom entry for microblogging event'))
             raise exceptions.DataError
@@ -160,20 +189,48 @@
     @defer.inlineCallbacks
     def data2entry(self, data, profile):
         """Convert a data dict to en entry usable to create an item
-        @param data: data dict as given by bridge method
+        @param data: data dict as given by bridge method.
         @return: deferred which fire domish.Element"""
         _uuid = unicode(uuid.uuid1())
         _entry = atom.Entry()
+        _entry.title = ''  # reset the default value which is not empty
 
-        if "rich" in data:
-            synt = self.host.plugins["TEXT-SYNTAXES"]
-            converted = yield synt.convert(data['rich'], synt.getCurrentSyntax(profile), "XHTML")
-            content = u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted)
-            _entry.title.attrs['type'] = 'xhtml'
-        else:
-            content = escape(data['content'])
-            _entry.title.attrs['type'] = 'text'
-        _entry.title = unicode(content).encode('utf-8')
+        elems = {'title': atom.Title, 'content': atom.Content}
+        synt = self.host.plugins["TEXT-SYNTAXES"]
+
+        # loop on ('title', 'title_rich', 'title_xhtml', 'content', 'content_rich', 'content_xhtml')
+        for key in elems.keys():
+            for type_ in ['', 'rich', 'xhtml']:
+                attr = "%s_%s" % (key, type_) if type_ else key
+                if attr in data:
+                    if type_:
+                        if type_ == 'rich':  # convert input from current syntax to XHTML
+                            converted = yield synt.convert(data[attr], synt.getCurrentSyntax(profile), "XHTML")
+                        else:  # clean the XHTML input
+                            converted = yield synt.clean_xhtml(data[attr])
+                        elem = elems[key](u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted))
+                        elem.attrs['type'] = 'xhtml'
+                        if hasattr(_entry, '%s_xhtml' % key):
+                            raise exceptions.DataError(_("Can't have xhtml and rich content at the same time"))
+                        setattr(_entry, '%s_xhtml' % key, elem)
+                    else:  # raw text only needs to be escaped to get HTML-safe sequence
+                        elem = elems[key](escape(data[attr]))
+                        elem.attrs['type'] = 'text'
+                        setattr(_entry, key, elem)
+            if not getattr(_entry, key).text:
+                if hasattr(_entry, '%s_xhtml' % key):
+                    text = yield self.__removeXHTMLMarkups(getattr(_entry, '%s_xhtml' % key).text)
+                    setattr(_entry, key, text)
+        if not _entry.title.text:  # eventually move the data from content to title
+            _entry.title = _entry.content.text
+            _entry.title.attrs['type'] = _entry.content.attrs['type']
+            _entry.content.text = ''
+            _entry.content.attrs['type'] = ''
+            if hasattr(_entry, 'content_xhtml'):
+                _entry.title_xhtml = atom.Title(_entry.content_xhtml.text)
+                _entry.title_xhtml.attrs['type'] = _entry.content_xhtml.attrs['type']
+                _entry.content_xhtml.text = ''
+                _entry.content_xhtml.attrs['type'] = ''
 
         _entry.author = atom.Author()
         _entry.author.name = data.get('author', self.host.getJidNStream(profile)[0].userhost()).encode('utf-8')