comparison src/plugins/plugin_xep_0277.py @ 832:c4b22aedb7d7

plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title. The implementation should follow this formal specification: "title" and "content" data can be passed in raw, xhtml or rich format. When we receive a new/updated microblog item from a frontend: - keys "title" or "content" have to be escaped (disable HTML tags); - keys "title_rich" or "content_rich" have to be converted from the current syntax to XHTML; - keys "title_xhtml" or "content_xhtml" have to be cleaned of unwanted XHTML content. Rules to deal with concurrent keys: - existence of both "*_xhtml" and "*_rich" keys must raise an exception; - existence of both raw and ("*_xhtml" or "*_rich") is OK. As the storage always needs raw data, if it is not given by the user it can be extracted from the "*_rich" or "*_xhtml" data (by removing the XHTML tags). When a frontend wants to edit a blog post that contains an XHTML title or content, the conversion is made from XHTML to the current user-defined syntax. - plugin text_syntaxes: added "text" syntax (using lxml)
author souliane <souliane@mailoo.org>
date Wed, 05 Feb 2014 16:36:51 +0100
parents 1fe00f0c9a91
children eff944ff3e13
comparison
equal deleted inserted replaced
831:d7f9cd8a08cd 832:c4b22aedb7d7
82 if not node: 82 if not node:
83 raise exceptions.DataError('Invalid comments link') 83 raise exceptions.DataError('Invalid comments link')
84 84
85 return (service, node) 85 return (service, node)
86 86
87 def __removeXHTMLMarkups(self, xhtml):
88 """
89 Remove XHTML markups from the given string.
90 @param xhtml: the XHTML string to be cleaned
91 @return: a Deferred instance for the cleaned string
92 """
93 return self.host.plugins["TEXT-SYNTAXES"].convert(xhtml,
94 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML,
95 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
96 False)
97
87 @defer.inlineCallbacks 98 @defer.inlineCallbacks
88 def item2mbdata(self, item): 99 def item2mbdata(self, item):
89 """Convert an XML Item to microblog data used in bridge API 100 """Convert an XML Item to microblog data used in bridge API
90 @param item: domish.Element of microblog item 101 @param item: domish.Element of microblog item
91 @return: microblog data (dictionary)""" 102 @return: microblog data (dictionary)"""
94 except IndexError: 105 except IndexError:
95 warning(_('No entry element in microblog item')) 106 warning(_('No entry element in microblog item'))
96 raise exceptions.DataError('no entry found') 107 raise exceptions.DataError('no entry found')
97 _entry = atom.Entry().import_xml(entry_elt.toXml().encode('utf-8')) 108 _entry = atom.Entry().import_xml(entry_elt.toXml().encode('utf-8'))
98 microblog_data = {} 109 microblog_data = {}
110
111 for key in ['title', 'content']:
112 for type_ in ['', 'xhtml']:
113 try:
114 attr = getattr(_entry, "%s_%s" % (key, type_) if type_ else key)
115 except AttributeError:
116 continue
117 if not attr.text:
118 continue
119 try:
120 content_type = attr.attrs['type'].lower()
121 except KeyError:
122 content_type = 'text'
123 if content_type == 'xhtml':
124 # TODO: proper check of body namespace
125 microblog_data['%s_xhtml' % key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(attr.text)
126 else:
127 microblog_data[key] = attr.text
128 if key not in microblog_data and ('%s_xhtml' % key) in microblog_data:
129 microblog_data[key] = yield self.__removeXHTMLMarkups(microblog_data['%s_xhtml' % key])
130 if 'title' not in microblog_data:
131 raise exceptions.DataError(_("Atom entry misses a title element"))
132 if 'content' not in microblog_data:
133 microblog_data['content'] = microblog_data['title']
134 del microblog_data['title']
135 if 'title_xhtml' in microblog_data:
136 microblog_data['content_xhtml'] = microblog_data['title_xhtml']
137 del microblog_data['title_xhtml']
138
99 try: 139 try:
100 try:
101 content_type =_entry.title.attrs['type'].lower()
102 except KeyError:
103 content_type = 'text'
104 if content_type == 'xhtml':
105 # TODO: proper check of body namespace
106 microblog_data['xhtml'] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(_entry.title.text)
107 microblog_data['content'] = _entry.title.text # FIXME: must use text version of the microblog, or convert XHTML to text if not available
108 else:
109 microblog_data['content'] = _entry.title.text
110 if len(_entry.authors): 140 if len(_entry.authors):
111 microblog_data['author'] = _entry.authors[0].name.text 141 microblog_data['author'] = _entry.authors[0].name.text
112 microblog_data['updated'] = str(int(_entry.updated.tf)) 142 microblog_data['updated'] = str(int(_entry.updated.tf))
113 try: 143 try:
114 microblog_data['published'] = str(int(_entry.published.tf)) 144 microblog_data['published'] = str(int(_entry.published.tf))
124 microblog_data['comments_node'] = node 154 microblog_data['comments_node'] = node
125 break 155 break
126 except (KeyError, exceptions.DataError, RuntimeError): 156 except (KeyError, exceptions.DataError, RuntimeError):
127 warning("Can't parse link") 157 warning("Can't parse link")
128 continue 158 continue
129
130 except (AttributeError, KeyError): 159 except (AttributeError, KeyError):
131 error(_('Error while parsing atom entry for microblogging event')) 160 error(_('Error while parsing atom entry for microblogging event'))
132 raise exceptions.DataError 161 raise exceptions.DataError
133 162
134 ##XXX: workaround for Jappix behaviour 163 ##XXX: workaround for Jappix behaviour
158 return d 187 return d
159 188
160 @defer.inlineCallbacks 189 @defer.inlineCallbacks
161 def data2entry(self, data, profile): 190 def data2entry(self, data, profile):
162 """Convert a data dict to en entry usable to create an item 191 """Convert a data dict to en entry usable to create an item
163 @param data: data dict as given by bridge method 192 @param data: data dict as given by bridge method.
164 @return: deferred which fire domish.Element""" 193 @return: deferred which fire domish.Element"""
165 _uuid = unicode(uuid.uuid1()) 194 _uuid = unicode(uuid.uuid1())
166 _entry = atom.Entry() 195 _entry = atom.Entry()
167 196 _entry.title = '' # reset the default value which is not empty
168 if "rich" in data: 197
169 synt = self.host.plugins["TEXT-SYNTAXES"] 198 elems = {'title': atom.Title, 'content': atom.Content}
170 converted = yield synt.convert(data['rich'], synt.getCurrentSyntax(profile), "XHTML") 199 synt = self.host.plugins["TEXT-SYNTAXES"]
171 content = u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted) 200
172 _entry.title.attrs['type'] = 'xhtml' 201 # loop on ('title', 'title_rich', 'title_xhtml', 'content', 'content_rich', 'content_xhtml')
173 else: 202 for key in elems.keys():
174 content = escape(data['content']) 203 for type_ in ['', 'rich', 'xhtml']:
175 _entry.title.attrs['type'] = 'text' 204 attr = "%s_%s" % (key, type_) if type_ else key
176 _entry.title = unicode(content).encode('utf-8') 205 if attr in data:
206 if type_:
207 if type_ == 'rich': # convert input from current syntax to XHTML
208 converted = yield synt.convert(data[attr], synt.getCurrentSyntax(profile), "XHTML")
209 else: # clean the XHTML input
210 converted = yield synt.clean_xhtml(data[attr])
211 elem = elems[key](u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted))
212 elem.attrs['type'] = 'xhtml'
213 if hasattr(_entry, '%s_xhtml' % key):
214 raise exceptions.DataError(_("Can't have xhtml and rich content at the same time"))
215 setattr(_entry, '%s_xhtml' % key, elem)
216 else: # raw text only needs to be escaped to get HTML-safe sequence
217 elem = elems[key](escape(data[attr]))
218 elem.attrs['type'] = 'text'
219 setattr(_entry, key, elem)
220 if not getattr(_entry, key).text:
221 if hasattr(_entry, '%s_xhtml' % key):
222 text = yield self.__removeXHTMLMarkups(getattr(_entry, '%s_xhtml' % key).text)
223 setattr(_entry, key, text)
224 if not _entry.title.text: # eventually move the data from content to title
225 _entry.title = _entry.content.text
226 _entry.title.attrs['type'] = _entry.content.attrs['type']
227 _entry.content.text = ''
228 _entry.content.attrs['type'] = ''
229 if hasattr(_entry, 'content_xhtml'):
230 _entry.title_xhtml = atom.Title(_entry.content_xhtml.text)
231 _entry.title_xhtml.attrs['type'] = _entry.content_xhtml.attrs['type']
232 _entry.content_xhtml.text = ''
233 _entry.content_xhtml.attrs['type'] = ''
177 234
178 _entry.author = atom.Author() 235 _entry.author = atom.Author()
179 _entry.author.name = data.get('author', self.host.getJidNStream(profile)[0].userhost()).encode('utf-8') 236 _entry.author.name = data.get('author', self.host.getJidNStream(profile)[0].userhost()).encode('utf-8')
180 _entry.updated = float(data.get('updated', time())) 237 _entry.updated = float(data.get('updated', time()))
181 _entry.published = float(data.get('published', time())) 238 _entry.published = float(data.get('published', time()))