comparison src/plugins/plugin_xep_0277.py @ 1453:d5e72362ee91

plugin XEP-0277: better parsing of atom:author element + item2mbdata minor reorganisation for better readability
author Goffi <goffi@goffi.org>
date Sat, 15 Aug 2015 22:22:36 +0200
parents 5116d70ddd1c
children 4e2fab4de195
comparison
equal deleted inserted replaced
1452:5116d70ddd1c 1453:d5e72362ee91
113 @param item_elt: domish.Element of microblog item 113 @param item_elt: domish.Element of microblog item
114 @return: microblog data (dictionary) 114 @return: microblog data (dictionary)
115 """ 115 """
116 microblog_data = {} 116 microblog_data = {}
117 117
118 def check_conflict(key): 118 def check_conflict(key, increment=False):
119 """Check if key is already in microblog data
120
121 @param key(unicode): key to check
122 @param increment(bool): if True, suffix the key with an increment
123 instead of raising an exception
124 @raise exceptions.DataError: the key already exists
125 (not raised if increment is True)
126 """
119 if key in microblog_data: 127 if key in microblog_data:
120 raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id'])) 128 if not increment:
129 raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id']))
130 else:
131 idx=1 # the idx 0 is the key without suffix
132 fmt = "{}#{}"
133 new_key = fmt.format(key, idx)
134 while new_key in microblog_data:
135 idx+=1
136 new_key = fmt.format(key, idx)
137 key = new_key
121 return key 138 return key
122 139
123 @defer.inlineCallbacks 140 @defer.inlineCallbacks
124 def parseElement(elem): 141 def parseElement(elem):
125 """Parse title/content elements and fill microblog_data accordingly""" 142 """Parse title/content elements and fill microblog_data accordingly"""
147 entry_elt = item_elt.elements(NS_ATOM, 'entry').next() 164 entry_elt = item_elt.elements(NS_ATOM, 'entry').next()
148 except StopIteration: 165 except StopIteration:
149 msg = u'No atom entry found in the pubsub item {}'.format(id_) 166 msg = u'No atom entry found in the pubsub item {}'.format(id_)
150 raise failure.Failure(exceptions.DataError(msg)) 167 raise failure.Failure(exceptions.DataError(msg))
151 168
169 # atom:id
170 try:
171 id_elt = entry_elt.elements(NS_ATOM, 'id').next()
172 except StopIteration:
173 msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_)
174 log.warning(msg)
175 microblog_data['atom_id'] = ""
176 else:
177 microblog_data['atom_id'] = unicode(id_elt)
178
179 # title/content(s)
152 try: 180 try:
153 title_elt = entry_elt.elements(NS_ATOM, 'title').next() 181 title_elt = entry_elt.elements(NS_ATOM, 'title').next()
154 except StopIteration: 182 except StopIteration:
155 msg = u'No atom title found in the pubsub item {}'.format(id_) 183 msg = u'No atom title found in the pubsub item {}'.format(id_)
156 raise failure.Failure(exceptions.DataError(msg)) 184 raise failure.Failure(exceptions.DataError(msg))
169 convert(microblog_data['{}_xhtml'.format(key)], 197 convert(microblog_data['{}_xhtml'.format(key)],
170 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML, 198 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML,
171 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT, 199 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
172 False) 200 False)
173 201
174 try:
175 id_elt = entry_elt.elements(NS_ATOM, 'id').next()
176 except StopIteration:
177 msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_)
178 log.warning(msg)
179 microblog_data['atom_id'] = ""
180 else:
181 microblog_data['atom_id'] = unicode(id_elt)
182
183 try:
184 updated_elt = entry_elt.elements(NS_ATOM, 'updated').next()
185 except StopIteration:
186 msg = u'No atom updated element found in the pubsub item {}'.format(id_)
187 raise failure.Failure(exceptions.DataError(msg))
188 microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt)))
189
190 if 'content' not in microblog_data: 202 if 'content' not in microblog_data:
191 # use the atom title data as the microblog body content 203 # use the atom title data as the microblog body content
192 microblog_data['content'] = microblog_data['title'] 204 microblog_data['content'] = microblog_data['title']
193 del microblog_data['title'] 205 del microblog_data['title']
194 if 'title_xhtml' in microblog_data: 206 if 'title_xhtml' in microblog_data:
195 microblog_data['content_xhtml'] = microblog_data['title_xhtml'] 207 microblog_data['content_xhtml'] = microblog_data['title_xhtml']
196 del microblog_data['title_xhtml'] 208 del microblog_data['title_xhtml']
197 209
210 # published/updated dates
211 try:
212 updated_elt = entry_elt.elements(NS_ATOM, 'updated').next()
213 except StopIteration:
214 msg = u'No atom updated element found in the pubsub item {}'.format(id_)
215 raise failure.Failure(exceptions.DataError(msg))
216 microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt)))
198 try: 217 try:
199 published_elt = entry_elt.elements(NS_ATOM, 'published').next() 218 published_elt = entry_elt.elements(NS_ATOM, 'published').next()
200 except StopIteration: 219 except StopIteration:
201 microblog_data['published'] = microblog_data['updated'] 220 microblog_data['published'] = microblog_data['updated']
202 else: 221 else:
203 microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt))) 222 microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt)))
204 223
205 224 # links
206 for link_elt in entry_elt.elements(NS_ATOM, 'link'): 225 for link_elt in entry_elt.elements(NS_ATOM, 'link'):
207 if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments': 226 if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments':
208 key = check_conflict('comments') 227 key = check_conflict('comments', True)
209 microblog_data[key] = link_elt['href'] 228 microblog_data[key] = link_elt['href']
210 try: 229 try:
211 service, node = self.parseCommentUrl(microblog_data[key]) 230 service, node = self.parseCommentUrl(microblog_data[key])
212 except: 231 except:
213 log.warning(u"Can't parse url {}".format(microblog_data[key])) 232 log.warning(u"Can't parse url {}".format(microblog_data[key]))
214 del microblog_data[key] 233 del microblog_data[key]
215 else: 234 else:
216 microblog_data['comments_service'] = service.full() 235 microblog_data['{}_service'.format(key)] = service.full()
217 microblog_data['comments_node'] = node 236 microblog_data['{}_node'.format(key)] = node
218 else: 237 else:
219 rel = link_elt.getAttribute('rel','') 238 rel = link_elt.getAttribute('rel','')
220 title = link_elt.getAttribute('title','') 239 title = link_elt.getAttribute('title','')
221 href = link_elt.getAttribute('href','') 240 href = link_elt.getAttribute('href','')
222 log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href)) 241 log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href))
223 242
243 # author
224 try: 244 try:
225 author_elt = entry_elt.elements(NS_ATOM, 'author').next() 245 author_elt = entry_elt.elements(NS_ATOM, 'author').next()
226 except StopIteration: 246 except StopIteration:
227 log.warning("Can't find author element in item {}".format(id_)) 247 log.debug("Can't find author element in item {}".format(id_))
228 else: 248 else:
249 # name
229 try: 250 try:
230 name_elt = author_elt.elements(NS_ATOM, 'name').next() 251 name_elt = author_elt.elements(NS_ATOM, 'name').next()
231 except StopIteration: 252 except StopIteration:
232 log.warning("No name element found in author element of item {}".format(id_)) 253 log.warning("No name element found in author element of item {}".format(id_))
233 else: 254 else:
234 microblog_data['author'] = unicode(name_elt) 255 microblog_data['author'] = unicode(name_elt)
256 # uri
257 try:
258 uri_elt = author_elt.elements(NS_ATOM, 'uri').next()
259 except StopIteration:
260 log.debug("No uri element found in author element of item {}".format(id_))
261 else:
262 uri = unicode(uri_elt)
263 if uri.startswith("xmpp:"):
264 uri = uri[5:]
265 microblog_data['author_uri'] = uri
266 if item_elt.getAttribute("publisher") == uri:
267 microblog_data['author_uri_verified'] = C.BOOL_TRUE
268 else:
269 log.warning("item atom:uri differ from publisher attribute, spoofing attempt ? atom:uri = {} publisher = {}".format(uri, item_elt.getAttribute("publisher")))
270 microblog_data['author_uri_verified'] = C.BOOL_FALSE
271 # email
272 try:
273 email_elt = author_elt.elements(NS_ATOM, 'email').next()
274 except StopIteration:
275 pass
276 else:
277 microblog_data['author_email'] = unicode(email_elt)
235 278
236 defer.returnValue(microblog_data) 279 defer.returnValue(microblog_data)
237 280
238 @defer.inlineCallbacks 281 @defer.inlineCallbacks
239 def data2entry(self, data, profile): 282 def data2entry(self, data, profile):