Mercurial > libervia-backend
comparison src/plugins/plugin_xep_0277.py @ 1453:d5e72362ee91
plugin XEP-0277: better parsing of atom:author element + item2mbdata minor reorganisation for better readability
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 15 Aug 2015 22:22:36 +0200 |
parents | 5116d70ddd1c |
children | 4e2fab4de195 |
comparison
equal
deleted
inserted
replaced
1452:5116d70ddd1c | 1453:d5e72362ee91 |
---|---|
113 @param item_elt: domish.Element of microblog item | 113 @param item_elt: domish.Element of microblog item |
114 @return: microblog data (dictionary) | 114 @return: microblog data (dictionary) |
115 """ | 115 """ |
116 microblog_data = {} | 116 microblog_data = {} |
117 | 117 |
118 def check_conflict(key): | 118 def check_conflict(key, increment=False): |
119 """Check if key is already in microblog data | |
120 | |
121 @param key(unicode): key to check | |
122 @param increment(bool): if suffix the key with an increment | |
123 instead of raising an exception | |
124 @raise exceptions.DataError: the key already exists | |
125 (not raised if increment is True) | |
126 """ | |
119 if key in microblog_data: | 127 if key in microblog_data: |
120 raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id'])) | 128 if not increment: |
129 raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id'])) | |
130 else: | |
131 idx=1 # the idx 0 is the key without suffix | |
132 fmt = "{}#{}" | |
133 new_key = fmt.format(key, idx) | |
134 while new_key in microblog_data: | |
135 idx+=1 | |
136 new_key = fmt.format(key, idx) | |
137 key = new_key | |
121 return key | 138 return key |
122 | 139 |
123 @defer.inlineCallbacks | 140 @defer.inlineCallbacks |
124 def parseElement(elem): | 141 def parseElement(elem): |
125 """Parse title/content elements and fill microblog_data accordingly""" | 142 """Parse title/content elements and fill microblog_data accordingly""" |
147 entry_elt = item_elt.elements(NS_ATOM, 'entry').next() | 164 entry_elt = item_elt.elements(NS_ATOM, 'entry').next() |
148 except StopIteration: | 165 except StopIteration: |
149 msg = u'No atom entry found in the pubsub item {}'.format(id_) | 166 msg = u'No atom entry found in the pubsub item {}'.format(id_) |
150 raise failure.Failure(exceptions.DataError(msg)) | 167 raise failure.Failure(exceptions.DataError(msg)) |
151 | 168 |
169 # atom:id | |
170 try: | |
171 id_elt = entry_elt.elements(NS_ATOM, 'id').next() | |
172 except StopIteration: | |
173 msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_) | |
174 log.warning(msg) | |
175 microblog_data['atom_id'] = "" | |
176 else: | |
177 microblog_data['atom_id'] = unicode(id_elt) | |
178 | |
179 # title/content(s) | |
152 try: | 180 try: |
153 title_elt = entry_elt.elements(NS_ATOM, 'title').next() | 181 title_elt = entry_elt.elements(NS_ATOM, 'title').next() |
154 except StopIteration: | 182 except StopIteration: |
155 msg = u'No atom title found in the pubsub item {}'.format(id_) | 183 msg = u'No atom title found in the pubsub item {}'.format(id_) |
156 raise failure.Failure(exceptions.DataError(msg)) | 184 raise failure.Failure(exceptions.DataError(msg)) |
169 convert(microblog_data['{}_xhtml'.format(key)], | 197 convert(microblog_data['{}_xhtml'.format(key)], |
170 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML, | 198 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML, |
171 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT, | 199 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT, |
172 False) | 200 False) |
173 | 201 |
174 try: | |
175 id_elt = entry_elt.elements(NS_ATOM, 'id').next() | |
176 except StopIteration: | |
177 msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_) | |
178 log.warning(msg) | |
179 microblog_data['atom_id'] = "" | |
180 else: | |
181 microblog_data['atom_id'] = unicode(id_elt) | |
182 | |
183 try: | |
184 updated_elt = entry_elt.elements(NS_ATOM, 'updated').next() | |
185 except StopIteration: | |
186 msg = u'No atom updated element found in the pubsub item {}'.format(id_) | |
187 raise failure.Failure(exceptions.DataError(msg)) | |
188 microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt))) | |
189 | |
190 if 'content' not in microblog_data: | 202 if 'content' not in microblog_data: |
191 # use the atom title data as the microblog body content | 203 # use the atom title data as the microblog body content |
192 microblog_data['content'] = microblog_data['title'] | 204 microblog_data['content'] = microblog_data['title'] |
193 del microblog_data['title'] | 205 del microblog_data['title'] |
194 if 'title_xhtml' in microblog_data: | 206 if 'title_xhtml' in microblog_data: |
195 microblog_data['content_xhtml'] = microblog_data['title_xhtml'] | 207 microblog_data['content_xhtml'] = microblog_data['title_xhtml'] |
196 del microblog_data['title_xhtml'] | 208 del microblog_data['title_xhtml'] |
197 | 209 |
210 # published/updated dates | |
211 try: | |
212 updated_elt = entry_elt.elements(NS_ATOM, 'updated').next() | |
213 except StopIteration: | |
214 msg = u'No atom updated element found in the pubsub item {}'.format(id_) | |
215 raise failure.Failure(exceptions.DataError(msg)) | |
216 microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt))) | |
198 try: | 217 try: |
199 published_elt = entry_elt.elements(NS_ATOM, 'published').next() | 218 published_elt = entry_elt.elements(NS_ATOM, 'published').next() |
200 except StopIteration: | 219 except StopIteration: |
201 microblog_data['published'] = microblog_data['updated'] | 220 microblog_data['published'] = microblog_data['updated'] |
202 else: | 221 else: |
203 microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt))) | 222 microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt))) |
204 | 223 |
205 | 224 # links |
206 for link_elt in entry_elt.elements(NS_ATOM, 'link'): | 225 for link_elt in entry_elt.elements(NS_ATOM, 'link'): |
207 if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments': | 226 if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments': |
208 key = check_conflict('comments') | 227 key = check_conflict('comments', True) |
209 microblog_data[key] = link_elt['href'] | 228 microblog_data[key] = link_elt['href'] |
210 try: | 229 try: |
211 service, node = self.parseCommentUrl(microblog_data[key]) | 230 service, node = self.parseCommentUrl(microblog_data[key]) |
212 except: | 231 except: |
213 log.warning(u"Can't parse url {}".format(microblog_data[key])) | 232 log.warning(u"Can't parse url {}".format(microblog_data[key])) |
214 del microblog_data[key] | 233 del microblog_data[key] |
215 else: | 234 else: |
216 microblog_data['comments_service'] = service.full() | 235 microblog_data['{}_service'.format(key)] = service.full() |
217 microblog_data['comments_node'] = node | 236 microblog_data['{}_node'.format(key)] = node |
218 else: | 237 else: |
219 rel = link_elt.getAttribute('rel','') | 238 rel = link_elt.getAttribute('rel','') |
220 title = link_elt.getAttribute('title','') | 239 title = link_elt.getAttribute('title','') |
221 href = link_elt.getAttribute('href','') | 240 href = link_elt.getAttribute('href','') |
222 log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href)) | 241 log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href)) |
223 | 242 |
243 # author | |
224 try: | 244 try: |
225 author_elt = entry_elt.elements(NS_ATOM, 'author').next() | 245 author_elt = entry_elt.elements(NS_ATOM, 'author').next() |
226 except StopIteration: | 246 except StopIteration: |
227 log.warning("Can't find author element in item {}".format(id_)) | 247 log.debug("Can't find author element in item {}".format(id_)) |
228 else: | 248 else: |
249 # name | |
229 try: | 250 try: |
230 name_elt = author_elt.elements(NS_ATOM, 'name').next() | 251 name_elt = author_elt.elements(NS_ATOM, 'name').next() |
231 except StopIteration: | 252 except StopIteration: |
232 log.warning("No name element found in author element of item {}".format(id_)) | 253 log.warning("No name element found in author element of item {}".format(id_)) |
233 else: | 254 else: |
234 microblog_data['author'] = unicode(name_elt) | 255 microblog_data['author'] = unicode(name_elt) |
256 # uri | |
257 try: | |
258 uri_elt = author_elt.elements(NS_ATOM, 'uri').next() | |
259 except StopIteration: | |
260 log.debug("No uri element found in author element of item {}".format(id_)) | |
261 else: | |
262 uri = unicode(uri_elt) | |
263 if uri.startswith("xmpp:"): | |
264 uri = uri[5:] | |
265 microblog_data['author_uri'] = uri | |
266 if item_elt.getAttribute("publisher") == uri: | |
267 microblog_data['author_uri_verified'] = C.BOOL_TRUE | |
268 else: | |
269 log.warning("item atom:uri differ from publisher attribute, spoofing attempt ? atom:uri = {} publisher = {}".format(uri, item_elt.getAttribute("publisher"))) | |
270 microblog_data['author_uri_verified'] = C.BOOL_FALSE | |
271 # email | |
272 try: | |
273 email_elt = author_elt.elements(NS_ATOM, 'email').next() | |
274 except StopIteration: | |
275 pass | |
276 else: | |
277 microblog_data['author_email'] = unicode(email_elt) | |
235 | 278 |
236 defer.returnValue(microblog_data) | 279 defer.returnValue(microblog_data) |
237 | 280 |
238 @defer.inlineCallbacks | 281 @defer.inlineCallbacks |
239 def data2entry(self, data, profile): | 282 def data2entry(self, data, profile): |