comparison libervia/backend/plugins/plugin_xep_0277.py @ 4175:30f7513e5590

plugin XEP-0277: generate and parse alternate links with the new `alt_links` data in `extra`
author Goffi <goffi@goffi.org>
date Tue, 05 Dec 2023 13:14:03 +0100
parents 0e48181d50ab
children cf0ea77f9537
comparison
equal deleted inserted replaced
4174:6929dabf3a7e 4175:30f7513e5590
15 15
16 # You should have received a copy of the GNU Affero General Public License 16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. 17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18
19 import time 19 import time
20 from urllib.parse import quote, urlparse
20 import dateutil 21 import dateutil
21 import calendar 22 import calendar
22 from mimetypes import guess_type 23 from mimetypes import guess_type
23 from secrets import token_urlsafe 24 from secrets import token_urlsafe
24 from typing import List, Optional, Dict, Tuple, Any, Dict 25 from typing import List, Optional, Dict, Tuple, Any, Dict
280 """ 281 """
281 if service is None: 282 if service is None:
282 service = client.jid.userhostJID() 283 service = client.jid.userhostJID()
283 284
284 extra: Dict[str, Any] = {} 285 extra: Dict[str, Any] = {}
285 microblog_data: Dict[str, Any] = { 286 mb_data: Dict[str, Any] = {
286 "service": service.full(), 287 "service": service.full(),
287 "extra": extra 288 "extra": extra
288 } 289 }
289 290
290 def check_conflict(key, increment=False): 291 def check_conflict(key, increment=False):
294 @param increment(bool): if suffix the key with an increment 295 @param increment(bool): if suffix the key with an increment
295 instead of raising an exception 296 instead of raising an exception
296 @raise exceptions.DataError: the key already exists 297 @raise exceptions.DataError: the key already exists
297 (not raised if increment is True) 298 (not raised if increment is True)
298 """ 299 """
299 if key in microblog_data: 300 if key in mb_data:
300 if not increment: 301 if not increment:
301 raise failure.Failure( 302 raise failure.Failure(
302 exceptions.DataError( 303 exceptions.DataError(
303 "key {} is already present for item {}" 304 "key {} is already present for item {}"
304 ).format(key, item_elt["id"]) 305 ).format(key, item_elt["id"])
305 ) 306 )
306 else: 307 else:
307 idx = 1 # the idx 0 is the key without suffix 308 idx = 1 # the idx 0 is the key without suffix
308 fmt = "{}#{}" 309 fmt = "{}#{}"
309 new_key = fmt.format(key, idx) 310 new_key = fmt.format(key, idx)
310 while new_key in microblog_data: 311 while new_key in mb_data:
311 idx += 1 312 idx += 1
312 new_key = fmt.format(key, idx) 313 new_key = fmt.format(key, idx)
313 key = new_key 314 key = new_key
314 return key 315 return key
315 316
332 _("Content of type XHTML must declare its namespace!") 333 _("Content of type XHTML must declare its namespace!")
333 ) 334 )
334 ) 335 )
335 key = check_conflict("{}_xhtml".format(elem.name)) 336 key = check_conflict("{}_xhtml".format(elem.name))
336 data = data_elt.toXml() 337 data = data_elt.toXml()
337 microblog_data[key] = yield self.host.plugins["TEXT_SYNTAXES"].clean_xhtml( 338 mb_data[key] = yield self.host.plugins["TEXT_SYNTAXES"].clean_xhtml(
338 data 339 data
339 ) 340 )
340 else: 341 else:
341 key = check_conflict(elem.name) 342 key = check_conflict(elem.name)
342 microblog_data[key] = str(elem) 343 mb_data[key] = str(elem)
343 344
344 id_ = item_elt.getAttribute("id", "") # there can be no id for transient nodes 345 id_ = item_elt.getAttribute("id", "") # there can be no id for transient nodes
345 microblog_data["id"] = id_ 346 mb_data["id"] = id_
346 if item_elt.uri not in (pubsub.NS_PUBSUB, NS_PUBSUB_EVENT): 347 if item_elt.uri not in (pubsub.NS_PUBSUB, NS_PUBSUB_EVENT):
347 msg = "Unsupported namespace {ns} in pubsub item {id_}".format( 348 msg = "Unsupported namespace {ns} in pubsub item {id_}".format(
348 ns=item_elt.uri, id_=id_ 349 ns=item_elt.uri, id_=id_
349 ) 350 )
350 log.warning(msg) 351 log.warning(msg)
357 raise failure.Failure(exceptions.DataError(msg)) 358 raise failure.Failure(exceptions.DataError(msg))
358 359
359 # uri 360 # uri
360 # FIXME: node should always be set in the future, check FIXME in method signature 361 # FIXME: node should always be set in the future, check FIXME in method signature
361 if node is not None: 362 if node is not None:
362 microblog_data["node"] = node 363 mb_data["node"] = node
363 microblog_data['uri'] = xmpp_uri.build_xmpp_uri( 364 mb_data['uri'] = xmpp_uri.build_xmpp_uri(
364 "pubsub", 365 "pubsub",
365 path=service.full(), 366 path=service.full(),
366 node=node, 367 node=node,
367 item=id_, 368 item=id_,
368 ) 369 )
369 370
370 # language 371 # language
371 try: 372 try:
372 microblog_data["language"] = entry_elt[(C.NS_XML, "lang")].strip() 373 mb_data["language"] = entry_elt[(C.NS_XML, "lang")].strip()
373 except KeyError: 374 except KeyError:
374 pass 375 pass
375 376
376 # atom:id 377 # atom:id
377 try: 378 try:
378 id_elt = next(entry_elt.elements(NS_ATOM, "id")) 379 id_elt = next(entry_elt.elements(NS_ATOM, "id"))
379 except StopIteration: 380 except StopIteration:
380 msg = ("No atom id found in the pubsub item {}, this is not standard !" 381 msg = ("No atom id found in the pubsub item {}, this is not standard !"
381 .format(id_)) 382 .format(id_))
382 log.warning(msg) 383 log.warning(msg)
383 microblog_data["atom_id"] = "" 384 mb_data["atom_id"] = ""
384 else: 385 else:
385 microblog_data["atom_id"] = str(id_elt) 386 mb_data["atom_id"] = str(id_elt)
386 387
387 # title/content(s) 388 # title/content(s)
388 389
389 # FIXME: ATOM and XEP-0277 only allow 1 <title/> element 390 # FIXME: ATOM and XEP-0277 only allow 1 <title/> element
390 # but in the wild we have some blogs with several ones 391 # but in the wild we have some blogs with several ones
408 for content_elt in entry_elt.elements(NS_ATOM, "content"): 409 for content_elt in entry_elt.elements(NS_ATOM, "content"):
409 yield parseElement(content_elt) 410 yield parseElement(content_elt)
410 411
411 # we check that text content is present 412 # we check that text content is present
412 for key in ("title", "content"): 413 for key in ("title", "content"):
413 if key not in microblog_data and ("{}_xhtml".format(key)) in microblog_data: 414 if key not in mb_data and ("{}_xhtml".format(key)) in mb_data:
414 log.warning( 415 log.warning(
415 "item {id_} provide a {key}_xhtml data but not a text one".format( 416 "item {id_} provide a {key}_xhtml data but not a text one".format(
416 id_=id_, key=key 417 id_=id_, key=key
417 ) 418 )
418 ) 419 )
419 # ... and do the conversion if it's not 420 # ... and do the conversion if it's not
420 microblog_data[key] = yield self.host.plugins["TEXT_SYNTAXES"].convert( 421 mb_data[key] = yield self.host.plugins["TEXT_SYNTAXES"].convert(
421 microblog_data["{}_xhtml".format(key)], 422 mb_data["{}_xhtml".format(key)],
422 self.host.plugins["TEXT_SYNTAXES"].SYNTAX_XHTML, 423 self.host.plugins["TEXT_SYNTAXES"].SYNTAX_XHTML,
423 self.host.plugins["TEXT_SYNTAXES"].SYNTAX_TEXT, 424 self.host.plugins["TEXT_SYNTAXES"].SYNTAX_TEXT,
424 False, 425 False,
425 ) 426 )
426 427
427 if "content" not in microblog_data: 428 if "content" not in mb_data:
428 # use the atom title data as the microblog body content 429 # use the atom title data as the microblog body content
429 microblog_data["content"] = microblog_data["title"] 430 mb_data["content"] = mb_data["title"]
430 del microblog_data["title"] 431 del mb_data["title"]
431 if "title_xhtml" in microblog_data: 432 if "title_xhtml" in mb_data:
432 microblog_data["content_xhtml"] = microblog_data["title_xhtml"] 433 mb_data["content_xhtml"] = mb_data["title_xhtml"]
433 del microblog_data["title_xhtml"] 434 del mb_data["title_xhtml"]
434 435
435 # published/updated dates 436 # published/updated dates
436 try: 437 try:
437 updated_elt = next(entry_elt.elements(NS_ATOM, "updated")) 438 updated_elt = next(entry_elt.elements(NS_ATOM, "updated"))
438 except StopIteration: 439 except StopIteration:
439 msg = "No atom updated element found in the pubsub item {}".format(id_) 440 msg = "No atom updated element found in the pubsub item {}".format(id_)
440 raise failure.Failure(exceptions.DataError(msg)) 441 raise failure.Failure(exceptions.DataError(msg))
441 microblog_data["updated"] = calendar.timegm( 442 mb_data["updated"] = calendar.timegm(
442 dateutil.parser.parse(str(updated_elt)).utctimetuple() 443 dateutil.parser.parse(str(updated_elt)).utctimetuple()
443 ) 444 )
444 try: 445 try:
445 published_elt = next(entry_elt.elements(NS_ATOM, "published")) 446 published_elt = next(entry_elt.elements(NS_ATOM, "published"))
446 except StopIteration: 447 except StopIteration:
447 microblog_data["published"] = microblog_data["updated"] 448 mb_data["published"] = mb_data["updated"]
448 else: 449 else:
449 microblog_data["published"] = calendar.timegm( 450 mb_data["published"] = calendar.timegm(
450 dateutil.parser.parse(str(published_elt)).utctimetuple() 451 dateutil.parser.parse(str(published_elt)).utctimetuple()
451 ) 452 )
452 453
453 # links 454 # links
454 comments = microblog_data['comments'] = [] 455 comments = mb_data['comments'] = []
455 for link_elt in entry_elt.elements(NS_ATOM, "link"): 456 for link_elt in entry_elt.elements(NS_ATOM, "link"):
456 href = link_elt.getAttribute("href") 457 href = link_elt.getAttribute("href")
457 if not href: 458 if not href:
458 log.warning( 459 log.warning(
459 f'missing href in <link> element: {link_elt.toXml()}' 460 f'missing href in <link> element: {link_elt.toXml()}'
519 if media_type is not None: 520 if media_type is not None:
520 attachment["media_type"] = media_type 521 attachment["media_type"] = media_type
521 522
522 attachments = extra.setdefault("attachments", []) 523 attachments = extra.setdefault("attachments", [])
523 attachments.append(attachment) 524 attachments.append(attachment)
525 elif rel == "alternate":
526 link_data = {"url": href}
527 media_type = link_elt.getAttribute("type") or guess_type(href)[0]
528 if media_type:
529 link_data["media_type"] = media_type
530 else:
531 log.warning(
532 f"Invalid or missing media type for alternate link: {href}"
533 )
534 extra.setdefault("alt_links", []).append(link_data)
524 else: 535 else:
525 log.warning( 536 log.warning(
526 f"Unmanaged link element: {link_elt.toXml()}" 537 f"Unmanaged link element: {link_elt.toXml()}"
527 ) 538 )
528 539
540 log.warning( 551 log.warning(
541 "No name element found in author element of item {}".format(id_) 552 "No name element found in author element of item {}".format(id_)
542 ) 553 )
543 author = None 554 author = None
544 else: 555 else:
545 author = microblog_data["author"] = str(name_elt).strip() 556 author = mb_data["author"] = str(name_elt).strip()
546 # uri 557 # uri
547 try: 558 try:
548 uri_elt = next(author_elt.elements(NS_ATOM, "uri")) 559 uri_elt = next(author_elt.elements(NS_ATOM, "uri"))
549 except StopIteration: 560 except StopIteration:
550 log.debug( 561 log.debug(
551 "No uri element found in author element of item {}".format(id_) 562 "No uri element found in author element of item {}".format(id_)
552 ) 563 )
553 if publisher: 564 if publisher:
554 microblog_data["author_jid"] = publisher 565 mb_data["author_jid"] = publisher
555 else: 566 else:
556 uri = str(uri_elt) 567 uri = str(uri_elt)
557 if uri.startswith("xmpp:"): 568 if uri.startswith("xmpp:"):
558 uri = uri[5:] 569 uri = uri[5:]
559 microblog_data["author_jid"] = uri 570 mb_data["author_jid"] = uri
560 else: 571 else:
561 microblog_data["author_jid"] = ( 572 mb_data["author_jid"] = (
562 item_elt.getAttribute("publisher") or "" 573 item_elt.getAttribute("publisher") or ""
563 ) 574 )
564 if not author and microblog_data["author_jid"]: 575 if not author and mb_data["author_jid"]:
565 # FIXME: temporary workaround for missing author name, would be 576 # FIXME: temporary workaround for missing author name, would be
566 # better to use directly JID's identity (to be done from frontends?) 577 # better to use directly JID's identity (to be done from frontends?)
567 try: 578 try:
568 microblog_data["author"] = jid.JID(microblog_data["author_jid"]).user 579 mb_data["author"] = jid.JID(mb_data["author_jid"]).user
569 except Exception as e: 580 except Exception as e:
570 log.warning(f"No author name found, and can't parse author jid: {e}") 581 log.warning(f"No author name found, and can't parse author jid: {e}")
571 582
572 if not publisher: 583 if not publisher:
573 log.debug("No publisher attribute, we can't verify author jid") 584 log.debug("No publisher attribute, we can't verify author jid")
574 microblog_data["author_jid_verified"] = False 585 mb_data["author_jid_verified"] = False
575 elif jid.JID(publisher).userhostJID() == jid.JID(uri).userhostJID(): 586 elif jid.JID(publisher).userhostJID() == jid.JID(uri).userhostJID():
576 microblog_data["author_jid_verified"] = True 587 mb_data["author_jid_verified"] = True
577 else: 588 else:
578 if "repeated" not in extra: 589 if "repeated" not in extra:
579 log.warning( 590 log.warning(
580 "item atom:uri differ from publisher attribute, spoofing " 591 "item atom:uri differ from publisher attribute, spoofing "
581 "attempt ? atom:uri = {} publisher = {}".format( 592 "attempt ? atom:uri = {} publisher = {}".format(
582 uri, item_elt.getAttribute("publisher") 593 uri, item_elt.getAttribute("publisher")
583 ) 594 )
584 ) 595 )
585 microblog_data["author_jid_verified"] = False 596 mb_data["author_jid_verified"] = False
586 # email 597 # email
587 try: 598 try:
588 email_elt = next(author_elt.elements(NS_ATOM, "email")) 599 email_elt = next(author_elt.elements(NS_ATOM, "email"))
589 except StopIteration: 600 except StopIteration:
590 pass 601 pass
591 else: 602 else:
592 microblog_data["author_email"] = str(email_elt) 603 mb_data["author_email"] = str(email_elt)
593 604
594 if not microblog_data.get("author_jid"): 605 if not mb_data.get("author_jid"):
595 if publisher: 606 if publisher:
596 microblog_data["author_jid"] = publisher 607 mb_data["author_jid"] = publisher
597 microblog_data["author_jid_verified"] = True 608 mb_data["author_jid_verified"] = True
598 else: 609 else:
599 iq_elt = xml_tools.find_ancestor(item_elt, "iq", C.NS_STREAM) 610 iq_elt = xml_tools.find_ancestor(item_elt, "iq", C.NS_STREAM)
600 microblog_data["author_jid"] = iq_elt["from"] 611 mb_data["author_jid"] = iq_elt["from"]
601 microblog_data["author_jid_verified"] = False 612 mb_data["author_jid_verified"] = False
602 613
603 # categories 614 # categories
604 categories = [ 615 categories = [
605 category_elt.getAttribute("term", "") 616 category_elt.getAttribute("term", "")
606 for category_elt in entry_elt.elements(NS_ATOM, "category") 617 for category_elt in entry_elt.elements(NS_ATOM, "category")
607 ] 618 ]
608 microblog_data["tags"] = categories 619 mb_data["tags"] = categories
609 620
610 ## the trigger ## 621 ## the trigger ##
611 # if other plugins have things to add or change 622 # if other plugins have things to add or change
612 yield self.host.trigger.point( 623 yield self.host.trigger.point(
613 "XEP-0277_item2data", item_elt, entry_elt, microblog_data 624 "XEP-0277_item2data", item_elt, entry_elt, mb_data
614 ) 625 )
615 626
616 defer.returnValue(microblog_data) 627 defer.returnValue(mb_data)
617 628
618 async def mb_data_2_entry_elt(self, client, mb_data, item_id, service, node): 629 async def mb_data_2_entry_elt(self, client, mb_data, item_id, service, node):
619 """Convert a data dict to an entry usable to create an item 630 """Convert a data dict to an entry usable to create an item
620 631
621 @param mb_data: data dict as given by bridge method. 632 @param mb_data: data dict as given by bridge method.
738 ("size", "lenght") 749 ("size", "lenght")
739 ): 750 ):
740 value = attachment.get(key) 751 value = attachment.get(key)
741 if value: 752 if value:
742 link_elt[attr] = str(value) 753 link_elt[attr] = str(value)
754
755 ## alternate links ##
756 alt_links = extra.get("alt_links")
757 if alt_links:
758 for link_data in alt_links:
759 url_template = link_data["url"]
760 url = url_template.format(
761 service=quote(service.full(), safe=""),
762 node=quote(node, safe=""),
763 item=quote(item_id, safe="")
764 )
765
766 link_elt = entry_elt.addElement("link")
767 link_elt["href"] = url
768 link_elt["rel"] = "alternate"
769
770 media_type = link_data.get("media_type")
771 if not media_type:
772 parsed_url = urlparse(url)
773 if parsed_url.scheme in ["http", "https"]:
774 media_type = "text/html"
775 else:
776 media_type = guess_type(url)[0] or "application/octet-stream"
777
778 link_elt["type"] = media_type
743 779
744 ## author ## 780 ## author ##
745 author_elt = entry_elt.addElement("author") 781 author_elt = entry_elt.addElement("author")
746 try: 782 try:
747 author_name = mb_data["author"] 783 author_name = mb_data["author"]