Mercurial > libervia-backend
comparison libervia/backend/plugins/plugin_xep_0277.py @ 4175:30f7513e5590
plugin XEP-0277: generate and parse alternate links with the new `alt_links` data in `extra`
author | Goffi <goffi@goffi.org> |
---|---|
date | Tue, 05 Dec 2023 13:14:03 +0100 |
parents | 0e48181d50ab |
children | cf0ea77f9537 |
comparison
equal
deleted
inserted
replaced
4174:6929dabf3a7e | 4175:30f7513e5590 |
---|---|
15 | 15 |
16 # You should have received a copy of the GNU Affero General Public License | 16 # You should have received a copy of the GNU Affero General Public License |
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 17 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 |
19 import time | 19 import time |
20 from urllib.parse import quote, urlparse | |
20 import dateutil | 21 import dateutil |
21 import calendar | 22 import calendar |
22 from mimetypes import guess_type | 23 from mimetypes import guess_type |
23 from secrets import token_urlsafe | 24 from secrets import token_urlsafe |
24 from typing import List, Optional, Dict, Tuple, Any, Dict | 25 from typing import List, Optional, Dict, Tuple, Any, Dict |
280 """ | 281 """ |
281 if service is None: | 282 if service is None: |
282 service = client.jid.userhostJID() | 283 service = client.jid.userhostJID() |
283 | 284 |
284 extra: Dict[str, Any] = {} | 285 extra: Dict[str, Any] = {} |
285 microblog_data: Dict[str, Any] = { | 286 mb_data: Dict[str, Any] = { |
286 "service": service.full(), | 287 "service": service.full(), |
287 "extra": extra | 288 "extra": extra |
288 } | 289 } |
289 | 290 |
290 def check_conflict(key, increment=False): | 291 def check_conflict(key, increment=False): |
294 @param increment(bool): if suffix the key with an increment | 295 @param increment(bool): if suffix the key with an increment |
295 instead of raising an exception | 296 instead of raising an exception |
296 @raise exceptions.DataError: the key already exists | 297 @raise exceptions.DataError: the key already exists |
297 (not raised if increment is True) | 298 (not raised if increment is True) |
298 """ | 299 """ |
299 if key in microblog_data: | 300 if key in mb_data: |
300 if not increment: | 301 if not increment: |
301 raise failure.Failure( | 302 raise failure.Failure( |
302 exceptions.DataError( | 303 exceptions.DataError( |
303 "key {} is already present for item {}" | 304 "key {} is already present for item {}" |
304 ).format(key, item_elt["id"]) | 305 ).format(key, item_elt["id"]) |
305 ) | 306 ) |
306 else: | 307 else: |
307 idx = 1 # the idx 0 is the key without suffix | 308 idx = 1 # the idx 0 is the key without suffix |
308 fmt = "{}#{}" | 309 fmt = "{}#{}" |
309 new_key = fmt.format(key, idx) | 310 new_key = fmt.format(key, idx) |
310 while new_key in microblog_data: | 311 while new_key in mb_data: |
311 idx += 1 | 312 idx += 1 |
312 new_key = fmt.format(key, idx) | 313 new_key = fmt.format(key, idx) |
313 key = new_key | 314 key = new_key |
314 return key | 315 return key |
315 | 316 |
332 _("Content of type XHTML must declare its namespace!") | 333 _("Content of type XHTML must declare its namespace!") |
333 ) | 334 ) |
334 ) | 335 ) |
335 key = check_conflict("{}_xhtml".format(elem.name)) | 336 key = check_conflict("{}_xhtml".format(elem.name)) |
336 data = data_elt.toXml() | 337 data = data_elt.toXml() |
337 microblog_data[key] = yield self.host.plugins["TEXT_SYNTAXES"].clean_xhtml( | 338 mb_data[key] = yield self.host.plugins["TEXT_SYNTAXES"].clean_xhtml( |
338 data | 339 data |
339 ) | 340 ) |
340 else: | 341 else: |
341 key = check_conflict(elem.name) | 342 key = check_conflict(elem.name) |
342 microblog_data[key] = str(elem) | 343 mb_data[key] = str(elem) |
343 | 344 |
344 id_ = item_elt.getAttribute("id", "") # there can be no id for transient nodes | 345 id_ = item_elt.getAttribute("id", "") # there can be no id for transient nodes |
345 microblog_data["id"] = id_ | 346 mb_data["id"] = id_ |
346 if item_elt.uri not in (pubsub.NS_PUBSUB, NS_PUBSUB_EVENT): | 347 if item_elt.uri not in (pubsub.NS_PUBSUB, NS_PUBSUB_EVENT): |
347 msg = "Unsupported namespace {ns} in pubsub item {id_}".format( | 348 msg = "Unsupported namespace {ns} in pubsub item {id_}".format( |
348 ns=item_elt.uri, id_=id_ | 349 ns=item_elt.uri, id_=id_ |
349 ) | 350 ) |
350 log.warning(msg) | 351 log.warning(msg) |
357 raise failure.Failure(exceptions.DataError(msg)) | 358 raise failure.Failure(exceptions.DataError(msg)) |
358 | 359 |
359 # uri | 360 # uri |
360 # FIXME: node should alway be set in the future, check FIXME in method signature | 361 # FIXME: node should alway be set in the future, check FIXME in method signature |
361 if node is not None: | 362 if node is not None: |
362 microblog_data["node"] = node | 363 mb_data["node"] = node |
363 microblog_data['uri'] = xmpp_uri.build_xmpp_uri( | 364 mb_data['uri'] = xmpp_uri.build_xmpp_uri( |
364 "pubsub", | 365 "pubsub", |
365 path=service.full(), | 366 path=service.full(), |
366 node=node, | 367 node=node, |
367 item=id_, | 368 item=id_, |
368 ) | 369 ) |
369 | 370 |
370 # language | 371 # language |
371 try: | 372 try: |
372 microblog_data["language"] = entry_elt[(C.NS_XML, "lang")].strip() | 373 mb_data["language"] = entry_elt[(C.NS_XML, "lang")].strip() |
373 except KeyError: | 374 except KeyError: |
374 pass | 375 pass |
375 | 376 |
376 # atom:id | 377 # atom:id |
377 try: | 378 try: |
378 id_elt = next(entry_elt.elements(NS_ATOM, "id")) | 379 id_elt = next(entry_elt.elements(NS_ATOM, "id")) |
379 except StopIteration: | 380 except StopIteration: |
380 msg = ("No atom id found in the pubsub item {}, this is not standard !" | 381 msg = ("No atom id found in the pubsub item {}, this is not standard !" |
381 .format(id_)) | 382 .format(id_)) |
382 log.warning(msg) | 383 log.warning(msg) |
383 microblog_data["atom_id"] = "" | 384 mb_data["atom_id"] = "" |
384 else: | 385 else: |
385 microblog_data["atom_id"] = str(id_elt) | 386 mb_data["atom_id"] = str(id_elt) |
386 | 387 |
387 # title/content(s) | 388 # title/content(s) |
388 | 389 |
389 # FIXME: ATOM and XEP-0277 only allow 1 <title/> element | 390 # FIXME: ATOM and XEP-0277 only allow 1 <title/> element |
390 # but in the wild we have some blogs with several ones | 391 # but in the wild we have some blogs with several ones |
408 for content_elt in entry_elt.elements(NS_ATOM, "content"): | 409 for content_elt in entry_elt.elements(NS_ATOM, "content"): |
409 yield parseElement(content_elt) | 410 yield parseElement(content_elt) |
410 | 411 |
411 # we check that text content is present | 412 # we check that text content is present |
412 for key in ("title", "content"): | 413 for key in ("title", "content"): |
413 if key not in microblog_data and ("{}_xhtml".format(key)) in microblog_data: | 414 if key not in mb_data and ("{}_xhtml".format(key)) in mb_data: |
414 log.warning( | 415 log.warning( |
415 "item {id_} provide a {key}_xhtml data but not a text one".format( | 416 "item {id_} provide a {key}_xhtml data but not a text one".format( |
416 id_=id_, key=key | 417 id_=id_, key=key |
417 ) | 418 ) |
418 ) | 419 ) |
419 # ... and do the conversion if it's not | 420 # ... and do the conversion if it's not |
420 microblog_data[key] = yield self.host.plugins["TEXT_SYNTAXES"].convert( | 421 mb_data[key] = yield self.host.plugins["TEXT_SYNTAXES"].convert( |
421 microblog_data["{}_xhtml".format(key)], | 422 mb_data["{}_xhtml".format(key)], |
422 self.host.plugins["TEXT_SYNTAXES"].SYNTAX_XHTML, | 423 self.host.plugins["TEXT_SYNTAXES"].SYNTAX_XHTML, |
423 self.host.plugins["TEXT_SYNTAXES"].SYNTAX_TEXT, | 424 self.host.plugins["TEXT_SYNTAXES"].SYNTAX_TEXT, |
424 False, | 425 False, |
425 ) | 426 ) |
426 | 427 |
427 if "content" not in microblog_data: | 428 if "content" not in mb_data: |
428 # use the atom title data as the microblog body content | 429 # use the atom title data as the microblog body content |
429 microblog_data["content"] = microblog_data["title"] | 430 mb_data["content"] = mb_data["title"] |
430 del microblog_data["title"] | 431 del mb_data["title"] |
431 if "title_xhtml" in microblog_data: | 432 if "title_xhtml" in mb_data: |
432 microblog_data["content_xhtml"] = microblog_data["title_xhtml"] | 433 mb_data["content_xhtml"] = mb_data["title_xhtml"] |
433 del microblog_data["title_xhtml"] | 434 del mb_data["title_xhtml"] |
434 | 435 |
435 # published/updated dates | 436 # published/updated dates |
436 try: | 437 try: |
437 updated_elt = next(entry_elt.elements(NS_ATOM, "updated")) | 438 updated_elt = next(entry_elt.elements(NS_ATOM, "updated")) |
438 except StopIteration: | 439 except StopIteration: |
439 msg = "No atom updated element found in the pubsub item {}".format(id_) | 440 msg = "No atom updated element found in the pubsub item {}".format(id_) |
440 raise failure.Failure(exceptions.DataError(msg)) | 441 raise failure.Failure(exceptions.DataError(msg)) |
441 microblog_data["updated"] = calendar.timegm( | 442 mb_data["updated"] = calendar.timegm( |
442 dateutil.parser.parse(str(updated_elt)).utctimetuple() | 443 dateutil.parser.parse(str(updated_elt)).utctimetuple() |
443 ) | 444 ) |
444 try: | 445 try: |
445 published_elt = next(entry_elt.elements(NS_ATOM, "published")) | 446 published_elt = next(entry_elt.elements(NS_ATOM, "published")) |
446 except StopIteration: | 447 except StopIteration: |
447 microblog_data["published"] = microblog_data["updated"] | 448 mb_data["published"] = mb_data["updated"] |
448 else: | 449 else: |
449 microblog_data["published"] = calendar.timegm( | 450 mb_data["published"] = calendar.timegm( |
450 dateutil.parser.parse(str(published_elt)).utctimetuple() | 451 dateutil.parser.parse(str(published_elt)).utctimetuple() |
451 ) | 452 ) |
452 | 453 |
453 # links | 454 # links |
454 comments = microblog_data['comments'] = [] | 455 comments = mb_data['comments'] = [] |
455 for link_elt in entry_elt.elements(NS_ATOM, "link"): | 456 for link_elt in entry_elt.elements(NS_ATOM, "link"): |
456 href = link_elt.getAttribute("href") | 457 href = link_elt.getAttribute("href") |
457 if not href: | 458 if not href: |
458 log.warning( | 459 log.warning( |
459 f'missing href in <link> element: {link_elt.toXml()}' | 460 f'missing href in <link> element: {link_elt.toXml()}' |
519 if media_type is not None: | 520 if media_type is not None: |
520 attachment["media_type"] = media_type | 521 attachment["media_type"] = media_type |
521 | 522 |
522 attachments = extra.setdefault("attachments", []) | 523 attachments = extra.setdefault("attachments", []) |
523 attachments.append(attachment) | 524 attachments.append(attachment) |
525 elif rel == "alternate": | |
526 link_data = {"url": href} | |
527 media_type = link_elt.getAttribute("type") or guess_type(href)[0] | |
528 if media_type: | |
529 link_data["media_type"] = media_type | |
530 else: | |
531 log.warning( | |
532 f"Invalid or missing media type for alternate link: {href}" | |
533 ) | |
534 extra.setdefault("alt_links", []).append(link_data) | |
524 else: | 535 else: |
525 log.warning( | 536 log.warning( |
526 f"Unmanaged link element: {link_elt.toXml()}" | 537 f"Unmanaged link element: {link_elt.toXml()}" |
527 ) | 538 ) |
528 | 539 |
540 log.warning( | 551 log.warning( |
541 "No name element found in author element of item {}".format(id_) | 552 "No name element found in author element of item {}".format(id_) |
542 ) | 553 ) |
543 author = None | 554 author = None |
544 else: | 555 else: |
545 author = microblog_data["author"] = str(name_elt).strip() | 556 author = mb_data["author"] = str(name_elt).strip() |
546 # uri | 557 # uri |
547 try: | 558 try: |
548 uri_elt = next(author_elt.elements(NS_ATOM, "uri")) | 559 uri_elt = next(author_elt.elements(NS_ATOM, "uri")) |
549 except StopIteration: | 560 except StopIteration: |
550 log.debug( | 561 log.debug( |
551 "No uri element found in author element of item {}".format(id_) | 562 "No uri element found in author element of item {}".format(id_) |
552 ) | 563 ) |
553 if publisher: | 564 if publisher: |
554 microblog_data["author_jid"] = publisher | 565 mb_data["author_jid"] = publisher |
555 else: | 566 else: |
556 uri = str(uri_elt) | 567 uri = str(uri_elt) |
557 if uri.startswith("xmpp:"): | 568 if uri.startswith("xmpp:"): |
558 uri = uri[5:] | 569 uri = uri[5:] |
559 microblog_data["author_jid"] = uri | 570 mb_data["author_jid"] = uri |
560 else: | 571 else: |
561 microblog_data["author_jid"] = ( | 572 mb_data["author_jid"] = ( |
562 item_elt.getAttribute("publisher") or "" | 573 item_elt.getAttribute("publisher") or "" |
563 ) | 574 ) |
564 if not author and microblog_data["author_jid"]: | 575 if not author and mb_data["author_jid"]: |
565 # FIXME: temporary workaround for missing author name, would be | 576 # FIXME: temporary workaround for missing author name, would be |
566 # better to use directly JID's identity (to be done from frontends?) | 577 # better to use directly JID's identity (to be done from frontends?) |
567 try: | 578 try: |
568 microblog_data["author"] = jid.JID(microblog_data["author_jid"]).user | 579 mb_data["author"] = jid.JID(mb_data["author_jid"]).user |
569 except Exception as e: | 580 except Exception as e: |
570 log.warning(f"No author name found, and can't parse author jid: {e}") | 581 log.warning(f"No author name found, and can't parse author jid: {e}") |
571 | 582 |
572 if not publisher: | 583 if not publisher: |
573 log.debug("No publisher attribute, we can't verify author jid") | 584 log.debug("No publisher attribute, we can't verify author jid") |
574 microblog_data["author_jid_verified"] = False | 585 mb_data["author_jid_verified"] = False |
575 elif jid.JID(publisher).userhostJID() == jid.JID(uri).userhostJID(): | 586 elif jid.JID(publisher).userhostJID() == jid.JID(uri).userhostJID(): |
576 microblog_data["author_jid_verified"] = True | 587 mb_data["author_jid_verified"] = True |
577 else: | 588 else: |
578 if "repeated" not in extra: | 589 if "repeated" not in extra: |
579 log.warning( | 590 log.warning( |
580 "item atom:uri differ from publisher attribute, spoofing " | 591 "item atom:uri differ from publisher attribute, spoofing " |
581 "attempt ? atom:uri = {} publisher = {}".format( | 592 "attempt ? atom:uri = {} publisher = {}".format( |
582 uri, item_elt.getAttribute("publisher") | 593 uri, item_elt.getAttribute("publisher") |
583 ) | 594 ) |
584 ) | 595 ) |
585 microblog_data["author_jid_verified"] = False | 596 mb_data["author_jid_verified"] = False |
586 # email | 597 # email |
587 try: | 598 try: |
588 email_elt = next(author_elt.elements(NS_ATOM, "email")) | 599 email_elt = next(author_elt.elements(NS_ATOM, "email")) |
589 except StopIteration: | 600 except StopIteration: |
590 pass | 601 pass |
591 else: | 602 else: |
592 microblog_data["author_email"] = str(email_elt) | 603 mb_data["author_email"] = str(email_elt) |
593 | 604 |
594 if not microblog_data.get("author_jid"): | 605 if not mb_data.get("author_jid"): |
595 if publisher: | 606 if publisher: |
596 microblog_data["author_jid"] = publisher | 607 mb_data["author_jid"] = publisher |
597 microblog_data["author_jid_verified"] = True | 608 mb_data["author_jid_verified"] = True |
598 else: | 609 else: |
599 iq_elt = xml_tools.find_ancestor(item_elt, "iq", C.NS_STREAM) | 610 iq_elt = xml_tools.find_ancestor(item_elt, "iq", C.NS_STREAM) |
600 microblog_data["author_jid"] = iq_elt["from"] | 611 mb_data["author_jid"] = iq_elt["from"] |
601 microblog_data["author_jid_verified"] = False | 612 mb_data["author_jid_verified"] = False |
602 | 613 |
603 # categories | 614 # categories |
604 categories = [ | 615 categories = [ |
605 category_elt.getAttribute("term", "") | 616 category_elt.getAttribute("term", "") |
606 for category_elt in entry_elt.elements(NS_ATOM, "category") | 617 for category_elt in entry_elt.elements(NS_ATOM, "category") |
607 ] | 618 ] |
608 microblog_data["tags"] = categories | 619 mb_data["tags"] = categories |
609 | 620 |
610 ## the trigger ## | 621 ## the trigger ## |
611 # if other plugins have things to add or change | 622 # if other plugins have things to add or change |
612 yield self.host.trigger.point( | 623 yield self.host.trigger.point( |
613 "XEP-0277_item2data", item_elt, entry_elt, microblog_data | 624 "XEP-0277_item2data", item_elt, entry_elt, mb_data |
614 ) | 625 ) |
615 | 626 |
616 defer.returnValue(microblog_data) | 627 defer.returnValue(mb_data) |
617 | 628 |
618 async def mb_data_2_entry_elt(self, client, mb_data, item_id, service, node): | 629 async def mb_data_2_entry_elt(self, client, mb_data, item_id, service, node): |
619 """Convert a data dict to en entry usable to create an item | 630 """Convert a data dict to en entry usable to create an item |
620 | 631 |
621 @param mb_data: data dict as given by bridge method. | 632 @param mb_data: data dict as given by bridge method. |
738 ("size", "lenght") | 749 ("size", "lenght") |
739 ): | 750 ): |
740 value = attachment.get(key) | 751 value = attachment.get(key) |
741 if value: | 752 if value: |
742 link_elt[attr] = str(value) | 753 link_elt[attr] = str(value) |
754 | |
755 ## alternate links ## | |
756 alt_links = extra.get("alt_links") | |
757 if alt_links: | |
758 for link_data in alt_links: | |
759 url_template = link_data["url"] | |
760 url = url_template.format( | |
761 service=quote(service.full(), safe=""), | |
762 node=quote(node, safe=""), | |
763 item=quote(item_id, safe="") | |
764 ) | |
765 | |
766 link_elt = entry_elt.addElement("link") | |
767 link_elt["href"] = url | |
768 link_elt["rel"] = "alternate" | |
769 | |
770 media_type = link_data.get("media_type") | |
771 if not media_type: | |
772 parsed_url = urlparse(url) | |
773 if parsed_url.scheme in ["http", "https"]: | |
774 media_type = "text/html" | |
775 else: | |
776 media_type = guess_type(url)[0] or "application/octet-stream" | |
777 | |
778 link_elt["type"] = media_type | |
743 | 779 |
744 ## author ## | 780 ## author ## |
745 author_elt = entry_elt.addElement("author") | 781 author_elt = entry_elt.addElement("author") |
746 try: | 782 try: |
747 author_name = mb_data["author"] | 783 author_name = mb_data["author"] |