Mercurial > libervia-backend
annotate libervia/backend/plugins/plugin_misc_url_preview.py @ 4212:5f2d496c633f
core: get rid of `pickle`:
Using `pickle` to serialise data was a technical legacy that caused trouble when storing
data in the database, when updating (if a class was serialised, changing it could break
the update), and for security (unpickling can lead to code execution).
This patch removes all uses of `pickle` in favour of JSON, notably:
- for caching data, a Pydantic model is now used instead
- for SQLAlchemy models, `LegacyPickle` is replaced by JSON serialisation
- in XEP-0373, the class `PublicKeyMetadata` was serialised. New `from_dict` and
`to_dict` methods have been implemented to handle its (de)serialisation.
- new methods to (de)serialise data can now be specified with Identity data types. This
is notably used to (de)serialise the `path` of avatars.
A migration script has been created to convert data (for upgrade or downgrade), with
special care for the XEP-0373 case. Depending on the size of the database, this migration
script can take a long time to run.
rel 443
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 23 Feb 2024 13:31:04 +0100 |
parents | eaa0daa7f834 |
children | 0d7bb4df2343 |
rev | line source |
---|---|
4103
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
2 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # Libervia plugin to handle URL previews |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
5 # Copyright (C) 2009-2022 Jérôme Poisson (goffi@goffi.org) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
20 from dataclasses import dataclass |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
21 import json |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 from textwrap import dedent |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
23 from typing import Callable, Dict, List, Optional, Union |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 from urllib import parse |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 import fnmatch |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
27 from lxml import etree |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
28 import treq |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
29 from twisted.internet import defer |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
30 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
31 from libervia.backend.core.constants import Const as C |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
32 from libervia.backend.core.core_types import SatXMPPEntity |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
33 from libervia.backend.core.exceptions import ConflictError |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 from libervia.backend.core.i18n import _ |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 from libervia.backend.core.log import getLogger |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
36 from libervia.backend.tools.common import data_format |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
37 from libervia.backend.tools.common.async_utils import async_lru |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
38 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# Module-level logger, named after this module.
log = getLogger(__name__)

# Plugin metadata, keyed by the ``C.PI_*`` constants.
PLUGIN_INFO = {
    C.PI_NAME: "Preview",
    C.PI_IMPORT_NAME: "Preview",
    C.PI_TYPE: C.PLUG_TYPE_MISC,
    C.PI_PROTOCOLS: ["Open Graph", "oEmbed"],
    # "TEXT_SYNTAXES" is the plugin whose ``clean_xhtml`` is used by
    # ``Preview.get_preview_data`` to clean HTML coming from unknown sources
    C.PI_DEPENDENCIES: ["TEXT_SYNTAXES"],
    # same name as the ``Preview`` class defined in this module
    C.PI_MAIN: "Preview",
    C.PI_HANDLER: "no",
    # the text is passed to the translation function first, then dedented
    C.PI_DESCRIPTION: dedent(
        _(
            """\
    Retrieves and provides a preview of URLs using various protocols. Initially, it
    uses the Open Graph protocol for most web pages. Specialized handlers are
    implemented for YouTube using the oEmbed protocol.
    """
        )
    ),
}
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
59 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# Open Graph property names handled by this plugin.
# NOTE(review): the code using this list is outside this view; presumably each name is
# looked up as an ``og:<name>`` meta property -- confirm against the Open Graph fetcher.
OG_TAGS = [
    # basic metadata of the Open Graph protocol
    "title",
    "type",
    "image",
    "url",
    # optional metadata of the Open Graph protocol
    "audio",
    "description",
    "determiner",
    "locale",
    "locale:alternate",
    "site_name",
    "video",
]
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
73 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
74 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
class PreviewFetchError(Exception):
    """Exception type of this plugin for failures while fetching preview data.

    NOTE(review): no raise site is visible in this part of the file.
    """
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
77 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
78 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@dataclass
class Protocol:
    """Registration record of a preview protocol, as stored in ``Preview.protocols``"""

    # unique name under which the protocol is registered
    name: str
    # async callable awaited as ``callback(client, url, options)``; it returns a dict of
    # preview data, or None when nothing could be retrieved
    callback: Callable
    # when no domain specific protocol matches, protocols are tried by decreasing priority
    priority: int
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
84 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
85 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
86 class Preview: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
87 protocols: Dict[str, Protocol] = {} |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
88 domain_protocols: Dict[str, str] = {} |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
89 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
90 def __init__(self, host): |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
91 log.info(_("Preview plugin initialization")) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
92 self.host = host |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
93 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
94 # generic protocols |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
95 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
96 self.register("open_graph", self.fetch_open_graph_data, priority=100) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
97 self.register("oembed", self.fetch_generic_oembed_data, priority=50) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
98 self.register("generic", self.fetch_generic_data, priority=0) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
99 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
100 # domain specific protocols |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
101 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
102 self.register("oembed-youtube", self.fetch_youtube_oembed_data, priority=-100) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
103 self.register_domain_protocol( |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
104 ["www.youtube.com", "youtu.be", "m.youtube.com"], "oembed-youtube" |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
105 ) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
106 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
107 self.register("wikipedia", self.fetch_wikipedia_data, priority=-80) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
108 self.register_domain_protocol(["*.wikipedia.org"], "wikipedia") |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
109 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
110 self.register("invidious", self.fetch_invidious_data, priority=-90) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
111 self.register_domain_protocol( |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
112 ["yewtu.be", "www.yewtu.be", "invidious.fdn.fr"], |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
113 "invidious" |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
114 ) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
115 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
116 # bridge methods |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
117 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
118 host.bridge.add_method( |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
119 "url_preview_get", |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
120 ".plugin", |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
121 in_sign="sss", |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
122 out_sign="s", |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
123 method=self._url_preview_get, |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
124 async_=True, |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
125 ) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
126 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
127 # API |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
128 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
129 def _url_preview_get(self, url: str, options: str, profile_key: str) -> defer.Deferred: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
130 client = self.host.get_client(profile_key) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
131 d = defer.ensureDeferred( |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
132 self.get_preview_data(client, url, data_format.deserialise(options)) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
133 ) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
134 d.addCallback(data_format.serialise) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
135 return d |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
136 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
137 @async_lru() |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
138 async def get_preview_data( |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
139 self, client: SatXMPPEntity, url: str, options: dict |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
140 ) -> Optional[dict]: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
141 """Fetch preview data from a url using registered protocols |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
142 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
143 @param url: The url to fetch the preview data from |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
144 @param options: Additional options that may be used while fetching preview data |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
145 @return: A dictionary containing the preview data or None if no data could be |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
146 fetched |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
147 """ |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
148 parsed_url = parse.urlparse(url) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
149 domain = parsed_url.netloc |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
150 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
151 preview_data: Optional[dict] = None |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
152 matched_protocol = None |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
153 for registered_domain, registered_protocol in self.domain_protocols.items(): |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
154 if fnmatch.fnmatch(domain, registered_domain): |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
155 matched_protocol = registered_protocol |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
156 break |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
157 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
158 if matched_protocol is not None: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
159 callback = self.protocols[matched_protocol].callback |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
160 preview_data = await callback(client, url, options) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
161 else: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
162 for name, protocol in sorted( |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
163 self.protocols.items(), key=lambda item: item[1].priority, reverse=True |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
164 ): |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
165 try: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
166 preview_data = await protocol.callback(client, url, options) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
167 except Exception as e: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
168 log.warning(f"Can't run protocol {name} for {url}: {e}") |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
169 else: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
170 if preview_data is not None: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
171 matched_protocol = protocol.name |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
172 break |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
173 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
174 if preview_data is not None: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
175 preview_data["protocol"] = matched_protocol |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
176 # we don't clean html for youtube as we need Javascript to make it work, and |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
177 # for invidious as we generate it ourself |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
178 if "html" in preview_data: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
179 if matched_protocol in ("oembed-youtube", "invidious"): |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
180 # this flag indicate that we know the source of HTML and we should be |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
181 # able to trust it. This will add `allow-scripts` and |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
182 # `allow-same-origin` in the preview <iframe> "sandbox" attribute |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
183 preview_data["html_known"] = True |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
184 else: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
185 preview_data["html_known"] = False |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
186 clean_xhtml = self.host.plugins["TEXT_SYNTAXES"].clean_xhtml |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
187 try: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
188 preview_data["html"] = clean_xhtml(preview_data["html"]) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
189 except Exception as e: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
190 log.warning(f"Can't clean html data: {e}\n{preview_data}") |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
191 del preview_data["html"] |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
192 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
193 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
194 return preview_data |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
195 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
196 @classmethod |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
197 def register(cls, name: str, callback: Callable, priority: int = 0): |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
198 """Register a protocol to retrieve preview data |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
199 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
200 The registered callback should return a dictionary of preview data if available, |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
201 or None otherwise. |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
202 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
203 @param name: Unique name of the protocol |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
204 @param callback: Async callback function to fetch preview data |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
205 @param priority: Priority of the protocol, with higher numbers indicating higher |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
206 priority |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
207 @return: None |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
208 """ |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
209 if name in cls.protocols: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
210 raise ConflictError(f"Protocol with the name {name} is already registered.") |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
211 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
212 cls.protocols[name] = Protocol(name=name, callback=callback, priority=priority) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
213 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@classmethod
def register_domain_protocol(cls, domains: Union[str, List[str]], protocol_name: str):
    """Associate one or several domains with an already registered protocol

    The protocol name is normalised (spaces removed, then lower-cased) before being
    looked up. Each domain is stripped of surrounding whitespace; a domain which is
    empty after stripping is skipped with a warning.

    @param domains: a single domain name, or a list of domain names
    @param protocol_name: name of the protocol to associate with the domain(s)
    @raise ConflictError: no protocol is registered under the normalised name
    """
    normalised_name = protocol_name.replace(" ", "").lower()
    if normalised_name not in cls.protocols:
        raise ConflictError(
            f"Protocol with the name {normalised_name} is not registered."
        )

    # a lone string is handled as a one-element list
    domain_list = [domains] if isinstance(domains, str) else domains

    for raw_domain in domain_list:
        cleaned = raw_domain.strip()
        if cleaned:
            cls.domain_protocols[cleaned] = normalised_name
        else:
            log.warning("empty string used as domain, ignoring")
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
237 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
238 # Open Graph |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
239 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_open_graph_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch Open Graph data from a url

    This method implements the Open Graph protocol, details of which can be found at:
    http://ogp.me/

    @param client: client session requesting the preview (not used here)
    @param url: The url to fetch the Open Graph data from
    @param options: Additional options that may be used while fetching data
        (not used here)
    @return: A dictionary mapping each Open Graph tag found (without the "og:"
        prefix) to its content, or None if no data could be fetched
    @raise PreviewFetchError: the server answered with a status code other than 200
    """
    resp = await treq.get(url)
    if resp.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch preview for {url}, status code: {resp.code}"
        )

    html = await resp.text()
    tree = etree.fromstring(html, etree.HTMLParser())
    if tree is None:
        # the HTML parser can yield no root element (e.g. document without any
        # markup): there is nothing to extract then
        return None

    # Extract Open Graph data
    metadata = {}
    for tag in OG_TAGS:
        og_el = tree.find(f'.//meta[@property="og:{tag}"]')
        if og_el is None:
            continue
        content = og_el.get("content")
        # a <meta> element without "content" attribute carries no usable value
        if content is not None:
            metadata[tag] = content

    if not metadata:
        return None

    # "provider_name" is filled from "site_name" when it is not set explicitly
    if "site_name" in metadata and "provider_name" not in metadata:
        metadata["provider_name"] = metadata["site_name"]
    return metadata
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
277 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
278 # oEmbed |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
279 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def _fetch_oembed_data(self, oembed_url: str) -> Optional[dict]:
    """Download an oEmbed document and decode it from JSON

    @param oembed_url: URL of the oEmbed endpoint to request
    @return: the decoded JSON body of the response
    @raise PreviewFetchError: the server answered with a status code other than 200
    """
    response = await treq.get(oembed_url)
    if response.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch oEmbed preview for {oembed_url}, status code: "
            f"{response.code}"
        )
    payload = await response.text()
    return json.loads(payload)
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
295 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_youtube_oembed_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch YouTube oEmbed data from a url

    When the oEmbed answer contains an "html" entry with an <iframe>, the first
    <iframe> gets an inline style making it fill its container, and the "html"
    entry is replaced by the re-serialised markup.

    @param client: client session requesting the preview (not used here)
    @param url: The url to fetch the YouTube oEmbed data from
    @param options: Additional options that may be used while fetching data
        (not used here)
    @return: A dictionary containing the YouTube oEmbed data or None if no data could
        be fetched
    @raise PreviewFetchError: the oEmbed endpoint answered with a status code other
        than 200
    """
    oembed_url = f"https://www.youtube.com/oembed?url={parse.quote(url)}&format=json"
    data = await self._fetch_oembed_data(oembed_url)
    if data is None or "html" not in data:
        return data

    root = etree.HTML(data["html"])
    # etree.HTML may return None when the markup has no element at all; this is
    # handled like a missing <iframe> instead of failing on ``None.xpath``
    iframes = root.xpath("//iframe") if root is not None else []
    if not iframes:
        log.warning("No <iframe> found in the YouTube oEmbed response")
        return data

    iframes[0].attrib["style"] = (
        "position: absolute; top: 0; left: 0; width: 100%; height: 100%;"
    )
    data["html"] = etree.tostring(root, method="html", encoding="unicode")
    return data
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
321 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_generic_oembed_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch generic oEmbed data from a url

    The page is downloaded and searched for a
    <link type="application/json+oembed"> element; the oEmbed document it points to
    is then fetched.

    @param client: client session requesting the preview (not used here)
    @param url: The url to fetch the oEmbed data from
    @param options: Additional options that may be used while fetching data
        (not used here)
    @return: A dictionary containing the oEmbed data or None if no data could be
        fetched (no usable oEmbed link in the page)
    @raise PreviewFetchError: the page or the oEmbed endpoint answered with a status
        code other than 200
    """
    resp = await treq.get(url)
    if resp.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch preview for {url}, status code: {resp.code}"
        )

    html = await resp.text()
    tree = etree.fromstring(html, etree.HTMLParser())
    if tree is None:
        # the HTML parser can yield no root element: no link to discover
        return None

    # Find oEmbed URL
    oembed_link = tree.find('.//link[@type="application/json+oembed"]')
    if oembed_link is None:
        return None

    href = oembed_link.get("href")
    if not href:
        # a <link> without target can't be followed
        return None

    # the href may be relative to the page, it is resolved against the page URL
    return await self._fetch_oembed_data(parse.urljoin(url, href))
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
349 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
350 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_generic_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch generic data from a url

    This method attempts to extract the title, description, and author metadata from
    the HTML of the page. If none of these data can be found, the method will return
    None.

    @param client: client session requesting the preview (not used here)
    @param url: The url to fetch the generic data from
    @param options: Additional options that may be used while fetching data
        (not used here)
    @return: A dictionary containing the generic data or None if no data could be
        fetched
    @raise PreviewFetchError: the server answered with a status code other than 200
    """
    resp = await treq.get(url)
    if resp.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch generic preview for {url}, status code: {resp.code}"
        )

    html = await resp.text()
    tree = etree.fromstring(html, etree.HTMLParser())
    if tree is None:
        # the HTML parser can yield no root element: nothing to extract
        return None

    # Find title, description, and author metadata
    title_el = tree.find(".//title")
    desc_el = tree.find('.//meta[@name="description"]')
    author_el = tree.find('.//meta[@name="author"]')

    # ``text`` is None for an empty <title/>, and ``get`` returns None when the
    # "content" attribute is absent: both are normalised to an empty string
    title = (title_el.text or "").strip() if title_el is not None else ""
    description = (desc_el.get("content") or "") if desc_el is not None else ""
    author_name = (author_el.get("content") or "") if author_el is not None else ""

    # only data extracted from the page count: "url" and provider fields are always
    # set, so testing the whole dict would never lead to None
    if not (title or description or author_name):
        return None

    parsed_url = parse.urlparse(url)
    return {
        "title": title,
        "description": description,
        "author_name": author_name,
        "url": url,
        "provider_name": parsed_url.netloc,
        "provider_url": f"{parsed_url.scheme}://{parsed_url.netloc}",
    }
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
389 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
390 # Wikipedia |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
391 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_wikipedia_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch Wikipedia data from a url

    This method implements the Wikipedia API, details of which can be found at:
    https://www.mediawiki.org/wiki/API:Main_page

    The page name is the last segment of the URL path; the API of the same host is
    queried for an introduction extract and a thumbnail.

    @param client: client session requesting the preview (not used here)
    @param url: The url to fetch the Wikipedia data from
    @param options: Additional options that may be used while fetching data
        (not used here)
    @return: A dictionary containing the Wikipedia data or None if no data could be
        fetched (no page name in URL, no page in the answer, or missing page)
    @raise PreviewFetchError: the API answered with a status code other than 200
    """
    parsed_url = parse.urlparse(url)
    page_name = parsed_url.path.split("/")[-1]
    if not page_name:
        # URL path ends with "/": there is no page to look for
        return None

    # The path segment may or may not be percent-encoded already; it is decoded then
    # fully re-encoded so characters such as "&", "+" or "#" in a title can't alter
    # the query string
    titles = parse.quote(parse.unquote(page_name), safe="")

    # Use the Wikipedia API to get a summary of the page and a preview image
    api_url = (
        f"https://{parsed_url.netloc}/w/api.php?format=json&action=query&"
        "prop=extracts|pageimages&exintro&explaintext&redirects=1&piprop=thumbnail"
        f"&pithumbsize=300&titles={titles}"
    )

    resp = await treq.get(api_url)
    if resp.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch Wikipedia preview for {url}, status code: {resp.code}"
        )

    data = json.loads(await resp.text())
    # an answer without "query"/"pages" (e.g. API error) is handled as "no data"
    pages = data.get("query", {}).get("pages")
    if not pages:
        return None

    # Only one title is requested: the first entry of "pages" is used
    page = next(iter(pages.values()))

    # The API may return a page with a missing title or extract if the page does
    # not exist
    if "missing" in page:
        return None

    return {
        "provider_name": "Wikipedia",
        "provider_url": "https://www.wikipedia.org",
        "title": page.get("title"),
        "description": page.get("extract"),
        "url": url,
        "image": page.get("thumbnail", {}).get("source"),
    }
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
441 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
442 # Invidious |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
443 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_invidious_data(self, client: SatXMPPEntity, url: str, options: dict) -> Optional[dict]:
    """
    Fetch Invidious data from a url and generate HTML iframe.

    The video ID is read from the ``v`` query argument when the URL path contains
    ``watch``, otherwise the whole path (without surrounding slashes) is used as ID.
    The instance API (``/api/v1/videos/<id>``) is then requested over HTTPS on the
    same host as the given URL.

    @param client: Client session requesting the preview (not used here).
    @param url: The url to fetch the Invidious data from.
    @param options: Additional options that may be used while fetching data
        (not used here).
    @return: A dictionary containing the Invidious data or None if no data could be fetched.
        ``image`` is None when the API returns no thumbnail, and ``author_url`` is
        None when the API returns no author ID.
    """
    # Local import: the escaping helper is only needed to build the iframe markup.
    from html import escape

    parsed_url = parse.urlparse(url)
    if 'watch' in parsed_url.path:
        video_id = parse.parse_qs(parsed_url.query).get('v', [None])[0]
    else:
        video_id = parsed_url.path.strip('/')
    if not video_id:
        log.warning(f"Can't extract video ID from {url}")
        return None

    # The URL comes from untrusted content: ``parse_qs`` percent-decodes the ID, so
    # it may contain quotes or markup. Percent-encode it again before putting it in
    # a URL; this is a no-op for regular IDs (letters, digits, "-" and "_").
    quoted_video_id = parse.quote(video_id, safe="")
    netloc = parsed_url.netloc
    invidious_api_url = f"https://{netloc}/api/v1/videos/{quoted_video_id}"

    resp = await treq.get(invidious_api_url)
    if resp.code != 200:
        log.warning(f"Unable to fetch video data from Invidious API for {video_id}")
        return None

    video_data = await resp.json()

    # construct the iframe html code
    # The embed URL is escaped so that it can't break out of the ``src`` attribute.
    embed_url = escape(f"https://{netloc}/embed/{quoted_video_id}", quote=True)
    iframe_html = (
        '<iframe'
        ' width="100%"'
        ' height="auto"'
        f' src="{embed_url}"'
        ' frameborder="0" '
        ' allow="'
        ' accelerometer;'
        ' autoplay;'
        ' clipboard-write;'
        ' encrypted-media;'
        ' gyroscope;'
        ' picture-in-picture"'
        ' style="'
        ' position: absolute;'
        ' top: 0;'
        ' left: 0;'
        ' width: 100%;'
        ' height: 100%;"'
        ' allowfullscreen></iframe>'
    )

    # ``videoThumbnails`` may be missing, null or an empty list: fall back to a
    # single empty entry so that ``image`` is None instead of raising.
    thumbnails = video_data.get("videoThumbnails") or [{}]

    # Without an author ID there is no channel to link to; don't build a bogus
    # ".../channel/None" URL.
    author_id = video_data.get("authorId")
    author_url = f"https://{netloc}/channel/{author_id}" if author_id else None

    # structure the data to be returned
    return {
        "title": video_data.get("title"),
        "description": video_data.get("description"),
        "url": url,
        "image": thumbnails[0].get("url"),
        "provider_name": "Invidious",
        "provider_url": f"https://{netloc}",
        "html": iframe_html,
        "author_name": video_data.get("author"),
        "author_url": author_url,
    }