Mercurial > libervia-backend
annotate libervia/backend/plugins/plugin_misc_url_preview.py @ 4351:6a0a081485b8
plugin autocrypt: Autocrypt protocol implementation:
Implementation of autocrypt: `autocrypt` header is checked, and if present and no public
key is known for the peer, the key is imported.
`autocrypt` header is also added to outgoing message (only if an email gateway is
detected).
For the moment, the JID is used as identifier, but the real email used by the gateway should be
used in the future.
rel 456
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 28 Feb 2025 09:23:35 +0100 |
parents | 0d7bb4df2343 |
children |
rev | line source |
---|---|
4103
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
2 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # Libervia plugin to handle URL previews |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
5 # Copyright (C) 2009-2022 Jérôme Poisson (goffi@goffi.org) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
20 from dataclasses import dataclass |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
21 import json |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 from textwrap import dedent |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
23 from typing import Callable, Dict, List, Optional, Union |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 from urllib import parse |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 import fnmatch |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
27 from lxml import etree |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
28 import treq |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
29 from twisted.internet import defer |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
30 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
31 from libervia.backend.core.constants import Const as C |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
32 from libervia.backend.core.core_types import SatXMPPEntity |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
33 from libervia.backend.core.exceptions import ConflictError |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 from libervia.backend.core.i18n import _ |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 from libervia.backend.core.log import getLogger |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
36 from libervia.backend.tools.common import data_format |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
37 from libervia.backend.tools.common.async_utils import async_lru |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
38 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
39 log = getLogger(__name__) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
40 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
41 PLUGIN_INFO = { |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
42 C.PI_NAME: "Preview", |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
43 C.PI_IMPORT_NAME: "Preview", |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 C.PI_TYPE: C.PLUG_TYPE_MISC, |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
45 C.PI_PROTOCOLS: ["Open Graph", "oEmbed"], |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
46 C.PI_DEPENDENCIES: ["TEXT_SYNTAXES"], |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
47 C.PI_MAIN: "Preview", |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
48 C.PI_HANDLER: "no", |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
49 C.PI_DESCRIPTION: dedent( |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
50 _( |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
51 """\ |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
52 Retrieves and provides a preview of URLs using various protocols. Initially, it |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
53 uses the Open Graph protocol for most web pages. Specialized handlers are |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
54 implemented for YouTube using the oEmbed protocol. |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
55 """ |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
56 ) |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
57 ), |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
58 } |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
59 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# Open Graph tag names handled by this plugin, written without any namespace prefix.
# NOTE(review): presumably looked up as ``og:<name>`` when parsing a page; the code
# consuming this list is not visible in this part of the file.
OG_TAGS = [
    "title", "type", "image", "url",
    "audio", "description", "determiner",
    "locale", "locale:alternate",
    "site_name", "video",
]
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
73 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
74 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
class PreviewFetchError(Exception):
    """Exception type dedicated to this plugin's preview fetching"""

    # NOTE(review): no raise site is visible in this part of the file; check the
    # fetch_* methods for the exact conditions in which it is raised.
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
77 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
78 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@dataclass
class Protocol:
    """Description of a registered way to retrieve preview data"""

    # name under which the protocol is stored in ``Preview.protocols``
    name: str
    # awaited as ``callback(client, url, options)``; it returns the preview data
    # dict, or None if nothing could be retrieved
    callback: Callable
    # protocols with a higher value are tried first
    priority: int
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
84 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
85 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
86 class Preview: |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
87 protocols: Dict[str, Protocol] = {} |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
88 domain_protocols: Dict[str, str] = {} |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
89 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def __init__(self, host):
    """Register the built-in protocols and expose the bridge method

    @param host: backend instance; it is stored on the plugin and its ``bridge`` is
        used to declare the ``url_preview_get`` method
    """
    log.info(_("Preview plugin initialization"))
    self.host = host

    # generic protocols

    generic_protocols = (
        ("open_graph", self.fetch_open_graph_data, 100),
        ("oembed", self.fetch_generic_oembed_data, 50),
        ("generic", self.fetch_generic_data, 0),
    )
    for name, callback, priority in generic_protocols:
        self.register(name, callback, priority=priority)

    # domain specific protocols
    # each protocol is registered, then immediately bound to its domains (binding
    # requires the protocol to be already registered)

    domain_protocols = (
        (
            "oembed-youtube",
            self.fetch_youtube_oembed_data,
            -100,
            ["www.youtube.com", "youtu.be", "m.youtube.com"],
        ),
        ("wikipedia", self.fetch_wikipedia_data, -80, ["*.wikipedia.org"]),
        (
            "invidious",
            self.fetch_invidious_data,
            -90,
            ["yewtu.be", "www.yewtu.be", "invidious.fdn.fr"],
        ),
    )
    for name, callback, priority, domains in domain_protocols:
        self.register(name, callback, priority=priority)
        self.register_domain_protocol(domains, name)

    # bridge methods

    host.bridge.add_method(
        "url_preview_get",
        ".plugin",
        in_sign="sss",
        out_sign="s",
        method=self._url_preview_get,
        async_=True,
    )
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
125 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
126 # API |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
127 |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4103
diff
changeset
|
def _url_preview_get(
    self, url: str, options: str, profile_key: str
) -> defer.Deferred:
    """Bridge wrapper around [get_preview_data]

    @param url: URL to get a preview for
    @param options: serialised options, deserialised before being forwarded
    @param profile_key: key used to retrieve the client
    @return: Deferred firing with the serialised preview data
    """
    client = self.host.get_client(profile_key)
    parsed_options = data_format.deserialise(options)
    preview_d = defer.ensureDeferred(
        self.get_preview_data(client, url, parsed_options)
    )
    # addCallback returns the Deferred itself, so the chained form can be returned
    return preview_d.addCallback(data_format.serialise)
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
137 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@async_lru()
async def get_preview_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch preview data from a url using registered protocols

    If the host of the URL matches a pattern registered with
    [register_domain_protocol], only the associated protocol is used, and any
    exception raised by its callback is propagated. Otherwise, all registered
    protocols are tried by decreasing priority until one returns data; a protocol
    raising an exception is logged and skipped.

    @param client: client session, forwarded as is to the protocol callback
    @param url: The url to fetch the preview data from
    @param options: Additional options that may be used while fetching preview data
    @return: A dictionary containing the preview data or None if no data could be
        fetched.
        When data is returned, the "protocol" key is set to the name of the protocol
        which produced it. If an "html" key is present, "html_known" tells if the
        HTML comes from a source considered as trusted; untrusted HTML is cleaned,
        and removed if the cleaning fails.
    """
    # ``hostname`` is used rather than ``netloc``: it is lower-cased and doesn't
    # include userinfo nor port, so "https://WWW.YouTube.com:443/…" still matches
    # the "www.youtube.com" pattern. It is None when the URL has no host.
    domain = parse.urlparse(url).hostname or ""

    preview_data: Optional[dict] = None
    matched_protocol = None
    for registered_domain, registered_protocol in self.domain_protocols.items():
        # host is already lower-cased, the pattern is lower-cased too so matching
        # is case-insensitive whatever the platform is
        if fnmatch.fnmatchcase(domain, registered_domain.lower()):
            matched_protocol = registered_protocol
            break

    if matched_protocol is not None:
        # no fallback here: errors of a domain specific protocol are propagated
        callback = self.protocols[matched_protocol].callback
        preview_data = await callback(client, url, options)
    else:
        for name, protocol in sorted(
            self.protocols.items(), key=lambda item: item[1].priority, reverse=True
        ):
            try:
                preview_data = await protocol.callback(client, url, options)
            except Exception as e:
                log.warning(f"Can't run protocol {name} for {url}: {e}")
            else:
                if preview_data is not None:
                    matched_protocol = protocol.name
                    break

    if preview_data is not None:
        preview_data["protocol"] = matched_protocol
        # we don't clean html for youtube as we need Javascript to make it work, and
        # for invidious as we generate it ourself
        if "html" in preview_data:
            if matched_protocol in ("oembed-youtube", "invidious"):
                # this flag indicate that we know the source of HTML and we should be
                # able to trust it. This will add `allow-scripts` and
                # `allow-same-origin` in the preview <iframe> "sandbox" attribute
                preview_data["html_known"] = True
            else:
                preview_data["html_known"] = False
                clean_xhtml = self.host.plugins["TEXT_SYNTAXES"].clean_xhtml
                try:
                    preview_data["html"] = clean_xhtml(preview_data["html"])
                except Exception as e:
                    log.warning(f"Can't clean html data: {e}\n{preview_data}")
                    # HTML which can't be cleaned is not forwarded at all
                    del preview_data["html"]

    return preview_data
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
195 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@classmethod
def register(cls, name: str, callback: Callable, priority: int = 0):
    """Add a protocol to the ones usable to retrieve preview data

    The callback is awaited with ``(client, url, options)`` and must return a
    dictionary of preview data if available, or None otherwise.

    @param name: Unique name of the protocol
    @param callback: Async callback function to fetch preview data
    @param priority: Priority of the protocol, with higher numbers indicating higher
        priority
    @return: None
    @raise ConflictError: a protocol is already registered with this name
    """
    # the registry is a class attribute, thus shared between all instances
    registry = cls.protocols
    if name in registry:
        raise ConflictError(f"Protocol with the name {name} is already registered.")

    registry[name] = Protocol(name=name, callback=callback, priority=priority)
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
213 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@classmethod
def register_domain_protocol(cls, domains: Union[str, List[str]], protocol_name: str):
    """Register a protocol for a specific domain or list of domains

    The protocol must have been registered with [register] beforehand.

    @param domains: The domain name or list of domain names.
        Names are used as ``fnmatch`` patterns (e.g. ``*.wikipedia.org``).
        Surrounding whitespace is stripped, and empty names are ignored with a
        warning.
    @param protocol_name: The name of the protocol to be associated with the domain(s)
        The normalised form of the name (lower-cased, spaces removed) is looked for
        first, then the name as given.
    @return: None
    @raise ConflictError: no protocol is registered with this name
    """
    normalised_name = protocol_name.replace(" ", "").lower()
    if normalised_name in cls.protocols:
        protocol_name = normalised_name
    elif protocol_name not in cls.protocols:
        # [register] stores names verbatim, so the name as given has also been
        # checked before giving up: otherwise a protocol registered with upper
        # case letters or spaces in its name could never be bound to a domain.
        raise ConflictError(
            f"Protocol with the name {normalised_name} is not registered."
        )

    if isinstance(domains, str):
        domains = [domains]

    for domain in domains:
        domain = domain.strip()
        if not domain:
            log.warning("empty string used as domain, ignoring")
            continue
        cls.domain_protocols[domain] = protocol_name
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
237 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
238 # Open Graph |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
239 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_open_graph_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch Open Graph data from a url

    This method implements the Open Graph protocol, details of which can be found at:
    http://ogp.me/

    @param client: client requesting the preview (not used by this fetcher)
    @param url: The url to fetch the Open Graph data from
    @param options: Additional options that may be used while fetching data
        (not used by this fetcher)
    @return: A dictionary mapping each tag of OG_TAGS found in the page (without the
        "og:" prefix) to its content, or None if no data could be fetched
    @raise PreviewFetchError: the request returned a status code other than 200
    """
    resp = await treq.get(url)
    if resp.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch preview for {url}, status code: {resp.code}"
        )

    html = await resp.text()
    tree = etree.fromstring(html, etree.HTMLParser())

    # Extract Open Graph data
    metadata = {}
    for tag in OG_TAGS:
        og_el = tree.find(f'.//meta[@property="og:{tag}"]')
        if og_el is None:
            continue
        content = og_el.get("content")
        # a <meta> element without "content" attribute carries no value: storing None
        # would make an otherwise empty preview look populated
        if content is not None:
            metadata[tag] = content

    if not metadata:
        return None

    # "provider_name" is the key used by the other fetchers, mirror "site_name" in it
    if "site_name" in metadata and "provider_name" not in metadata:
        metadata["provider_name"] = metadata["site_name"]
    return metadata
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
277 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
278 # oEmbed |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
279 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def _fetch_oembed_data(self, oembed_url: str) -> Optional[dict]:
    """Request an oEmbed endpoint and decode its JSON answer

    @param oembed_url: The url to fetch the oEmbed data from
    @return: The decoded JSON body of the response
    @raise PreviewFetchError: the request returned a status code other than 200
    """
    response = await treq.get(oembed_url)
    if response.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch oEmbed preview for {oembed_url}, status code: "
            f"{response.code}"
        )
    payload = await response.text()
    return json.loads(payload)
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
295 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_youtube_oembed_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Retrieve the oEmbed description of a YouTube URL

    The YouTube oEmbed endpoint is queried for the given URL. When the answer holds
    an "html" entry containing an <iframe>, the "style" attribute of the first
    <iframe> is overwritten so that it fills its container, and the "html" entry is
    replaced by the re-serialised markup.

    @param client: client requesting the preview (not used by this fetcher)
    @param url: The url to fetch the YouTube oEmbed data from
    @param options: Additional options that may be used while fetching data
        (not used by this fetcher)
    @return: A dictionary containing the YouTube oEmbed data or None if no data could
        be fetched
    """
    quoted_url = parse.quote(url)
    endpoint = f"https://www.youtube.com/oembed?url={quoted_url}&format=json"
    data = await self._fetch_oembed_data(endpoint)

    # nothing to post-process without embeddable markup
    if data is None or "html" not in data:
        return data

    document = etree.HTML(data["html"])
    iframes = document.xpath("//iframe")
    if not iframes:
        log.warning("No <iframe> found in the YouTube oEmbed response")
        return data

    first_iframe = iframes[0]
    first_iframe.attrib["style"] = (
        "position: absolute; top: 0; left: 0; width: 100%; height: 100%;"
    )
    data["html"] = etree.tostring(document, method="html", encoding="unicode")
    return data
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
321 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_generic_oembed_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch generic oEmbed data from a url

    The page is downloaded and searched for an oEmbed discovery link
    (<link type="application/json+oembed">); the endpoint it points to is then
    requested.

    @param client: client requesting the preview (not used by this fetcher)
    @param url: The url to fetch the oEmbed data from
    @param options: Additional options that may be used while fetching data
        (not used by this fetcher)
    @return: A dictionary containing the oEmbed data or None if no data could be
        fetched (no discovery link, or discovery link without usable "href")
    @raise PreviewFetchError: the page or the oEmbed endpoint returned a status code
        other than 200
    """
    resp = await treq.get(url)
    if resp.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch preview for {url}, status code: {resp.code}"
        )

    html = await resp.text()
    tree = etree.fromstring(html, etree.HTMLParser())

    # Find oEmbed URL
    oembed_link = tree.find('.//link[@type="application/json+oembed"]')
    if oembed_link is None:
        return None

    href = (oembed_link.get("href") or "").strip()
    if not href:
        # a discovery link without target can't be followed
        return None

    # the href may be relative to the page; an absolute href is left untouched by
    # urljoin
    oembed_url = parse.urljoin(url, href)
    return await self._fetch_oembed_data(oembed_url)
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
349 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_generic_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch generic data from a url

    This method attempts to extract the title, description, and author metadata from
    the HTML of the page. If these data cannot be found, the method will return None.

    @param client: client requesting the preview (not used by this fetcher)
    @param url: The url to fetch the generic data from
    @param options: Additional options that may be used while fetching data
        (not used by this fetcher)
    @return: A dictionary containing the generic data ("title", "description",
        "author_name", "url", "provider_name", "provider_url") or None if neither
        title, description nor author could be found
    @raise PreviewFetchError: the request returned a status code other than 200
    """
    resp = await treq.get(url)
    if resp.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch generic preview for {url}, status code: {resp.code}"
        )

    html = await resp.text()
    tree = etree.fromstring(html, etree.HTMLParser())

    # Find title, description, and author metadata
    title_el = tree.find(".//title")
    desc_el = tree.find('.//meta[@name="description"]')
    author_el = tree.find('.//meta[@name="author"]')

    # an empty <title> or a <meta> without "content" yields None: normalise to "" so
    # that missing data are always represented the same way
    extracted = {
        "title": (title_el.text or "") if title_el is not None else "",
        "description": (desc_el.get("content") or "") if desc_el is not None else "",
        "author_name": (
            (author_el.get("content") or "") if author_el is not None else ""
        ),
    }

    # only the data extracted from the page are checked: "url" and the provider
    # fields are always set and would make this test always succeed
    if not any(extracted.values()):
        return None

    parsed_url = parse.urlparse(url)
    return {
        **extracted,
        "url": url,
        "provider_name": parsed_url.netloc,
        "provider_url": f"{parsed_url.scheme}://{parsed_url.netloc}",
    }
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
388 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
389 # Wikipedia |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
390 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
async def fetch_wikipedia_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """Fetch Wikipedia data from a url

    This method implements the Wikipedia API, details of which can be found at:
    https://www.mediawiki.org/wiki/API:Main_page

    @param client: client requesting the preview (not used by this fetcher)
    @param url: The url to fetch the Wikipedia data from
    @param options: Additional options that may be used while fetching data
        (not used by this fetcher)
    @return: A dictionary containing the Wikipedia data or None if no data could be
        fetched (no page title in the URL, unknown or invalid page, or answer
        without page data)
    @raise PreviewFetchError: the API request returned a status code other than 200
    """
    parsed_url = parse.urlparse(url)
    path = parsed_url.path
    wiki_prefix = "/wiki/"
    if path.startswith(wiki_prefix):
        # page titles may contain "/" (e.g. "AC/DC"): keep everything after the
        # prefix instead of the last path segment only
        raw_title = path[len(wiki_prefix):]
    else:
        raw_title = path.split("/")[-1]

    page_name = parse.unquote(raw_title)
    if not page_name:
        # no page is designated by this URL (e.g. site root), nothing to preview
        return None

    # the title is re-encoded so that characters such as "&" or "+" can't alter the
    # query string
    quoted_title = parse.quote(page_name, safe="")

    # Use the Wikipedia API to get a summary of the page and a preview image
    api_url = (
        f"https://{parsed_url.netloc}/w/api.php?format=json&action=query&"
        "prop=extracts|pageimages&exintro&explaintext&redirects=1&piprop=thumbnail"
        f"&pithumbsize=300&titles={quoted_title}"
    )

    resp = await treq.get(api_url)
    if resp.code != 200:
        raise PreviewFetchError(
            f"Failed to fetch Wikipedia preview for {url}, status code: {resp.code}"
        )

    data = json.loads(await resp.text())
    # "query" or "pages" may be absent from the answer (e.g. API error)
    pages = data.get("query", {}).get("pages") or {}
    # a single title is requested, the page is the first value of "pages"
    page = next(iter(pages.values()), None)

    # The API flags pages which do not exist with "missing", and titles which can't
    # be used with "invalid"
    if page is None or "missing" in page or "invalid" in page:
        return None

    return {
        "provider_name": "Wikipedia",
        "provider_url": "https://www.wikipedia.org",
        "title": page.get("title"),
        "description": page.get("extract"),
        "url": url,
        "image": page.get("thumbnail", {}).get("source"),
    }
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
442 |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
443 # Invidious |
eaa0daa7f834
plugin URL preview: URL preview first draft
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
444 |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4103
diff
changeset
|
async def fetch_invidious_data(
    self, client: SatXMPPEntity, url: str, options: dict
) -> Optional[dict]:
    """
    Fetch Invidious data from a url and generate HTML iframe.

    The video ID is taken from the ``v`` query argument when the URL path contains
    ``watch``, otherwise from the URL path itself. Metadata is then requested from
    the ``/api/v1/videos/<video_id>`` endpoint of the instance hosting the URL.

    @param client: client session (not used by this method).
    @param url: The url to fetch the Invidious data from.
    @param options: Additional options that may be used while fetching data
        (not used by this method).
    @return: A dictionary containing the Invidious data or None if no data could be
        fetched (no video ID found in the URL, or API status code is not 200).
    """
    # Imported locally (and aliased) so that this block does not depend on the
    # module-level imports and does not clash with any ``html`` name in scope.
    from html import escape as html_escape

    parsed_url = parse.urlparse(url)
    if "watch" in parsed_url.path:
        video_id = parse.parse_qs(parsed_url.query).get("v", [None])[0]
    else:
        video_id = parsed_url.path.strip("/")
    if not video_id:
        log.warning(f"Can't extract video ID from {url}")
        return None

    instance_url = f"https://{parsed_url.netloc}"
    invidious_api_url = f"{instance_url}/api/v1/videos/{video_id}"

    resp = await treq.get(invidious_api_url)
    if resp.code != 200:
        log.warning(f"Unable to fetch video data from Invidious API for {video_id}")
        return None

    video_data = await resp.json()

    # Both the host and the video ID come straight from the URL we were given: they
    # must be escaped before being put inside an HTML attribute, otherwise a crafted
    # URL could close the ``src`` attribute and inject arbitrary markup.
    embed_src = html_escape(f"{instance_url}/embed/{video_id}", quote=True)

    # construct the iframe html code
    iframe_html = (
        "<iframe"
        ' width="100%"'
        ' height="auto"'
        f' src="{embed_src}"'
        ' frameborder="0" '
        ' allow="'
        " accelerometer;"
        " autoplay;"
        " clipboard-write;"
        " encrypted-media;"
        " gyroscope;"
        ' picture-in-picture"'
        ' style="'
        " position: absolute;"
        " top: 0;"
        " left: 0;"
        " width: 100%;"
        ' height: 100%;"'
        " allowfullscreen></iframe>"
    )

    # "videoThumbnails" may be missing, null or an empty list: none of these must
    # prevent the preview from being generated.
    thumbnails = video_data.get("videoThumbnails") or []
    image = thumbnails[0].get("url") if thumbnails else None

    # Only build a channel URL when the API actually gave us a channel ID, to avoid
    # returning a link ending with ".../channel/None".
    author_id = video_data.get("authorId")
    author_url = f"{instance_url}/channel/{author_id}" if author_id else None

    # structure the data to be returned
    return {
        "title": video_data.get("title"),
        "description": video_data.get("description"),
        "url": url,
        "image": image,
        "provider_name": "Invidious",
        "provider_url": instance_url,
        "html": iframe_html,
        "author_name": video_data.get("author"),
        "author_url": author_url,
    }