Mercurial > libervia-backend
comparison libervia/backend/plugins/plugin_misc_jid_search.py @ 4108:238e305f2306
plugin JID Search: JID search plugin, first draft
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 06 Jul 2023 11:34:51 +0200 |
parents | |
children | 0d7bb4df2343 |
comparison
equal
deleted
inserted
replaced
4107:bc7d45dedeb0 | 4108:238e305f2306 |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 # Libervia plugin to handle XMPP entities search | |
4 # Copyright (C) 2009-2023 Jérôme Poisson (goffi@goffi.org) | |
5 | |
6 # This program is free software: you can redistribute it and/or modify | |
7 # it under the terms of the GNU Affero General Public License as published by | |
8 # the Free Software Foundation, either version 3 of the License, or | |
9 # (at your option) any later version. | |
10 | |
11 # This program is distributed in the hope that it will be useful, | |
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 # GNU Affero General Public License for more details. | |
15 | |
16 # You should have received a copy of the GNU Affero General Public License | |
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | |
19 from collections import OrderedDict | |
20 from dataclasses import dataclass, asdict | |
21 import difflib | |
22 from typing import List, Optional | |
23 | |
24 from twisted.internet import defer | |
25 from twisted.words.protocols.jabber import jid | |
26 | |
27 from libervia.backend.core.constants import Const as C | |
28 from libervia.backend.core.core_types import SatXMPPEntity | |
29 from libervia.backend.core.i18n import _ | |
30 from libervia.backend.core.log import getLogger | |
31 from libervia.backend.tools.common import data_format | |
32 | |
# Module-level logger, named after this module.
log = getLogger(__name__)


# Metadata describing this plugin; keys are constants from ``C``.
PLUGIN_INFO = {
    C.PI_NAME: "JID Search",
    C.PI_IMPORT_NAME: "JID_SEARCH",
    C.PI_TYPE: C.PLUG_TYPE_MISC,
    C.PI_MODES: C.PLUG_MODE_BOTH,
    # No protocol, dependency or recommendation is declared.
    C.PI_PROTOCOLS: [],
    C.PI_DEPENDENCIES: [],
    C.PI_RECOMMENDATIONS: [],
    # Name of the class below which implements the plugin.
    C.PI_MAIN: "JidSearch",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("""Search for XMPP entities"""),
}

# Minimal ``difflib.SequenceMatcher.ratio()`` value for a fuzzy comparison to be
# kept as a match.
RATIO_CUTOFF = 0.6

# Maximum number of search terms kept in a client's search cache.
MAX_CACHE_SIZE = 10
50 | |
51 | |
@dataclass
class JidSearchItem:
    """A single result of an entity search."""

    # JID of the entity found.
    entity: jid.JID
    # Human readable name of the entity; empty when none is set.
    name: str = ""
    # True when the entity was found in the roster.
    in_roster: bool = False
    # Groups attached to the entity, if any.
    groups: list[str] | None = None
    # True when the search term is equal to the name or to the full JID.
    exact_match: bool = False
    # Matching score: 1 for an exact match, otherwise the similarity ratio which
    # made the item match; None while no matching has been done.
    relevance: float | None = None
60 | |
61 | |
# Search cache type: maps a search term to the list of results found for it. An
# ordered mapping is used so that entries can be reordered and evicted by position.
JidSearchCache = OrderedDict[str, list[JidSearchItem]]
63 | |
64 | |
class JidSearch:
    """Plugin searching XMPP entities matching a free-text search term.

    Candidates come from the client's roster. Each candidate is compared to the
    search term, first for equality then with ``difflib`` similarity ratios.
    Results are kept in a small per-client cache keyed by search term, and the
    search is exposed through the ``jid_search`` bridge method.
    """

    def __init__(self, host) -> None:
        """Keep a reference to the host and register the bridge method.

        @param host: Host instance; its ``bridge`` is used to register the
            ``jid_search`` method.
        """
        log.info(f"plugin {PLUGIN_INFO[C.PI_NAME]!r} initialization")
        self.host = host
        host.bridge.add_method(
            "jid_search",
            ".plugin",
            in_sign="sss",
            out_sign="s",
            method=self._search,
            async_=True,
        )

    def profile_connecting(self, client: SatXMPPEntity) -> None:
        """Attach a new, empty search cache to the client.

        @param client: The client to which the cache is attached.
        """
        client._jid_search_cache = JidSearchCache()

    def _search(self, search_term: str, options_s: str, profile: str) -> defer.Deferred:
        """Bridge method: deserialise arguments, search, then serialise results.

        @param search_term: The query to be searched.
        @param options_s: Serialised search options.
        @param profile: Profile used to retrieve the client.
        @return: A Deferred firing with the serialised list of found items (each
            item being converted to a dict).
        """
        client = self.host.get_client(profile)
        d = defer.ensureDeferred(
            self.search(client, search_term, data_format.deserialise(options_s))
        )
        d.addCallback(
            lambda search_items: data_format.serialise([asdict(i) for i in search_items])
        )
        return d

    async def search(
        self, client: SatXMPPEntity, search_term: str, options: Optional[dict] = None
    ) -> List[JidSearchItem]:
        """Searches for entities in various locations.

        The search term is stripped and lower-cased before being used. Results are
        sorted with exact matches first, then by decreasing relevance, then with
        roster entities first.

        @param client: The SatXMPPEntity client where the search is to be performed.
        @param search_term: The query to be searched.
        @param options: Additional search options (currently unused).
        @return: A list of matches found. The list itself is new on each call, but
            the items it holds may be shared with the cache.
        """
        search_term = search_term.strip().lower()
        cache: JidSearchCache = client._jid_search_cache

        # Only an identical search term can be served from the cache. Cached lists
        # are already filtered by the fuzzy matching, and a term matching poorly
        # (e.g. "a") says nothing about what a longer term starting with it (e.g.
        # "alice") would match: filtering cached results of a prefix loses items.
        # NOTE(review): nothing here invalidates the cache when the roster changes;
        #   verify whether this is handled elsewhere.
        cached = cache.get(search_term)
        if cached is not None:
            log.debug(
                f"Match found in cache for {search_term!r} in [{client.profile}]."
            )
            # mark the entry as the most recently used one
            cache.move_to_end(search_term)
            roster_matches = cached
        else:
            sequence_matcher = difflib.SequenceMatcher()
            sequence_matcher.set_seq1(search_term)
            roster_matches = await self._perform_search(
                client, search_term, sequence_matcher
            )
            cache[search_term] = roster_matches
            if len(cache) > MAX_CACHE_SIZE:
                # evict the least recently used entry
                cache.popitem(last=False)

        # We work on a copy, so appending and sorting below never alter the cached
        # list.
        matches = list(roster_matches)

        # If no exact match is found, but the search term is a valid JID, we add the
        # JID as a result
        exact_match = any(m.exact_match for m in matches)
        if not exact_match and "@" in search_term:
            try:
                search_jid = jid.JID(search_term)
            except jid.InvalidFormat:
                pass
            else:
                matches.append(
                    JidSearchItem(
                        entity=search_jid,
                        in_roster=False,
                        exact_match=True,
                        relevance=1,
                    )
                )

        matches.sort(
            key=lambda item: (item.exact_match, item.relevance or 0, item.in_roster),
            reverse=True,
        )

        return matches

    def _process_matching(
        self,
        search_term: str,
        sequence_matcher: difflib.SequenceMatcher,
        matches: List[JidSearchItem],
        item: JidSearchItem,
    ) -> None:
        """Process matching of an item, and append it to matches if it fits.

        The item is an exact match when the search term is equal to its lower-cased
        name or full JID. Otherwise the term is compared, in this order, to the
        name, the full JID, the JID localpart, the groups and the JID domain: the
        first one whose similarity ratio reaches RATIO_CUTOFF makes the item a
        match, and this ratio becomes the item relevance.

        ``item.exact_match`` is always updated; ``item.relevance`` is updated only
        when the item matches.

        @param search_term: The lower-cased query; it is expected to be the first
            sequence already set in sequence_matcher.
        @param sequence_matcher: The sequence matcher to be used for the matching
            process.
        @param matches: A list where the match is to be appended.
        @param item: The item that to be matched.
        """
        # an unset (None) name is handled like an empty one
        item_name_lower = (item.name or "").lower()
        item_entity_lower = item.entity.full().lower()

        if search_term in (item_name_lower, item_entity_lower):
            item.exact_match = True
            item.relevance = 1
            matches.append(item)
            return

        item.exact_match = False

        # Each entry is a list of strings checked together (only the best ratio of
        # the list is used); entries are checked in order.
        candidates: List[List[str]] = [[item_name_lower], [item_entity_lower]]
        if item.entity.user:
            candidates.append([item.entity.user.lower()])
        if item.groups:
            candidates.append([group.lower() for group in item.groups])
        candidates.append([item.entity.host.lower()])

        for candidate_values in candidates:
            best_ratio = 0.0
            for value in candidate_values:
                sequence_matcher.set_seq2(value)
                best_ratio = max(best_ratio, sequence_matcher.ratio())
            if best_ratio >= RATIO_CUTOFF:
                item.relevance = best_ratio
                matches.append(item)
                return

    async def _perform_search(
        self,
        client: SatXMPPEntity,
        search_term: str,
        sequence_matcher: difflib.SequenceMatcher,
    ) -> List[JidSearchItem]:
        """Performs a new search when no match is found in the cache.

        @param client: The client whose roster is searched.
        @param search_term: The query to be searched.
        @param sequence_matcher: The SequenceMatcher object to be used for matching.
        @return: A list of matches found (unsorted).
        """
        matches: List[JidSearchItem] = []

        # components have no roster
        roster = getattr(client, "roster", None)
        roster_items = [] if roster is None else roster.get_items()

        for roster_item in roster_items:
            jid_search_item = JidSearchItem(
                entity=roster_item.entity,
                name=roster_item.name,
                in_roster=True,
                groups=list(roster_item.groups),
            )

            self._process_matching(
                search_term, sequence_matcher, matches, jid_search_item
            )

        return matches