comparison libervia/backend/plugins/plugin_misc_jid_search.py @ 4108:238e305f2306

plugin JID Search: JID search plugin, first draft
author Goffi <goffi@goffi.org>
date Thu, 06 Jul 2023 11:34:51 +0200
parents
children 0d7bb4df2343
comparison
equal deleted inserted replaced
4107:bc7d45dedeb0 4108:238e305f2306
1 #!/usr/bin/env python3
2
3 # Libervia plugin to handle XMPP entities search
4 # Copyright (C) 2009-2023 Jérôme Poisson (goffi@goffi.org)
5
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Affero General Public License for more details.
15
16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19 from collections import OrderedDict
20 from dataclasses import dataclass, asdict
21 import difflib
22 from typing import List, Optional
23
24 from twisted.internet import defer
25 from twisted.words.protocols.jabber import jid
26
27 from libervia.backend.core.constants import Const as C
28 from libervia.backend.core.core_types import SatXMPPEntity
29 from libervia.backend.core.i18n import _
30 from libervia.backend.core.log import getLogger
31 from libervia.backend.tools.common import data_format
32
# Module-level logger, named after this module.
log = getLogger(__name__)


# Plugin metadata, keyed by the ``PI_*`` constants of ``Const``.
# NOTE(review): presumably consumed by the backend plugin loader — confirm there
# before renaming or removing any key.
PLUGIN_INFO = {
    C.PI_NAME: "JID Search",
    C.PI_IMPORT_NAME: "JID_SEARCH",
    C.PI_TYPE: C.PLUG_TYPE_MISC,
    C.PI_MODES: C.PLUG_MODE_BOTH,
    C.PI_PROTOCOLS: [],
    C.PI_DEPENDENCIES: [],
    C.PI_RECOMMENDATIONS: [],
    # name of the plugin class defined in this module
    C.PI_MAIN: "JidSearch",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("""Search for XMPP entities"""),
}
# Minimum ``difflib.SequenceMatcher`` ratio (compared with ``>=``) for an item to be
# kept as a non-exact match.
RATIO_CUTOFF = 0.6
# Maximum number of search terms kept in a profile's search cache; when exceeded, the
# entry at the front of the ordered cache is dropped.
MAX_CACHE_SIZE = 10
50
51
@dataclass
class JidSearchItem:
    """One entity found by a JID search.

    ``exact_match`` and ``relevance`` are filled in by the matching process; the other
    fields describe the entity itself.
    """

    # JID of the entity
    entity: jid.JID
    # human readable name, empty when there is none
    name: str = ""
    # True when the entity comes from the profile's roster
    in_roster: bool = False
    # roster groups of the entity, None when not applicable
    groups: Optional[list[str]] = None
    # True when the search term is equal to the name or to the JID of the entity
    exact_match: bool = False
    # similarity ratio with the search term (1 for an exact match), None when the item
    # has not been scored
    relevance: Optional[float] = None
60
61
# Per-profile search cache: maps a search term to the items found for it.
# This alias is used both as a type annotation and as a constructor
# (``JidSearchCache()``).
JidSearchCache = OrderedDict[str, list[JidSearchItem]]
63
64
class JidSearch:
    """Search XMPP entities by fuzzy-matching a term against known entities.

    Entities currently come from the profile's roster. Results are kept in a small
    per-profile cache stored on the client as ``_jid_search_cache``.
    """

    def __init__(self, host) -> None:
        """Register the ``jid_search`` bridge method.

        @param host: backend host instance; its bridge is used to expose the search.
        """
        log.info(f"plugin {PLUGIN_INFO[C.PI_NAME]!r} initialization")
        self.host = host
        host.bridge.add_method(
            "jid_search",
            ".plugin",
            in_sign="sss",
            out_sign="s",
            method=self._search,
            async_=True,
        )

    def profile_connecting(self, client: SatXMPPEntity) -> None:
        """Attach an empty search cache to the connecting client.

        @param client: client of the profile which is connecting.
        """
        client._jid_search_cache = JidSearchCache()

    def _search(self, search_term: str, options_s: str, profile: str) -> defer.Deferred:
        """Bridge entry point for [search].

        @param search_term: The query to be searched.
        @param options_s: Serialised search options.
        @param profile: Profile for which the search is done.
        @return: A Deferred firing with the serialised list of found items.
        """
        client = self.host.get_client(profile)
        d = defer.ensureDeferred(
            self.search(client, search_term, data_format.deserialise(options_s))
        )
        d.addCallback(
            lambda search_items: data_format.serialise([asdict(i) for i in search_items])
        )
        return d

    async def search(
        self, client: SatXMPPEntity, search_term: str, options: Optional[dict] = None
    ) -> List[JidSearchItem]:
        """Searches for entities in various locations.

        @param client: The SatXMPPEntity client where the search is to be performed.
        @param search_term: The query to be searched. It is stripped and lowercased
            before use.
        @param options: Additional search options (currently unused).
        @return: A new list of matches found, sorted with exact matches first, then by
            decreasing relevance, then with roster entities first.
        """
        search_term = search_term.strip().lower()
        sequence_matcher = difflib.SequenceMatcher()
        sequence_matcher.set_seq1(search_term)
        # FIXME: cache can give different results due to the filtering mechanism (if a
        #   cached search term matches the beginning of current search term, its
        #   results are re-used and filtered, and sometimes items can be missing in
        #   comparison to the results without caching). This may need to be fixed.
        cache: JidSearchCache = client._jid_search_cache
        matches: Optional[List[JidSearchItem]] = None

        if search_term in cache:
            # Results cached for this very term are always preferred over results
            # filtered from a shorter cached term, so that the same term gives the
            # same results whatever the order of the cache is.
            log.debug(f"Match found in cache for {search_term!r} in [{client.profile}].")
            log.debug("Exact match found in cache, reusing results.")
            matches = cache[search_term]
            cache.move_to_end(search_term)
        else:
            for cache_key in cache:
                # An empty key is a prefix of every term: it must never be used to
                # filter results.
                if not cache_key or not search_term.startswith(cache_key):
                    continue
                log.debug(
                    f"Match found in cache for {search_term!r} in [{client.profile}]."
                )
                log.debug("Prefix match found in cache, filtering results.")
                matches = []
                for cached_item in cache[cache_key]:
                    # Matching overwrites the scoring fields: a copy is used so the
                    # items cached for ``cache_key`` keep their own scores.
                    self._process_matching(
                        search_term,
                        sequence_matcher,
                        matches,
                        self._unscored_copy(cached_item),
                    )
                cache.move_to_end(cache_key)
                break

        if matches is None:
            # Nothing usable in the cache, perform a new search
            matches = await self._perform_search(client, search_term, sequence_matcher)
            if search_term:
                # An empty term is not cached, it would match every later search.
                cache[search_term] = matches
                if len(cache) > MAX_CACHE_SIZE:
                    cache.popitem(last=False)

        # From here we work on a copy, the list stored in the cache must not be
        # modified by the extra item or by the sorting.
        results = list(matches)

        # If no exact match is found, but the search term is a valid JID, we add the
        # JID as a result
        if "@" in search_term and not any(m.exact_match for m in results):
            try:
                search_jid = jid.JID(search_term)
            except jid.InvalidFormat:
                # the term only looks like a JID, there is nothing to add
                pass
            else:
                results.append(
                    JidSearchItem(
                        entity=search_jid,
                        in_roster=False,
                        exact_match=True,
                        relevance=1,
                    )
                )

        results.sort(
            key=lambda item: (item.exact_match, item.relevance or 0, item.in_roster),
            reverse=True,
        )

        return results

    @staticmethod
    def _unscored_copy(item: JidSearchItem) -> JidSearchItem:
        """Return a shallow copy of an item with its scoring fields reset."""
        from dataclasses import replace

        return replace(item, exact_match=False, relevance=None)

    def _process_matching(
        self,
        search_term: str,
        sequence_matcher: difflib.SequenceMatcher,
        matches: List[JidSearchItem],
        item: JidSearchItem,
    ) -> None:
        """Process matching of items

        The item is compared, in this order, to: its name, its full JID, the local
        part of its JID, its groups (best ratio is used) and the domain of its JID.
        The first comparison reaching RATIO_CUTOFF wins. ``item.exact_match`` and
        ``item.relevance`` are updated in place, and the item is appended to
        ``matches`` only if it matches.

        @param search_term: The (lowercased) query to be searched.
        @param sequence_matcher: The sequence matcher to be used for the matching
            process. Its first sequence must already be set to ``search_term``.
        @param matches: A list where the match is to be appended.
        @param item: The item to be matched.
        """
        name_lower = (item.name or "").lower()
        entity_lower = item.entity.full().lower()

        if search_term in (name_lower, entity_lower):
            item.exact_match = True
            item.relevance = 1
            matches.append(item)
            return

        item.exact_match = False

        localpart = item.entity.user.lower() if item.entity.user else ""
        # Each tier is a group of strings to compare to the search term, the best
        # ratio of the tier is used. Empty tiers are skipped.
        tiers = (
            (name_lower,),
            (entity_lower,),
            (localpart,) if localpart else (),
            tuple(group.lower() for group in item.groups or ()),
            (item.entity.host.lower(),),
        )

        for candidates in tiers:
            if not candidates:
                continue
            ratios = []
            for candidate in candidates:
                sequence_matcher.set_seq2(candidate)
                ratios.append(sequence_matcher.ratio())
            tier_ratio = max(ratios)
            if tier_ratio >= RATIO_CUTOFF:
                item.relevance = tier_ratio
                matches.append(item)
                return

    async def _perform_search(
        self,
        client: SatXMPPEntity,
        search_term: str,
        sequence_matcher: difflib.SequenceMatcher,
    ) -> List[JidSearchItem]:
        """Performs a new search when no match is found in the cache.

        @param client: The client whose roster is searched.
        @param search_term: The query to be searched.
        @param sequence_matcher: The SequenceMatcher object to be used for matching.
        @return: A list of matches found.
        """
        matches: List[JidSearchItem] = []

        try:
            roster = client.roster
        except AttributeError:
            # components have no roster
            roster_items = []
        else:
            roster_items = roster.get_items()

        for roster_item in roster_items:
            jid_search_item = JidSearchItem(
                entity=roster_item.entity,
                # a missing name is normalised to an empty string so it can be
                # lowercased during matching
                name=roster_item.name or "",
                in_roster=True,
                groups=list(roster_item.groups),
            )

            self._process_matching(
                search_term, sequence_matcher, matches, jid_search_item
            )

        return matches
256 name=roster_item.name,
257 in_roster=True,
258 groups=list(roster_item.groups),
259 )
260
261 self._process_matching(
262 search_term, sequence_matcher, matches, jid_search_item
263 )
264
265 return matches