Mercurial > libervia-backend
annotate libervia/backend/tools/common/regex.py @ 4306:94e0968987cd
plugin XEP-0033: code modernisation, improve delivery, data validation:
- Code has been rewritten using Pydantic models and `async` coroutines for data validation
and cleaner element parsing/generation.
- Delivery has been completely rewritten. It now works even if the server doesn't support
multicast, and sends to the local multicast service first. Delivering to the local multicast
service first is due to poor support of XEP-0033 in servers (notably Prosody, which has an
incomplete implementation), and the current impossibility of detecting whether a sub-domain
service fully handles multicast or only handles it for local domains. This is a workaround to have
a good balance between backward compatibility and use of bandwidth, and to make it work
with the incoming email gateway implementation (the gateway will only deliver to
entities of its own domain).
- disco feature checking now uses `async` coroutines. `host` implementation still uses
Deferred return values for compatibility with legacy code.
rel 450
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 26 Sep 2024 16:12:01 +0200 |
parents | 0d7bb4df2343 |
children |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # Salut à Toi: an XMPP client |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
20 """ regex tools common to backend and frontends """ |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
21 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 import re |
3501
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
23 import unicodedata |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
24 |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
25 path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"} |
3028 | 26 path_escape_rev = {re.escape(v): k for k, v in path_escape.items()} |
27 path_escape = {re.escape(k): v for k, v in path_escape.items()} | |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
28 # thanks to Martijn Pieters (https://stackoverflow.com/a/14693789) |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4071
diff
changeset
|
29 RE_ANSI_REMOVE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4071
diff
changeset
|
30 RE_TEXT_URL = re.compile(r"[^a-zA-Z0-9,_]+") |
3501
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
31 TEXT_MAX_LEN = 60 |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
32 # min length is currently deactivated |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
33 TEXT_WORD_MIN_LENGHT = 0 |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def re_join(exps):
    """Build a single compiled regex matching any of the given expressions

    The expressions are combined as alternatives (``|``), in the order given.

    @param exps(iterable[str]): regex source strings to combine
    @return: compiled pattern matching any one of the expressions
    """
    alternation = "|".join(exps)
    return re.compile(alternation)
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
39 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
40 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
41 def re_sub_dict(pattern, repl_dict, string): |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
42 """Replace key, value found in dict according to pattern |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
43 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 @param pattern(basestr): pattern using keys found in repl_dict |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
45 @repl_dict(dict): keys found in this dict will be replaced by |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
46 corresponding values |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
47 @param string(basestr): string to use for the replacement |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
48 """ |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
49 return pattern.sub(lambda m: repl_dict[re.escape(m.group(0))], string) |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
50 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
51 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
52 path_escape_re = re_join(list(path_escape.keys())) |
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
53 path_escape_rev_re = re_join(list(path_escape_rev.keys())) |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
54 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
55 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
# Characters that must not appear verbatim in a path component, mapped to their
# percent-encoded form.
# The table is kept under a private name on purpose: the ``def`` statement below
# rebinds the module-level name ``path_escape`` to the function, so the function
# cannot look up its replacement mapping under that name (doing so made every
# call that actually needed escaping fail with a TypeError).
_PATH_ESCAPE_TABLE = str.maketrans({"%": "%25", "/": "%2F", "\\": "%5c"})


def path_escape(string):
    """Escape string so it can be used in a file path

    The percent sign, the slash and the backslash are replaced respectively by
    "%25", "%2F" and "%5c"; every other character is left untouched. The
    replacement is done in a single pass, so the "%" of an inserted escape
    sequence is never escaped a second time.

    @param string(str): string to escape
    @return (str): escaped string, usable in a file path
    """
    return string.translate(_PATH_ESCAPE_TABLE)
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
63 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
64 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def path_unescape(string):
    """Restore the original characters of a string escaped for a file path

    Every escape sequence matched by ``path_escape_rev_re`` is replaced by the
    character associated to it in ``path_escape_rev``.

    @param string(str): string found in file path
    @return (str): unescaped string
    """

    def _original_char(match):
        # path_escape_rev is keyed by the regex-escaped escape sequence
        return path_escape_rev[re.escape(match.group(0))]

    return path_escape_rev_re.sub(_original_char, string)
2297
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
72 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
73 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def ansi_remove(string):
    """Strip ANSI escape sequences from a string

    Every sequence matched by ``RE_ANSI_REMOVE`` is deleted; the rest of the
    text is returned unchanged.

    @param string(str): string to filter
    @return (str): string without ANSI escape codes
    """
    cleaned = re.sub(RE_ANSI_REMOVE, "", string)
    return cleaned
3501
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
81 |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
82 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def url_friendly_text(text):
    """Turn free text into a lowercase, dash-separated string usable in a URL

    Non-ASCII characters are decomposed and reduced to their ASCII part, every
    run of characters matched by ``RE_TEXT_URL`` becomes a word boundary, and
    the remaining words are joined with "-". The result is then shortened until
    it is at most ``TEXT_MAX_LEN`` characters long.

    @param text(str): text to convert
    @return (str): url-friendly version of the text
    """
    # decompose special chars so that only their ASCII base survives the
    # encoding, trick found at https://stackoverflow.com/a/3194567
    decomposed = unicodedata.normalize("NFD", text)
    ascii_only = decomposed.encode("ascii", "ignore").decode("utf-8")
    lowered = RE_TEXT_URL.sub(" ", ascii_only).lower()
    words = [word for word in lowered.split() if len(word) >= TEXT_WORD_MIN_LENGHT]
    slug = "-".join(words)
    while len(slug) > TEXT_MAX_LEN:
        # drop the last word when there is one, else hard-cut the single word
        head, dash, _ = slug.rpartition("-")
        slug = head if dash else slug[:TEXT_MAX_LEN]
    return slug