Mercurial > libervia-backend
annotate libervia/backend/tools/common/regex.py @ 4318:27bb22eace65
tests (unit/email gateway): add test for XEP-0131 handling:
rel 451
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 28 Sep 2024 15:59:48 +0200 |
parents | 0d7bb4df2343 |
children |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # Salut à Toi: an XMPP client |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
20 """ regex tools common to backend and frontends """ |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
21 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 import re |
3501
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
23 import unicodedata |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
24 |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
# Characters escaped for use in a file path, mapped to their percent-encoded form.
path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"}
# Reverse mapping (encoded form -> original character). Keys go through
# re.escape so they can be joined into a regex alternation and looked up again
# from a match.
path_escape_rev = {
    re.escape(encoded): raw for raw, encoded in path_escape.items()
}
# Forward mapping rebuilt with regex-escaped keys, for the same reason.
path_escape = {
    re.escape(raw): encoded for raw, encoded in path_escape.items()
}
# thanks to Martijn Pieters (https://stackoverflow.com/a/14693789)
RE_ANSI_REMOVE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
# Matches any run of characters other than ASCII letters, digits, "," and "_".
RE_TEXT_URL = re.compile(r"[^a-zA-Z0-9,_]+")
# Maximum length of a URL-friendly text.
TEXT_MAX_LEN = 60
# min length is currently deactivated
TEXT_WORD_MIN_LENGHT = 0
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def re_join(exps):
    """Combine several regular expressions into a single alternation

    @param exps(iterable[str]): regex sources to OR together
    @return (re.Pattern): compiled pattern matching any of the expressions
    """
    alternation = "|".join(exps)
    return re.compile(alternation)
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
39 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
40 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def re_sub_dict(pattern, repl_dict, string):
    """Substitute every match of pattern using a lookup table

    @param pattern(re.Pattern): compiled pattern whose matches are keys of
        repl_dict (once regex-escaped)
    @param repl_dict(dict): map of regex-escaped matched text to its
        replacement
    @param string(str): text in which the replacements are done
    @return (str): string with every match replaced
    """

    def _replacement(match):
        # keys of repl_dict are stored regex-escaped, so the matched text must
        # be escaped the same way before the lookup
        return repl_dict[re.escape(match.group(0))]

    return pattern.sub(_replacement, string)
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
50 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
51 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
# Alternation patterns matching, respectively, any character to escape and any
# escaped sequence to restore (iterating a dict yields its keys).
path_escape_re = re_join(path_escape)
path_escape_rev_re = re_join(path_escape_rev)
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
54 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
55 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
# Translation table used by path_escape. It is kept under a private name
# because the function below rebinds the module-level name ``path_escape``, so
# the replacement dict of that name is no longer reachable once the module is
# loaded (passing it to re_sub_dict handed over the function itself and raised
# TypeError on the first match).
# The values must stay in sync with the path escaping map used for unescaping.
_PATH_ESCAPE_TABLE = str.maketrans({"%": "%25", "/": "%2F", "\\": "%5c"})


def path_escape(string):
    """Escape string so it can be used in a file path

    "%", "/" and "\\" are replaced by their percent-encoded form in a single
    pass, so the "%" introduced by an escape is never escaped again.

    @param string(str): string to escape
    @return (str): escaped string, usable in a file path
    """
    return string.translate(_PATH_ESCAPE_TABLE)
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
63 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
64 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def path_unescape(string):
    """Restore the original characters of a string escaped for a file path

    @param string(str): escaped string, as found in a file path
    @return (str): string with the escaped sequences replaced by the
        characters they stand for
    """
    unescaped = re_sub_dict(path_escape_rev_re, path_escape_rev, string)
    return unescaped
2297
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
72 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
73 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def ansi_remove(string):
    """Strip ANSI escape codes from a string

    @param string(str): string to filter
    @return (str): string with every match of RE_ANSI_REMOVE deleted
    """
    cleaned = RE_ANSI_REMOVE.sub("", string)
    return cleaned
3501
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
81 |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
82 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3501
diff
changeset
|
def url_friendly_text(text):
    """Convert text to a URL-friendly form

    The text is reduced to ASCII, lowercased, split into words joined by "-",
    then shortened until it fits in TEXT_MAX_LEN characters.

    @param text(str): text to convert
    @return (str): URL-friendly version of the text
    """
    # NFD decomposition separates base characters from their combining marks;
    # dropping everything non-ASCII afterwards keeps only the base characters
    # (trick found at https://stackoverflow.com/a/3194567)
    decomposed = unicodedata.normalize("NFD", text)
    text = decomposed.encode("ascii", "ignore").decode("utf-8")
    text = RE_TEXT_URL.sub(" ", text).lower()
    # str.split() never yields empty strings, only the length needs checking
    words = [word for word in text.split() if len(word) >= TEXT_WORD_MIN_LENGHT]
    text = "-".join(words)
    while len(text) > TEXT_MAX_LEN:
        # drop the last word while there is one, else cut the single word left
        head, separator, _ = text.rpartition("-")
        text = head if separator else text[:TEXT_MAX_LEN]
    return text