Mercurial > libervia-backend
comparison libervia/backend/tools/common/regex.py @ 4071:4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 02 Jun 2023 11:49:51 +0200 |
parents | sat/tools/common/regex.py@524856bd7b19 |
children | 0d7bb4df2343 |
comparison
equal
deleted
inserted
replaced
4070:d10748475025 | 4071:4b842c1fb686 |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 | |
4 # Salut à Toi: an XMPP client | |
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) | |
6 | |
7 # This program is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU Affero General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 | |
12 # This program is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU Affero General Public License for more details. | |
16 | |
17 # You should have received a copy of the GNU Affero General Public License | |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
20 """ regex tools common to backend and frontends """ | |
21 | |
22 import re | |
23 import unicodedata | |
24 | |
# Characters that are unsafe in a single path component, mapped to their
# percent-encoded form.
path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"}
# Reverse table (encoded form -> raw character). Keys are passed through
# re.escape so they can be joined into a regex and looked up by re_sub_dict.
path_escape_rev = dict(zip(map(re.escape, path_escape.values()), path_escape.keys()))
# Same table as above, re-keyed with regex-escaped characters.
# NOTE: this name is rebound later in the module by the ``path_escape``
# function definition.
path_escape = dict(zip(map(re.escape, path_escape.keys()), path_escape.values()))
# ANSI escape sequence matcher,
# thanks to Martijn Pieters (https://stackoverflow.com/a/14693789)
RE_ANSI_REMOVE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
# Any run of characters other than ASCII letters, digits, "," and "_"
RE_TEXT_URL = re.compile(r"[^a-zA-Z0-9,_]+")
# Maximum length of a text produced by url_friendly_text
TEXT_MAX_LEN = 60
# Minimum length of a word kept by url_friendly_text;
# min length filtering is currently deactivated (0 keeps every word)
TEXT_WORD_MIN_LENGHT = 0
34 | |
35 | |
def re_join(exps):
    """Build a single compiled regex matching any of the given expressions

    @param exps(iterable[str]): regex sources, joined with "|" (OR)
    @return: compiled pattern of the alternation
    """
    alternation = "|".join(exps)
    return re.compile(alternation)
39 | |
40 | |
def re_sub_dict(pattern, repl_dict, string):
    """Substitute every match of pattern using a lookup table

    @param pattern: compiled regex whose matches are keys of repl_dict
    @param repl_dict(dict): replacement table; each key is the
        ``re.escape``-d form of a matched text, each value its replacement
    @param string(str): string to use for the replacement
    @return (str): string with every match replaced
    """

    def _replacement(match):
        # the table is keyed by the regex-escaped form of the matched text
        return repl_dict[re.escape(match.group(0))]

    return pattern.sub(_replacement, string)
50 | |
51 | |
# Alternations of the (already regex-escaped) keys of both tables.
# They are built here, while ``path_escape`` still names the dict: the
# function of the same name defined below rebinds it.
path_escape_re = re_join(list(path_escape))
path_escape_rev_re = re_join(list(path_escape_rev))
54 | |
55 | |
# Private copy of the escape table: this ``def`` rebinds the module-level name
# ``path_escape`` (previously the escape dict), so the dict can no longer be
# reached through that name from inside the function.
# Must stay in sync with the table used to build ``path_escape_rev``.
_PATH_ESCAPE_TABLE = str.maketrans({"%": "%25", "/": "%2F", "\\": "%5c"})


def path_escape(string):
    """Escape string so it can be used in a file path

    "%", "/" and "\\" are replaced by "%25", "%2F" and "%5c" respectively;
    every other character is left untouched.

    @param string(str): string to escape
    @return (str): escaped string, usable in a file path
    """
    # str.translate works in a single pass, so the "%" introduced by an
    # escape sequence is never escaped a second time
    return string.translate(_PATH_ESCAPE_TABLE)
63 | |
64 | |
def path_unescape(string):
    """Restore a string previously escaped for use in a file path

    Only the exact sequences "%25", "%2F" and "%5c" are decoded (the
    pattern is compiled without any case-insensitive flag).

    @param string(str): string found in file path
    @return (str): unescaped string
    """
    unescaped = re_sub_dict(
        pattern=path_escape_rev_re,
        repl_dict=path_escape_rev,
        string=string,
    )
    return unescaped
72 | |
73 | |
def ansi_remove(string):
    """Strip ANSI escape sequences from a string

    @param string(str): string to filter
    @return (str): string with every match of RE_ANSI_REMOVE deleted
    """
    cleaned = re.sub(RE_ANSI_REMOVE, "", string)
    return cleaned
81 | |
82 | |
def url_friendly_text(text):
    """Convert text to a url-friendly one

    The text is reduced to ASCII, lower-cased, and its words are joined with
    "-"; trailing words are dropped until the result fits in TEXT_MAX_LEN
    characters (a single over-long word is cut at TEXT_MAX_LEN).

    @param text(str): text to convert
    @return (str): url-friendly text
    """
    # decompose accented chars, then drop everything which is not ASCII,
    # trick found at https://stackoverflow.com/a/3194567
    decomposed = unicodedata.normalize("NFD", text)
    ascii_text = decomposed.encode("ascii", "ignore").decode("utf-8")
    # every run of disallowed characters becomes a word separator
    spaced = RE_TEXT_URL.sub(" ", ascii_text).lower()
    words = [word for word in spaced.split() if len(word) >= TEXT_WORD_MIN_LENGHT]
    slug = "-".join(words)
    while len(slug) > TEXT_MAX_LEN:
        head, dash, _ = slug.rpartition("-")
        if dash:
            # remove the last word
            slug = head
        else:
            # a single word is left: hard cut
            slug = slug[:TEXT_MAX_LEN]
    return slug