comparison libervia/backend/tools/common/regex.py @ 4071:4b842c1fb686

refactoring: renamed `sat` package to `libervia.backend`
author Goffi <goffi@goffi.org>
date Fri, 02 Jun 2023 11:49:51 +0200
parents sat/tools/common/regex.py@524856bd7b19
children 0d7bb4df2343
comparison
equal deleted inserted replaced
4070:d10748475025 4071:4b842c1fb686
1 #!/usr/bin/env python3
2
3
4 # Salut à Toi: an XMPP client
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 """ regex tools common to backend and frontends """
21
22 import re
23 import unicodedata
24
# Raw character -> escaped sequence used when a string must be stored in a
# file path.
path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"}
# Reverse table (escaped sequence -> raw character). Keys are passed through
# ``re.escape`` because ``re_sub_dict`` looks replacements up with the escaped
# form of the matched text. It must be built here, while ``path_escape`` still
# holds the raw (unescaped) keys.
path_escape_rev = {re.escape(v): k for k, v in path_escape.items()}
# Rebind ``path_escape`` to the same table with ``re.escape``d keys, so the
# keys can be used both as regex sources and as ``re_sub_dict`` lookup keys.
# NOTE: the ``path_escape`` name is bound again later in this module by the
# function of the same name.
path_escape = {re.escape(k): v for k, v in path_escape.items()}
# Pattern matching ANSI escape sequences,
# thanks to Martijn Pieters (https://stackoverflow.com/a/14693789)
RE_ANSI_REMOVE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
# Matches any run of characters other than ASCII letters, digits, "," and "_"
RE_TEXT_URL = re.compile(r'[^a-zA-Z0-9,_]+')
# Maximum length of the text returned by ``url_friendly_text``
TEXT_MAX_LEN = 60
# Words shorter than this are dropped by ``url_friendly_text``;
# the minimum length is currently deactivated (0 keeps every word)
TEXT_WORD_MIN_LENGHT = 0
34
35
def re_join(exps):
    """Build a single compiled regex matching any of the given expressions

    @param exps(iterable): regular expression sources to combine
    @return: compiled pattern in which the sources are alternatives
        separated by "|", in the order they were given
    """
    alternation = "|".join(exps)
    return re.compile(alternation)
39
40
def re_sub_dict(pattern, repl_dict, string):
    """Substitute every match of pattern using a lookup table

    @param pattern: compiled regex whose matches are replaced
    @param repl_dict(dict): replacement table; it is looked up with the
        ``re.escape``d form of each matched text, so its keys must be escaped
    @param string(str): text in which the replacements are done
    @return (str): string with each match replaced by its mapped value
    """

    def lookup(match):
        # the table is keyed by escaped text, so escape the raw match first
        found = match.group(0)
        return repl_dict[re.escape(found)]

    return pattern.sub(lookup, string)
50
51
# Compiled once at import time, while ``path_escape`` still names the dict of
# escaped keys (the function of the same name is only defined further down).
# Iterating a dict yields its keys, which are the regex sources to join.
path_escape_re = re_join(list(path_escape))
path_escape_rev_re = re_join(list(path_escape_rev))
54
55
# Characters that are unsafe in a path segment, mapped to their escaped form.
# "%" itself is escaped so that the escaped text can be reverted unambiguously.
# The table lives under a private name because this function is itself called
# ``path_escape``: once it is defined, that module-level name refers to the
# function, so it can no longer be used to reach a replacement dict.
_PATH_ESCAPE_TABLE = str.maketrans({"%": "%25", "/": "%2F", "\\": "%5c"})


def path_escape(string):
    """Escape string so it can be used in a file path

    The percent sign, the slash and the backslash are replaced by "%25",
    "%2F" and "%5c" respectively; every other character is kept as is.

    @param string(str): string to escape
    @return (str): escaped string, usable in a file path
    """
    # single pass over the string: an inserted "%" is never escaped again
    return string.translate(_PATH_ESCAPE_TABLE)
63
64
def path_unescape(string):
    """Revert the escaping of a string that was stored in a file path

    @param string(str): escaped string, as found in a file path
    @return (str): string with every escaped sequence turned back into the
        character it stands for
    """

    def restore(match):
        # path_escape_rev is keyed by the re.escape'd escaped sequence
        return path_escape_rev[re.escape(match.group(0))]

    return path_escape_rev_re.sub(restore, string)
72
73
def ansi_remove(string):
    """Strip ANSI escape sequences from a string

    @param string(str): text possibly containing ANSI escape codes
    @return (str): the text with every match of RE_ANSI_REMOVE deleted
    """
    stripped = RE_ANSI_REMOVE.sub("", string)
    return stripped
81
82
def url_friendly_text(text):
    """Turn free text into a lowercase, dash-separated, url-friendly string

    @param text(str): text to convert
    @return (str): converted text, at most TEXT_MAX_LEN characters long
    """
    # decompose accented characters (NFD) then drop whatever is not ASCII,
    # trick found at https://stackoverflow.com/a/3194567
    decomposed = unicodedata.normalize('NFD', text)
    ascii_text = decomposed.encode('ascii', 'ignore').decode('utf-8')
    # every run of characters matched by RE_TEXT_URL becomes a word separator
    spaced = RE_TEXT_URL.sub(' ', ascii_text).lower()
    words = [
        word for word in spaced.split()
        if word and len(word) >= TEXT_WORD_MIN_LENGHT
    ]
    slug = '-'.join(words)
    # shorten by dropping trailing words; a lone over-long word is truncated
    while len(slug) > TEXT_MAX_LEN:
        head, dash, _tail = slug.rpartition('-')
        slug = head if dash else slug[:TEXT_MAX_LEN]
    return slug