comparison sat/tools/common/regex.py @ 3501:85b8a899f407

tools (common/regex): move code to make user friendly URLs from web frontend
author Goffi <goffi@goffi.org>
date Fri, 16 Apr 2021 18:32:44 +0200
parents be6d91572633
children 524856bd7b19
comparison
equal deleted inserted replaced
3500:73b8a8d938be 3501:85b8a899f407
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 """ regex tools common to backend and frontends """ 20 """ regex tools common to backend and frontends """
21 21
22 import re 22 import re
23 import unicodedata
23 24
24 path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"} 25 path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"}
25 path_escape_rev = {re.escape(v): k for k, v in path_escape.items()} 26 path_escape_rev = {re.escape(v): k for k, v in path_escape.items()}
26 path_escape = {re.escape(k): v for k, v in path_escape.items()} 27 path_escape = {re.escape(k): v for k, v in path_escape.items()}
27 #  thanks to Martijn Pieters (https://stackoverflow.com/a/14693789) 28 #  thanks to Martijn Pieters (https://stackoverflow.com/a/14693789)
28 RE_ANSI_REMOVE = re.compile(r"\x1b[^m]*m") 29 RE_ANSI_REMOVE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
30 RE_TEXT_URL = re.compile(r'[^a-zA-Z0-9,_]+')
31 TEXT_MAX_LEN = 60
32 # min length is currently deactivated
33 TEXT_WORD_MIN_LENGHT = 0
29 34
30 35
def reJoin(exps):
    """Combine several regexes into a single compiled alternation

    @param exps(iterable[str]): regex sources to combine
    @return (re.Pattern): compiled pattern made of the expressions joined with "|"
    """
    alternation = "|".join(exps)
    return re.compile(alternation)
71 76
72 @param string(basestr): string to filter 77 @param string(basestr): string to filter
73 @return (str, unicode): string without ANSI escape codes 78 @return (str, unicode): string without ANSI escape codes
74 """ 79 """
75 return RE_ANSI_REMOVE.sub("", string) 80 return RE_ANSI_REMOVE.sub("", string)
81
82
def urlFriendlyText(text):
    """Turn arbitrary text into a short slug usable in a URL

    Characters are decomposed and everything which is not ASCII is dropped,
    each run of characters matched by RE_TEXT_URL becomes a word separator,
    and the remaining lowercased words are joined with "-".
    The result is never longer than TEXT_MAX_LEN characters.

    @param text(str): text to convert
    @return (str): lowercased words joined with "-" (may be empty)
    """
    # decompose special chars, then keep only the ASCII part
    # (trick found at https://stackoverflow.com/a/3194567)
    decomposed = unicodedata.normalize('NFD', text)
    ascii_only = decomposed.encode('ascii', 'ignore').decode('utf-8')
    lowered = RE_TEXT_URL.sub(' ', ascii_only).lower()

    words = []
    for word in lowered.split():
        if word and len(word) >= TEXT_WORD_MIN_LENGHT:
            words.append(word)
    slug = '-'.join(words)

    # too long: drop trailing words one at a time; once a single word is
    # left (no separator any more), cut it to the maximum length
    while len(slug) > TEXT_MAX_LEN:
        head, sep, _ = slug.rpartition('-')
        slug = head if sep else slug[:TEXT_MAX_LEN]
    return slug