# HG changeset patch
# User Goffi
# Date 1618590764 -7200
# Node ID 85b8a899f407e96ade5818342475b5fe36ce2dd3
# Parent 73b8a8d938be97eedb25ad9841fe7ad7ecdc53d1
tools (common/regex): move code to make user friendly URLs from web frontend

diff -r 73b8a8d938be -r 85b8a899f407 sat/tools/common/regex.py
--- a/sat/tools/common/regex.py	Fri Apr 16 18:32:40 2021 +0200
+++ b/sat/tools/common/regex.py	Fri Apr 16 18:32:44 2021 +0200
@@ -20,12 +20,17 @@
 """ regex tools common to backend and frontends """
 
 import re
+import unicodedata
 
 path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"}
 path_escape_rev = {re.escape(v): k for k, v in path_escape.items()}
 path_escape = {re.escape(k): v for k, v in path_escape.items()}
 #  thanks to Martijn Pieters (https://stackoverflow.com/a/14693789)
-RE_ANSI_REMOVE = re.compile(r"\x1b[^m]*m")
+RE_ANSI_REMOVE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+RE_TEXT_URL = re.compile(r'[^a-zA-Z0-9,_]+')
+TEXT_MAX_LEN = 60
+# min length is currently deactivated
+TEXT_WORD_MIN_LENGHT = 0
 
 
 def reJoin(exps):
@@ -73,3 +78,29 @@
     @return (str, unicode): string without ANSI escape codes
     """
     return RE_ANSI_REMOVE.sub("", string)
+
+
+def urlFriendlyText(text):
+    """Convert text to url-friendly one
+
+    Text is NFD-normalized then stripped of non-ASCII characters, so accented
+    letters keep only their ASCII base letter while characters without one are
+    dropped. Each run of characters other than ASCII letters, digits, "," and "_"
+    is then used as a word separator, and the lowercased words of at least
+    TEXT_WORD_MIN_LENGHT characters are joined with "-". If the result is longer
+    than TEXT_MAX_LEN, trailing words are removed until it fits; a single word
+    which is still too long is cut at TEXT_MAX_LEN.
+
+    @param text (str): text to convert
+    @return (str): url-friendly text (may be empty)
+    """
+    # we change special chars to ascii one,
+    # trick found at https://stackoverflow.com/a/3194567
+    text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8')
+    text = RE_TEXT_URL.sub(' ', text).lower()
+    text = '-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT])
+    while len(text) > TEXT_MAX_LEN:
+        if '-' in text:
+            text = text.rsplit('-', 1)[0]
+        else:
+            text = text[:TEXT_MAX_LEN]
+    return text