diff sat/tools/common/regex.py @ 3501:85b8a899f407

tools (common/regex): move code to make user friendly URLs from web frontend
author Goffi <goffi@goffi.org>
date Fri, 16 Apr 2021 18:32:44 +0200
parents be6d91572633
children 524856bd7b19
line wrap: on
line diff
--- a/sat/tools/common/regex.py	Fri Apr 16 18:32:40 2021 +0200
+++ b/sat/tools/common/regex.py	Fri Apr 16 18:32:44 2021 +0200
@@ -20,12 +20,17 @@
 """ regex tools common to backend and frontends """
 
 import re
+import unicodedata
 
 path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"}
 path_escape_rev = {re.escape(v): k for k, v in path_escape.items()}
 path_escape = {re.escape(k): v for k, v in path_escape.items()}
 #  thanks to Martijn Pieters (https://stackoverflow.com/a/14693789)
-RE_ANSI_REMOVE = re.compile(r"\x1b[^m]*m")
+RE_ANSI_REMOVE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+RE_TEXT_URL = re.compile(r'[^a-zA-Z0-9,_]+')
+TEXT_MAX_LEN = 60
+# min length is currently deactivated (0 means no word is filtered out)
+TEXT_WORD_MIN_LENGHT = 0
 
 
 def reJoin(exps):
@@ -73,3 +78,18 @@
     @return (str, unicode): string without ANSI escape codes
     """
     return RE_ANSI_REMOVE.sub("", string)
+
+
+def urlFriendlyText(text):
+    """Convert text to a URL-friendly slug (ASCII, lower case, words joined by '-')"""
+    # NFD splits accented chars into base letter + combining mark; the ASCII encode with
+    # 'ignore' then drops every non-ASCII char (trick from https://stackoverflow.com/a/3194567)
+    text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8')
+    text = RE_TEXT_URL.sub(' ', text).lower()  # each run of chars matched by RE_TEXT_URL becomes one space
+    text = '-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT])  # drop too-short words, join with '-'
+    while len(text) > TEXT_MAX_LEN:  # too long: shorten until it fits in TEXT_MAX_LEN chars
+        if '-' in text:
+            text = text.rsplit('-', 1)[0]  # remove the last word so remaining words stay whole
+        else:
+            text = text[:TEXT_MAX_LEN]  # a single over-long word is cut at the limit
+    return text