Mercurial > libervia-backend
diff libervia/backend/tools/common/regex.py @ 4071:4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 02 Jun 2023 11:49:51 +0200 |
parents | sat/tools/common/regex.py@524856bd7b19 |
children | 0d7bb4df2343 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libervia/backend/tools/common/regex.py Fri Jun 02 11:49:51 2023 +0200 @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 + + +# Salut à Toi: an XMPP client +# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" regex tools common to backend and frontends """ + +import re +import unicodedata + +path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"} +path_escape_rev = {re.escape(v): k for k, v in path_escape.items()} +path_escape = {re.escape(k): v for k, v in path_escape.items()} +# thanks to Martijn Pieters (https://stackoverflow.com/a/14693789) +RE_ANSI_REMOVE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') +RE_TEXT_URL = re.compile(r'[^a-zA-Z0-9,_]+') +TEXT_MAX_LEN = 60 +# min lenght is currently deactivated +TEXT_WORD_MIN_LENGHT = 0 + + +def re_join(exps): + """Join (OR) various regexes""" + return re.compile("|".join(exps)) + + +def re_sub_dict(pattern, repl_dict, string): + """Replace key, value found in dict according to pattern + + @param pattern(basestr): pattern using keys found in repl_dict + @repl_dict(dict): keys found in this dict will be replaced by + corresponding values + @param string(basestr): string to use for the replacement + """ + return pattern.sub(lambda m: repl_dict[re.escape(m.group(0))], string) + + +path_escape_re = re_join(list(path_escape.keys())) +path_escape_rev_re = re_join(list(path_escape_rev.keys())) + + +def path_escape(string): + """Escape string so it can be use in a file path + + @param string(basestr): string to escape + @return (str, unicode): escaped string, usable in a file path + """ + return re_sub_dict(path_escape_re, path_escape, string) + + +def path_unescape(string): + """Unescape string from value found in file path + + @param string(basestr): string found in file path + @return (str, unicode): unescaped string + """ + return re_sub_dict(path_escape_rev_re, path_escape_rev, string) + + +def ansi_remove(string): + """Remove ANSI escape codes from string + + @param string(basestr): string to filter + @return (str, unicode): string without ANSI escape codes + """ + return RE_ANSI_REMOVE.sub("", string) + + +def url_friendly_text(text): + """Convert text to url-friendly one""" + # we change special chars to ascii one, + # trick found at https://stackoverflow.com/a/3194567 + text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8') + text = RE_TEXT_URL.sub(' ', text).lower() + text = '-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT]) + while len(text) > TEXT_MAX_LEN: + if '-' in text: + text = text.rsplit('-', 1)[0] + else: + text = text[:TEXT_MAX_LEN] + return text