Mercurial > libervia-backend
view libervia/backend/tools/common/regex.py @ 4314:6a70fcd93a7a
plugin XEP-0131: Stanza Headers and Internet Metadata implementation:
- SHIM is now supported and put in `msg_data["extra"]["headers"]`.
- `Keywords` are converted from and to list of string in `msg_data["extra"]["keywords"]`
field (if present in headers on message sending, values are merged).
- Python minimal version upgraded to 3.11 due to use of `StrEnum`.
rel 451
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 28 Sep 2024 15:56:04 +0200 |
parents | 0d7bb4df2343 |
children |
line wrap: on
line source
#!/usr/bin/env python3


# Salut à Toi: an XMPP client
# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

""" regex tools common to backend and frontends """

import re
import unicodedata
from collections.abc import Iterable, Mapping

# Raw character -> escaped sequence used when building file paths.
_PATH_ESCAPE_CHARS = {"%": "%25", "/": "%2F", "\\": "%5c"}

# Both lookup tables are keyed by the *regex-escaped* form of the text to
# replace, because ``re_sub_dict`` looks matches up with ``re.escape``.
# NOTE: the forward table must not be called ``path_escape``: the function of
# the same name defined below would rebind it, and escaping would then fail
# with a TypeError when indexing the function object.
path_escape_rev = {re.escape(v): k for k, v in _PATH_ESCAPE_CHARS.items()}
_path_escape_map = {re.escape(k): v for k, v in _PATH_ESCAPE_CHARS.items()}

# thanks to Martijn Pieters (https://stackoverflow.com/a/14693789)
RE_ANSI_REMOVE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
RE_TEXT_URL = re.compile(r"[^a-zA-Z0-9,_]+")
TEXT_MAX_LEN = 60
# min length is currently deactivated
TEXT_WORD_MIN_LENGHT = 0


def re_join(exps: Iterable[str]) -> re.Pattern[str]:
    """Join (OR) various regexes

    @param exps: regex sources to combine as alternatives
    @return: compiled pattern matching any of the given expressions
    """
    return re.compile("|".join(exps))


def re_sub_dict(
    pattern: re.Pattern[str], repl_dict: Mapping[str, str], string: str
) -> str:
    """Replace key, value found in dict according to pattern

    @param pattern: compiled pattern whose matches are keys of repl_dict
    @param repl_dict: mapping from the ``re.escape``d matched text to its
        replacement
    @param string: string to use for the replacement
    @return: string with every match replaced
    @raise KeyError: a match has no corresponding key in repl_dict
    """
    return pattern.sub(lambda m: repl_dict[re.escape(m.group(0))], string)


path_escape_re = re_join(list(_path_escape_map.keys()))
path_escape_rev_re = re_join(list(path_escape_rev.keys()))


def path_escape(string: str) -> str:
    """Escape string so it can be use in a file path

    "%", "/" and "\\" are percent-encoded in a single pass.

    @param string: string to escape
    @return: escaped string, usable in a file path
    """
    return re_sub_dict(path_escape_re, _path_escape_map, string)


def path_unescape(string: str) -> str:
    """Unescape string from value found in file path

    This reverses what ``path_escape`` does.

    @param string: string found in file path
    @return: unescaped string
    """
    return re_sub_dict(path_escape_rev_re, path_escape_rev, string)


def ansi_remove(string: str) -> str:
    """Remove ANSI escape codes from string

    @param string: string to filter
    @return: string without ANSI escape codes
    """
    return RE_ANSI_REMOVE.sub("", string)


def url_friendly_text(text: str) -> str:
    """Convert text to url-friendly one

    The text is reduced to ASCII, lowercased, split on every run of
    characters other than letters, digits, "," and "_", then re-joined with
    "-". The result is shortened to at most TEXT_MAX_LEN characters.

    @param text: text to convert
    @return: url-friendly version of the text
    """
    # we change special chars to ascii one,
    # trick found at https://stackoverflow.com/a/3194567
    text = unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode("utf-8")
    text = RE_TEXT_URL.sub(" ", text).lower()
    text = "-".join([t for t in text.split() if t and len(t) >= TEXT_WORD_MIN_LENGHT])
    while len(text) > TEXT_MAX_LEN:
        if "-" in text:
            # drop whole trailing words first, so we avoid cutting a word
            text = text.rsplit("-", 1)[0]
        else:
            # a single word is left: hard truncation is the only option
            text = text[:TEXT_MAX_LEN]
    return text