Mercurial > libervia-backend
annotate sat/tools/common/regex.py @ 3728:b15644cae50d
component AP gateway: JID/node ⟺ AP outbox conversion:
- convert a combination of JID and optional pubsub node to AP actor handle (see
`getJIDAndNode` for details) and vice versa
- the gateway now provides a Pubsub service
- retrieve pubsub node and convert it to AP collection, AP pagination is converted to RSM
- do the opposite: convert AP collection to pubsub and handle RSM request. Due to
ActivityStream collection pagination limitations, some RSM requests produce inefficient
requests, but caching should be used most of the time in the future and avoid the
problem.
- set specific name to HTTP Server
- new `local_only` setting (`True` by default) to indicate if the gateway can request or
not XMPP Pubsub nodes from other servers
- disco info now specifies important features such as Pubsub RSM, and nodes metadata
ticket 363
author | Goffi <goffi@goffi.org> |
---|---|
date | Tue, 25 Jan 2022 17:54:06 +0100 |
parents | 85b8a899f407 |
children | 524856bd7b19 |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # Salut à Toi: an XMPP client |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
20 """ regex tools common to backend and frontends """ |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
21 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 import re |
3501
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
23 import unicodedata |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
24 |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
25 path_escape = {"%": "%25", "/": "%2F", "\\": "%5c"} |
3028 | 26 path_escape_rev = {re.escape(v): k for k, v in path_escape.items()} |
27 path_escape = {re.escape(k): v for k, v in path_escape.items()} | |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
28 # thanks to Martijn Pieters (https://stackoverflow.com/a/14693789) |
3501
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
29 RE_ANSI_REMOVE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
30 RE_TEXT_URL = re.compile(r'[^a-zA-Z0-9,_]+') |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
31 TEXT_MAX_LEN = 60 |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
32 # min length is currently deactivated |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
33 TEXT_WORD_MIN_LENGHT = 0 |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
36 def reJoin(exps): |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
37 """Join (OR) various regexes""" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
38 return re.compile("|".join(exps)) |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
39 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
40 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
41 def reSubDict(pattern, repl_dict, string): |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
42 """Replace key, value found in dict according to pattern |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
43 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 @param pattern(basestr): pattern using keys found in repl_dict |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
45 @param repl_dict(dict): keys found in this dict will be replaced by |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
46 corresponding values |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
47 @param string(basestr): string to use for the replacement |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
48 """ |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
49 return pattern.sub(lambda m: repl_dict[re.escape(m.group(0))], string) |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
50 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
51 |
3028 | 52 path_escape_re = reJoin(list(path_escape.keys())) |
53 path_escape_rev_re = reJoin(list(path_escape_rev.keys())) | |
1920
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
54 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
55 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
56 def pathEscape(string): |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
57 """Escape string so it can be used in a file path |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
58 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
59 @param string(basestr): string to escape |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
60 @return (str, unicode): escaped string, usable in a file path |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
61 """ |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
62 return reSubDict(path_escape_re, path_escape, string) |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
63 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
64 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
65 def pathUnescape(string): |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
66 """Unescape string from value found in file path |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
67 |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
68 @param string(basestr): string found in file path |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
69 @return (str, unicode): unescaped string |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
70 """ |
03526c8abeb0
tools (common): added regex module with path (un)escaping methods
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
71 return reSubDict(path_escape_rev_re, path_escape_rev, string) |
2297
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
72 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
73 |
2297
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
74 def ansiRemove(string): |
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
75 """Remove ANSI escape codes from string |
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
76 |
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
77 @param string(basestr): string to filter |
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
78 @return (str, unicode): string without ANSI escape codes |
ad2a8e8b52da
core (tools/common/regex): new ansiRemove method to remove ANSI escape codes from a string
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
79 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
80 return RE_ANSI_REMOVE.sub("", string) |
3501
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
81 |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
82 |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
83 def urlFriendlyText(text): |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
84 """Convert text to url-friendly one""" |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
85 # we change special chars to ascii one, |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
86 # trick found at https://stackoverflow.com/a/3194567 |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
87 text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8') |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
88 text = RE_TEXT_URL.sub(' ', text).lower() |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
89 text = '-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT]) |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
90 while len(text) > TEXT_MAX_LEN: |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
91 if '-' in text: |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
92 text = text.rsplit('-', 1)[0] |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
93 else: |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
94 text = text[:TEXT_MAX_LEN] |
85b8a899f407
tools (common/regex): move code to make user friendly URLs from web frontend
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
95 return text |