Mercurial > libervia-backend
view sat/plugins/plugin_comp_ap_gateway/regex.py @ 4022:cdb7de398c85
plugin lang detect: don't detect the language if the body is empty
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 23 Mar 2023 15:39:48 +0100 |
parents | 381340b9a9ee |
children |
line wrap: on
line source
#!/usr/bin/env python3

# Libervia ActivityPub Gateway
# Copyright (C) 2009-2022 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Various regular expressions for the AP gateway"""

import re


## "Signature" header parsing

# These expressions have been generated with abnf-to-regexp
# (https://github.com/aas-core-works/abnf-to-regexp).
# The base RFC 7230 ABNF rules come from https://github.com/EricGT/ABNF
# Here is the ABNF file used:
# ---
# BWS = OWS
# OWS = *( SP / HTAB )
# tchar = "!" / "#" / "$" / "%" / "&" / "`" / "*" / "+" / "-" / "." / "^" / "_" / "\'" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
# sig-param = token BWS "=" BWS ( token / quoted-string )
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
# qdtext = HTAB / SP / "!"
#   / %x23-5B ; '#'-'['
#   / %x5D-7E ; ']'-'~'
#   / obs-text
# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
# obs-text = %x80-FF
# ---

# optional whitespace: any run of spaces and horizontal tabs
ows = '[ \t]*'
# "bad" whitespace is syntactically the same thing as optional whitespace
bws = f'{ows}'
obs_text = '[\\x80-\\xff]'
# any character allowed unescaped inside a quoted string (everything printable
# except the double quote and the backslash)
qdtext = f'([\t !#-\\[\\]-~]|{obs_text})'
# a backslash followed by the character it escapes
quoted_pair = f'\\\\([\t !-~]|{obs_text})'
quoted_string = f'"({qdtext}|{quoted_pair})*"'
# NOTE: the generated expression translated the ABNF literal "\'" into a
#   backslash followed by an apostrophe. It has been corrected by hand so that a
#   bare apostrophe is accepted, as RFC 7230 defines ``tchar``. The single
#   capturing group is kept so that group numbering in RE_SIG_PARAM is unchanged.
tchar = "([!#$%&'`*+\\-.^_|~0-9a-zA-Z])"
token = f'({tchar})+'

# One ``key=value`` parameter of a "Signature" header. Named groups:
#   - key: the parameter name, including any whitespace found before the "="
#   - uq_value: the value when it is a bare token (None otherwise)
#   - quoted_value: the value when it is a quoted string, surrounding double
#     quotes included (None otherwise)
RE_SIG_PARAM = re.compile(
    f'(?P<key>{token}{bws})={bws}'
    f'((?P<uq_value>{token})|(?P<quoted_value>{quoted_string}))'
)


## Account/Mention

# FIXME: naive regex, should be improved following WebFinger, but popular
#   implementations such as Mastodon use a very restricted subset
# ``user@domain`` where the domain is made of at least two dot-separated labels.
# The dot is escaped (so that only a literal "." separates labels) and any number
# of labels is accepted, so that "user@sub.example.com" is matched in full.
RE_ACCOUNT = re.compile(r"[a-zA-Z0-9._-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+")
# An account preceded by "@", where the "@" doesn't directly follow a word
# character (this avoids matching inside an email-like string) and the match
# ends on a word boundary.
RE_MENTION = re.compile(rf"(?<!\w)@{RE_ACCOUNT.pattern}\b")