diff libervia/backend/plugins/plugin_comp_ap_gateway/regex.py @ 4071:4b842c1fb686

refactoring: renamed `sat` package to `libervia.backend`
author Goffi <goffi@goffi.org>
date Fri, 02 Jun 2023 11:49:51 +0200
parents sat/plugins/plugin_comp_ap_gateway/regex.py@381340b9a9ee
children 0d7bb4df2343
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libervia/backend/plugins/plugin_comp_ap_gateway/regex.py	Fri Jun 02 11:49:51 2023 +0200
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+# Libervia ActivityPub Gateway
+# Copyright (C) 2009-2022 Jérôme Poisson (goffi@goffi.org)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Various Regular Expression for AP gateway"""
+
+import re
+
+## "Signature" header parsing
+
+# these expressions have been generated with abnf-to-regexp
+# (https://github.com/aas-core-works/abnf-to-regexp)
+
+# the base RFC 7230 ABNF rules come from https://github.com/EricGT/ABNF
+
+# here is the ABNF file used:
+# ---
+# BWS = OWS
+# OWS = *( SP / HTAB )
+# tchar = "!" / "#" / "$" / "%" / "&" / "`" / "*" / "+" / "-" / "." / "^" / "_" / "\'" / "|" / "~" / DIGIT / ALPHA
+# token = 1*tchar
+# sig-param = token BWS "=" BWS ( token / quoted-string )
+# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+# qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'['
+#  / %x5D-7E ; ']'-'~'
+#  / obs-text
+# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
+# obs-text = %x80-FF
+# ---
+
+ows = '[ \t]*'
+bws = f'{ows}'
+obs_text = '[\\x80-\\xff]'
+qdtext = f'([\t !#-\\[\\]-~]|{obs_text})'
+quoted_pair = f'\\\\([\t !-~]|{obs_text})'
+quoted_string = f'"({qdtext}|{quoted_pair})*"'
+tchar = "([!#$%&'*+\\-.^_`|~0-9a-zA-Z])"  # "'" is a plain tchar: the ABNF above wrongly had "\'"
+token = f'({tchar})+'
+RE_SIG_PARAM = re.compile(  # one "key=value" parameter of a "Signature" header
+    f'(?P<key>{token}){bws}={bws}'  # BWS is kept out of "key": no trailing blanks in the name
+    f'((?P<uq_value>{token})|(?P<quoted_value>{quoted_string}))'  # quotes stay in quoted_value
+)
+
+
+## Account/Mention
+
+# FIXME: naive regex, should be improved following WebFinger, but popular implementations
+#   such as Mastodon use a very restricted subset; domain labels are separated by a literal "."
+RE_ACCOUNT = re.compile(r"[a-zA-Z0-9._-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*")
+RE_MENTION = re.compile(rf"(?<!\w)@{RE_ACCOUNT.pattern}\b")