view libervia/backend/tools/xmpp_datetime.py @ 4163:3b3cd9453d9b

plugin XEP-0308: implement Last Message Correction
author Goffi <goffi@goffi.org>
date Tue, 28 Nov 2023 17:38:31 +0100
parents 4b842c1fb686
children 0d7bb4df2343
line wrap: on
line source

#!/usr/bin/env python3

# Libervia: XMPP Date and Time profiles as per XEP-0082
# Copyright (C) 2022-2022 Tim Henkes (me@syndace.dev)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from datetime import date, datetime, time, timezone
import re
from typing import Optional, Tuple

from libervia.backend.core import exceptions


# Names exported by this module: one formatter and one parser for each of the date,
# datetime and time representations handled below.
__all__ = [  # pylint: disable=unused-variable
    "format_date",
    "parse_date",
    "format_datetime",
    "parse_datetime",
    "format_time",
    "parse_time"
]


def __parse_fraction_of_a_second(value: str) -> Tuple[str, Optional[int]]:
    """
    datetime's strptime only supports up to six digits of the fraction of a second, while
    the XEP-0082 specification allows for any number of digits. This function parses and
    removes the optional fraction of a second from the input string.

    Only a fraction that directly follows the ``hh:mm:ss`` section is recognized. A dot
    at any other position is left in the string untouched, so that the subsequent parsing
    step of the caller rejects the malformed input instead of silently accepting it.

    @param value: The input string, containing a section of the format [.sss].
    @return: The input string with the fraction of a second removed, and the fraction of a
        second parsed with microsecond resolution. Returns the unaltered input string and
        ``None`` if no fraction of a second was found in the input string.
    """

    # The lookbehind anchors the fraction to the seconds it belongs to. The digits are
    # captured for manual processing; the dot itself is part of the removed section.
    match = re.search(r"(?<=\d{2}:\d{2}:\d{2})\.(\d*)", value)
    if match is None:
        return value, None

    # Remove the fraction of a second from the input string
    remainder = value[:match.start()] + value[match.end():]

    # datetime supports microsecond resolution for the fraction of a second, thus
    # truncate/pad the parsed fraction of a second to exactly six digits
    digits = match.group(1)
    return remainder, int(digits[:6].ljust(6, "0"))


def format_date(value: Optional[date] = None) -> str:
    """
    Format a date according to the Date profile (CCYY-MM-DD) specified in XEP-0082.

    @param value: The date to format. Defaults to the current date in the UTC timezone.
    @return: The formatted date.

    @warning: Formatting of the current date in the local timezone may leak geographical
        information of the sender. Thus, it is advised to only format the current date in
        UTC.
    """
    if value is None:
        # No date given: fall back to "today" as seen from UTC
        value = datetime.now(timezone.utc).date()

    # The Date profile of XEP-0082 is equal to the ISO 8601 format.
    return value.isoformat()


def parse_date(value: str) -> date:
    """
    Parse a date formatted according to the Date profile (CCYY-MM-DD) of XEP-0082.

    @param value: A string containing date information formatted according to the Date
        profile specified in XEP-0082.
    @return: The date parsed from the input string.
    @raise exceptions.ParsingError: if the input string is not correctly formatted.
    """
    # CCYY-MM-DD

    try:
        # Starting with Python 3.11, date.fromisoformat accepts further ISO 8601 layouts
        # (e.g. "20231128" or week dates such as "2023-W48-2") which are not part of the
        # XEP-0082 Date profile, thus the layout is enforced explicitly beforehand.
        if re.fullmatch(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", value) is None:
            raise ValueError(f"not in CCYY-MM-DD format: {value!r}")

        # With the layout checked, fromisoformat validates the actual calendar values.
        return date.fromisoformat(value)
    except ValueError as e:
        raise exceptions.ParsingError() from e


def format_datetime(
    value: Optional[datetime] = None,
    include_microsecond: bool = False
) -> str:
    """
    Format a datetime according to the DateTime profile specified in XEP-0082.

    @param value: The datetime to format. Defaults to the current datetime.
        must be an aware datetime object (timezone must be specified)
    @param include_microsecond: Include the microsecond of the datetime in the output.
    @return: The datetime formatted according to the DateTime profile specified in
        XEP-0082. The datetime is always converted to UTC before formatting to avoid
        leaking geographical information of the sender.
    @raise exceptions.InternalError: if a naive datetime object is passed.
    """
    # CCYY-MM-DDThh:mm:ss[.sss]TZD

    if value is None:
        value = datetime.now(timezone.utc)
    elif value.tzinfo is None:
        raise exceptions.InternalError(
            "an aware datetime object must be used, but a naive one has been provided"
        )
    else:
        value = value.astimezone(timezone.utc)  # pylint: disable=no-member

    # isoformat is used rather than strftime, since the width of strftime's %Y is
    # platform-dependent and years below 1000 may come out without zero-padding, which
    # violates the CCYY layout. The tzinfo is dropped so that isoformat emits no offset;
    # since the value is in UTC at this point, a simple letter 'Z' is appended as the time
    # zone definition instead.
    timespec = "microseconds" if include_microsecond else "seconds"
    return value.replace(tzinfo=None).isoformat(timespec=timespec) + "Z"


def parse_datetime(value: str) -> datetime:
    """
    Parse a datetime formatted according to the DateTime profile of XEP-0082.

    @param value: A string containing datetime information formatted according to the
        DateTime profile specified in XEP-0082.
    @return: The datetime parsed from the input string.
    @raise exceptions.ParsingError: if the input string is not correctly formatted.
    """
    # CCYY-MM-DDThh:mm:ss[.sss]TZD

    # The fraction of a second is split off first and handled separately from strptime.
    stripped, fraction = __parse_fraction_of_a_second(value)

    try:
        parsed = datetime.strptime(stripped, "%Y-%m-%dT%H:%M:%S%z")
    except ValueError as err:
        raise exceptions.ParsingError() from err

    # Re-attach the separately parsed fraction of a second, if there was one
    return parsed if fraction is None else parsed.replace(microsecond=fraction)


def format_time(value: Optional[time] = None, include_microsecond: bool = False) -> str:
    """
    Format a time according to the Time profile specified in XEP-0082.

    @param value: The time to format. Defaults to the current time in the UTC timezone.
    @param include_microsecond: Include the microsecond of the time in the output.
    @return: The time formatted according to the Time profile specified in XEP-0082.

    @warning: Since accurate timezone conversion requires the date to be known, this
        function cannot convert input times to UTC before formatting. This means that
        geographical information of the sender may be leaked if a time in local timezone
        is formatted. Thus, when passing a time to format, it is advised to pass the time
        in UTC if possible.
    """
    # hh:mm:ss[.sss][TZD]

    # "seconds" cuts the output off after the seconds, "auto" lets isoformat decide
    timespec = "auto" if include_microsecond else "seconds"

    if value is None:
        # The time class offers no now() constructor, so the current time is taken from
        # an aware datetime; timetz() keeps the time zone information attached.
        value = datetime.now(timezone.utc).timetz()

    # The format created by time.isoformat complies with the XEP-0082 Time profile.
    return value.isoformat(timespec)


def parse_time(value: str) -> time:
    """
    Parse a time formatted according to the Time profile of XEP-0082.

    @param value: A string containing time information formatted according to the Time
        profile specified in XEP-0082.
    @return: The time parsed from the input string.
    @raise exceptions.ParsingError: if the input string is not correctly formatted.
    """
    # hh:mm:ss[.sss][TZD]

    # The fraction of a second is split off first and handled separately.
    stripped, fraction = __parse_fraction_of_a_second(value)

    # time.fromisoformat may not handle the letter Z as time zone information for UTC,
    # thus it is rewritten to "+00:00", which is another way to represent UTC.
    iso_text = stripped.replace('Z', "+00:00")

    try:
        parsed = time.fromisoformat(iso_text)
    except ValueError as err:
        raise exceptions.ParsingError() from err

    if fraction is None:
        return parsed

    # Re-attach the separately parsed fraction of a second
    return parsed.replace(microsecond=fraction)