view sat/tools/web.py @ 4022:cdb7de398c85

plugin lang detect: don't detect the language if the body is empty
author Goffi <goffi@goffi.org>
date Thu, 23 Mar 2023 15:39:48 +0100
parents 65bac82e4049
children 524856bd7b19
line wrap: on
line source

#!/usr/bin/env python3

# Libervia: an XMPP client
# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from typing import Optional, Union
from pathlib import Path
from io import BufferedIOBase

from OpenSSL import SSL
import treq
from treq.client import HTTPClient
from twisted.internet import reactor, ssl
from twisted.internet.interfaces import IOpenSSLClientConnectionCreator
from twisted.web import iweb
from twisted.web import client as http_client
from zope.interface import implementer

from sat.core import exceptions
from sat.core.log import getLogger


# Module-level logger, named after this module.
log = getLogger(__name__)


# Alias of OpenSSL's ``SSL.Error``; presumably exposed so that callers can catch TLS
# errors without importing OpenSSL themselves (to be confirmed against callers).
SSLError = SSL.Error


@implementer(IOpenSSLClientConnectionCreator)
class NoCheckConnectionCreator:
    """Create client TLS connections from an already built OpenSSL context

    The hostname given at construction is ignored: it is neither stored nor attached
    to the connections which are created.
    """

    def __init__(self, hostname, ctx):
        # ``hostname`` is accepted but deliberately unused, only the context is kept
        self._ctx = ctx

    def clientConnectionForTLS(self, tlsProtocol):
        """Build the OpenSSL connection to use for the given TLS protocol

        @param tlsProtocol: protocol instance, set as application data of the
            connection
        @return: a new ``SSL.Connection`` using the stored context and no socket
        """
        tls_connection = SSL.Connection(self._ctx, None)
        tls_connection.set_app_data(tlsProtocol)
        return tls_connection


@implementer(iweb.IPolicyForHTTPS)
class NoCheckContextFactory:
    """HTTPS policy which doesn't check TLS certificates

    WARNING: using this class is obviously a security flaw, it should be used only
    with explicit agreement from the end user.
    """

    def creatorForNetloc(self, hostname, port):
        """Return a connection creator which doesn't check certificates

        A warning is logged each time this method is called.

        @param hostname: host of the requested location
        @param port: port of the requested location
        @return: a NoCheckConnectionCreator using a context built without trust root
        """
        log.warning(f"TLS check disabled for {hostname} on port {port}")
        # no trust root is given to the certificate options
        unchecked_context = ssl.CertificateOptions(trustRoot=None).getContext()
        return NoCheckConnectionCreator(hostname, unchecked_context)


#: treq client which does NOT check TLS certificates (it is built on
#: NoCheckContextFactory). It is obviously insecure and should not be used without an
#: explicit warning.
treq_client_no_ssl = HTTPClient(http_client.Agent(reactor, NoCheckContextFactory()))


async def downloadFile(
    url: str,
    dest: Union[str, Path, BufferedIOBase],
    max_size: Optional[int] = None
) -> None:
    """Helper method to download a file

    This is for internal download, for high level download with progression, use
    ``plugin_misc_download``.

    Inspired from
    https://treq.readthedocs.io/en/latest/howto.html#handling-streaming-responses

    @param url: URL of the file to download
    @param dest: destination filename or file-like object
        if it's a file-like object, you'll have to close it yourself
    @param max_size: if set, an exceptions.DataError will be raised if the downloaded
        file is bigger than given value (in bytes).
        Data received before the limit is exceeded is left in ``dest``.
    @raise exceptions.DataError: the downloaded data is bigger than ``max_size``
    """
    if isinstance(dest, BufferedIOBase):
        f = dest
        must_close = False
    else:
        dest = Path(dest)
        f = dest.open("wb")
        must_close = True

    written = 0

    def write(data: bytes) -> None:
        """Write a received chunk to destination, enforcing ``max_size`` if set"""
        nonlocal written
        if max_size is not None:
            written += len(data)
            if written > max_size:
                raise exceptions.DataError(
                    f"downloaded file is bigger than expected ({max_size})"
                )
        f.write(data)

    try:
        # the request is started inside the ``try`` block so the file we have opened
        # is closed even if the request fails immediately
        d = treq.get(url, unbuffered=True)
        # ``write`` (and not ``f.write``) must be the collector, otherwise ``max_size``
        # is never checked
        d.addCallback(treq.collect, write)
        await d
    except exceptions.DataError:
        log.warning("download cancelled due to file oversized")
        raise
    except Exception as e:
        log.error(f"Can't write file {dest}: {e}")
        raise
    finally:
        if must_close:
            f.close()