view cagou/core/simple_xhtml.py @ 466:cd448b877d1d

install: update requirements with alabaster==0.7.12 alembic==1.4.3 anki @ file:///build/anki/src/anki/dist/anki-2.1.35-py3-none-any.whl ankirspy @ file:///build/anki/src/anki/dist/ankirspy-2.1.35-cp39-cp39-manylinux1_x86_64.whl ansi2html==1.6.0 anytree==2.8.0 apipkg==1.5 apparmor==3.0.1 appdirs==1.4.4 appimage-builder==0.8.5 aqt @ file:///build/anki/src/anki/dist/aqt-2.1.35-py3-none-any.whl argcomplete==1.12.1 argon2-cffi==20.1.0 asn1crypto==1.4.0 async-generator==1.10 attrs==20.3.0 autobahn==21.3.1 Automat==20.2.0 autopep8==1.5.5 Babel==2.9.0 backcall==0.2.0 bcrypt==3.2.0 Beaker==1.11.0 beautifulsoup4==4.9.3 black==20.8b1 bleach==3.3.0 blinker==1.4 bmap-tools==3.5 breezy==3.1.0 Brlapi==0.8.2 btrfsutil==5.11 CacheControl==0.12.6 cached-property==1.5.2 cagou==0.8.0.dev0+83c67b093350.153 cairocffi==1.2.0 CairoSVG==2.5.2 certifi==2020.6.20 cffi==1.14.5 chardet==3.0.4 click==7.1.2 colorama==0.4.4 commonmark==0.9.1 configobj==5.1.0.dev0 constantly==15.1.0 contextlib2==0.6.0.post1 coverage==5.5 cryptography==3.4.6 css-parser==1.0.6 cssselect2==0.4.1 cycler==0.10.0 Cython==0.29.22 decorator==4.4.2 defusedxml==0.6.0 diffoscope==169 distlib==0.3.1 distro==1.5.0 docker==4.4.4 docker-compose==1.28.5 docker-pycreds==0.4.0 dockerpty==0.4.1 docopt==0.6.2 docutils==0.16 dulwich==0.20.20 emrichen==0.2.3 entrypoints==0.3 extras==1.0.0 fastimport==0.9.8 file-magic==0.4.0 filelock==3.0.12 fixtures==3.0.0 flake8==3.8.4 Flask==1.1.2 Flask-BabelEx==0.9.4 Flask-Compress==1.8.0 Flask-Cors==3.0.9 Flask-Gravatar==0.5.0 Flask-Login==0.5.0 Flask-Mail==0.9.1 Flask-Migrate==2.7.0 Flask-Paranoid==0.2.0 Flask-Principal==0.4.0 Flask-Script==2.0.6 Flask-Security-Too==3.3.3 Flask-SQLAlchemy==2.4.4 Flask-WTF==0.14.3 future==0.18.2 gajim==1.2.2 gssapi==1.6.12 html2text==2020.1.16 html5lib==1.1 httpie==2.4.0 httplib2==0.19.0 hyperlink==21.0.0 idna==2.10 imagesize==1.2.0 img2pdf==0.4.0 importlib-metadata==3.4.0 incremental==17.5.0 inflect==5.3.0 iniconfig==1.1.1 ipdb==0.13.6 ipykernel==5.4.2 
ipython==7.19.0 ipython-genutils==0.2.0 ipywidgets==7.6.2 isc==2.0 itsdangerous==1.1.0 jedi==0.17.2 jeepney==0.6.0 Jinja2==2.11.3 joblib==1.0.0 jsonpath-rw==1.4.0 jsonpickle==1.5.1 jsonschema==3.2.0 jupyter-client==6.1.7 jupyter-console==6.2.0 jupyter-core==4.6.3 jupyterlab-pygments==0.1.2 keyring==23.0.0 Kivy==2.0.0 kiwisolver==1.3.1 langdetect==1.0.8 ldap3==2.9.dev0 lensfun==0.3.95 LibAppArmor==3.0.1 libarchive-c==2.9 libfdt==1.6.0 libnacl==1.7.2 lockfile==0.12.2 louis==3.17.0 lxml==4.6.2 Mako==1.1.4 Markdown==3.3.3 MarkupSafe==1.1.1 matplotlib==3.3.4 mccabe==0.6.1 meld==3.20.3 mercurial==5.7.1 meson==0.57.1 miniupnpc==2.1 mistune==0.8.4 more-itertools==8.6.0 msgpack==1.0.2 mypy==0.812 mypy-extensions==0.4.3 natsort==7.1.1 nbclient==0.5.1 nbconvert==6.0.7 nbformat==5.0.8 nbxmpp==1.0.2 nest-asyncio==1.4.3 netifaces==0.10.9 netsnmp-python==1.0a1 networkx==2.5 nltk==3.5 nose==1.3.7 notebook==6.2.0 Nuitka==0.6.12.3 numpy==1.20.1 openshot-qt==2.5.1 ordered-set==4.0.2 orjson @ file:///build/python-orjson/src/python-orjson-3.5.1/target/wheels/orjson-3.5.1-cp39-cp39-manylinux2010_x86_64.whl packaging==20.9 pandas==1.2.3 pandocfilters==1.4.3 paramiko==2.7.2 parso==0.7.1 passlib==1.7.4 path==15.1.2 pathspec==0.8.1 patiencediff==0.2.1 pbr==5.5.1 pdfarranger==1.7.0 pep517==0.9.1 pexpect==4.8.0 pickleshare==0.7.5 pikepdf==2.8.0.post2 Pillow==8.1.0 pluggy==0.13.1 ply==3.11 precis-i18n==1.0.3 progress==1.5 progressbar2==3.53.1 prometheus-client==0.9.0 prompt-toolkit==3.0.17 protobuf==3.12.4 psutil==5.8.0 psycopg2==2.8.6 ptyprocess==0.7.0 pudb==2020.1 pwquality==1.4.4 py==1.10.0 pyaml==20.4.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 PyAudio==0.2.11 pybind11==2.6.2 pycairo==1.20.0 pycodestyle==2.6.0 pycountry==20.7.3 pycparser==2.20 pydocstyle==5.1.1 pyenchant==3.2.0 pyflakes==2.2.0 Pygments==2.8.1 PyGObject==3.38.0 PyHamcrest==1.9.0 pyinotify==0.9.6 pymediainfo==5.0.3 PyNaCl==1.4.0 PyOpenGL==3.1.5 pyOpenSSL==20.0.1 pyparsing==2.4.7 pyPEG2==2.15.2 PyQt5==5.15.4 PyQt5-sip==12.8.1 
PyQtWebEngine==5.15.4 pyrsistent==0.17.3 PySocks==1.7.1 pytest==6.2.2 python-dateutil==2.8.1 python-dotenv==0.15.0 python-editor==1.0.4 python-Levenshtein==0.12.2 python-mimeparse==1.6.0 python-utils==2.5.6 python-xlib==0.29 pytoml==0.1.21 pytz==2021.1 pyxdg==0.26 PyYAML==5.3.1 pyzmq==20.0.0 questionary==1.9.0 qutebrowser==2.1.0 Reflector==2021.1.10.0.6.34 regex==2020.11.13 requests==2.25.1 requests-toolbelt==0.9.1 requirements-parser==0.2.0 resolvelib==0.5.4 retrying==1.3.3 s3cmd==2.1.0 schema==0.7.4 scikit-learn==0.24.1 scipy==1.6.1 scons==3.1.2 screenkey==1.4 SecretStorage==3.3.1 Send2Trash==1.5.0 service-identity==18.1.0 sh==1.14.1 shortuuid==1.0.1 simplejson==3.17.2 sip==4.19.25 six==1.15.0 snowballstemmer==2.1.0 soupsieve==2.2 speaklater==1.3 Sphinx==3.5.2 sphinx-rtd-theme==0.5.1 sphinxcontrib-applehelp==1.0.2 sphinxcontrib-devhelp==1.0.2 sphinxcontrib-htmlhelp==1.0.3 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.4 sqlacodegen==2.3.0 SQLAlchemy==1.3.23 sqlparse==0.4.1 sshtunnel==0.1.5 subdownloader==2.1.0 systemd-python==234 team==1.0 termcolor==1.1.0 terminado==0.9.2 terminator==2.1.0 testpath==0.4.4 testtools==2.4.0 texttable==1.6.3 threadpoolctl==2.1.0 tinycss2==1.1.0 tlsh==0.2.0 toml==0.10.2 tornado==6.1 traitlets==5.0.5 treq==21.1.0 Twisted==20.3.0 txaio==21.2.1 typed-ast==1.4.2 typing-extensions==3.7.4.3 tzlocal==2.1 urllib3==1.26.3 urwid==2.1.1 validate==5.1.0.dev0 virtualenv==20.4.2 waitress==1.4.4 wcwidth==0.2.5 webencodings==0.5.1 websocket-client==0.58.0 Werkzeug==1.0.1 Whoosh==2.7.4 widgetsnbextension==3.5.1 wsaccel==0.6.3 WTForms==2.2.1 xcffib==0.11.1 youtube-dl==2021.3.3 zipp==3.4.1 zope.interface==5.2.0
author Goffi <goffi@goffi.org>
date Sat, 20 Mar 2021 14:26:33 +0100
parents 3c9ba4a694ef
children 203755bbe0fe
line wrap: on
line source

#!/usr/bin/env python3


# Cagou: desktop/mobile frontend for Salut à Toi XMPP client
# Copyright (C) 2016-2021 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


from xml.etree import ElementTree as ET
from kivy.uix.stacklayout import StackLayout
from kivy.uix.label import Label
from kivy.utils import escape_markup
from kivy.metrics import sp
from kivy import properties
from sat.core import log as logging
from sat_frontends.tools import css_color, strings as sat_strings
from cagou import G
from cagou.core.common  import SizedImage


log = logging.getLogger(__name__)


class Escape(str):
    """Marker type for text which must be displayed escaped (i.e. without markup)

    This is a plain ``str`` subclass adding no behaviour: the type itself is the
    information, it is tested with ``isinstance``.
    """


class SimpleXHTMLWidgetEscapedText(Label):
    """Label showing plain text, with URLs turned into clickable references"""

    def on_parent(self, instance, parent):
        """Use the font size of the parent once the label is attached to one"""
        if parent is None:
            return
        self.font_size = parent.font_size

    def _addUrlMarkup(self, text):
        """Look for URLs in text and switch the label to markup if any is found

        When at least one URL is found, self.text is replaced by an escaped
        version where each URL is wrapped in a [ref], self.markup is set and
        self.ref_urls maps each reference name to its URL.
        Nothing is modified when the text contains no URL.
        @param text(str): raw text to inspect
        """
        chunks = []
        offset = 0
        found = 0
        match = sat_strings.RE_URL.search(text[offset:])
        while match is not None:
            # text located before the URL, relative to the searched slice
            chunks.append(escape_markup(match.string[:match.start()]))
            ref_name = f'link_{found}'
            raw_url = match.group()
            safe_url = escape_markup(raw_url)
            chunks.append(
                f'[color=5500ff][ref={ref_name}]{safe_url}[/ref][/color]')
            if found:
                self.ref_urls[ref_name] = raw_url
            else:
                # first URL: start a fresh mapping
                self.ref_urls = {ref_name: raw_url}
            found += 1
            offset += match.end()
            match = sat_strings.RE_URL.search(text[offset:])

        if found:
            chunks.append(escape_markup(text[offset:]))
            # markup must be set before text, see on_text
            self.markup = True
            self.text = ''.join(chunks)

    def on_text(self, instance, text):
        """Add URL markup to newly set text"""
        # the method must NOT be called when self.markup is set: it changes
        # self.text (and sets markup) when an URL is found, which would end up
        # in an infinite loop
        if not text or self.markup:
            return
        self._addUrlMarkup(text)

    def on_ref_press(self, ref):
        """Open the URL associated with the pressed reference"""
        G.local_platform.open_url(self.ref_urls[ref], self)


class SimpleXHTMLWidgetText(Label):
    """Label used for text coming from parsed XHTML"""

    def on_parent(self, instance, parent):
        """Use the font size of the parent once the label is attached to one"""
        if parent is None:
            return
        self.font_size = parent.font_size


class SimpleXHTMLWidget(StackLayout):
    """widget handling simple XHTML parsing"""
    # content to display, children are rebuilt by on_xhtml each time it changes
    xhtml = properties.StringProperty()
    # colour given to the labels created by this widget
    color = properties.ListProperty([1, 1, 1, 1])
    # XXX: bold is only used for escaped text
    bold = properties.BooleanProperty(False)
    # font size, child labels copy it when they get this widget as parent
    font_size = properties.NumericProperty(sp(14))

    # text/XHTML input

    def on_xhtml(self, instance, xhtml):
        """Parse xhtml and set content accordingly

        Existing children are always removed first, then:
            - if xhtml is an instance of Escape, a single Label with no XHTML
              parsing is used
            - if xhtml is empty, the widget is left empty
            - else xhtml is parsed and the matching widgets are created; content
              which is not well-formed is displayed as plain text instead of
              raising from the property observer
        @param instance: object which dispatched the change (unused)
        @param xhtml(str, Escape): new content
        """
        self.clear_widgets()
        if isinstance(xhtml, Escape):
            self._addEscapedLabel(xhtml)
        elif xhtml:
            # NOTE(review): xml.etree is not hardened against maliciously
            #   constructed XML; confirm that the content is sanitised upstream
            try:
                root = ET.fromstring(xhtml.encode())
            except ET.ParseError as e:
                log.warning(f"Can't parse XHTML, displaying it as plain text: {e}")
                self._addEscapedLabel(xhtml)
            else:
                self.current_wid = None
                self.styles = []
                self._callParseMethod(root)
        if len(self.children) > 1:
            self._do_split_labels()

    def _addEscapedLabel(self, text):
        """Add a Label displaying text without any XHTML parsing

        The label follows font_size, color and bold of this widget.
        @param text(str): text to display
        """
        label = SimpleXHTMLWidgetEscapedText(
            text=text, color=self.color, bold=self.bold)
        self.bind(font_size=label.setter('font_size'))
        self.bind(color=label.setter('color'))
        self.bind(bold=label.setter('bold'))
        self.add_widget(label)

    def escape(self, text):
        """mark that a text need to be escaped (i.e. no markup)

        @param text(str): text to mark
        @return (Escape): same text wrapped in the Escape marker type, which
            on_xhtml checks to display the content without XHTML parsing
        """
        return Escape(text)

    def _do_split_labels(self):
        """Split labels so their content can flow with images

        Every Label child is replaced by one text widget per word, other
        children are kept as they are, in the same order.
        """
        # XXX: to make things easier, we split labels in words
        log.debug("labels splitting start")
        # self.children is read in reverse to get back the order of insertion
        children = self.children[::-1]
        self.clear_widgets()
        for child in children:
            if not isinstance(child, Label):
                # non Label widgets, we just add them
                self.add_widget(child)
                continue
            log.debug("label before split: {}".format(child.text))
            for chunk in self._splitMarkup(child.text):
                wid = self._createText()
                wid.text = chunk
                self.add_widget(wid)
                log.debug("new widget: {}".format(chunk))
        self.splitted = True
        log.debug("split OK")

    @staticmethod
    def _splitMarkup(text):
        """Split a markup text in words, each of them being valid markup

        The text is cut after each space. Styles still opened at the cut are
        closed at the end of the chunk and opened again at the beginning of the
        next one. The text remaining after the last space is yielded too, as long
        as it holds something else than markup tags.
        @param text(str): text using Kivy markup
        @return (iterator[str]): chunks, in order
        """
        # FIXME: handle '\n' and other white chars
        styles = []  # (tag, value) of the styles currently opened
        chunk = []  # characters of the chunk being built
        has_text = False  # True if chunk holds something else than tags
        tag_name = []
        tag_value = []
        in_tag = in_value = closing = False

        for c in text:
            if in_tag:
                # we are parsing a markup tag
                chunk.append(c)
                if c == ']':
                    name = ''.join(tag_name)
                    if closing:
                        # the most recently opened style with this tag is closed
                        for idx in range(len(styles) - 1, -1, -1):
                            if styles[idx][0] == name:
                                del styles[idx]
                                break
                    else:
                        styles.append((name, ''.join(tag_value)))
                    tag_name = []
                    tag_value = []
                    in_tag = in_value = closing = False
                elif c == '/' and not in_value:
                    # a "/" inside a value (e.g. an URL) is not a closing mark
                    closing = True
                elif c == '=' and not in_value:
                    in_value = True
                elif in_value:
                    tag_value.append(c)
                else:
                    tag_name.append(c)
            elif c == '[':
                chunk.append(c)
                in_tag = True
            elif c == ' ':
                # new word: the chunk is finished and a new one is started
                chunk.append(' ')
                chunk.extend('[/{}]'.format(t) for t, __ in reversed(styles))
                yield ''.join(chunk)
                chunk = [
                    '[{tag}{value}]'.format(
                        tag=t, value='={}'.format(v) if v else '')
                    for t, v in styles
                ]
                has_text = False
            else:
                chunk.append(c)
                has_text = True

        if has_text:
            # text located after the last space
            chunk.extend('[/{}]'.format(t) for t, __ in reversed(styles))
            yield ''.join(chunk)

    # XHTML parsing methods

    def _callParseMethod(self, e):
        """Dispatch an element to the method handling its tag

        The method named xhtml_[tag] is used when this widget has one, otherwise
        a warning is logged and xhtml_generic is used.
        @param e(ET.Element): element to parse
        """
        handler = getattr(self, f"xhtml_{e.tag}", None)
        if handler is None:
            log.warning(f"Unhandled XHTML tag: {e.tag}")
            handler = self.xhtml_generic
        handler(e)

    def _addStyle(self, tag, value=None, append_to_list=True):
        """Open a markup style in the current label

        @param tag(unicode): markup tag
        @param value(unicode): markup value if suitable
        @param append_to_list(bool): if True the style is also recorded in
            self.styles, which keeps track of the styles still to be closed
            should most probably be set to True
        """
        label = self._getLabel()
        suffix = f'={value}' if value else ''
        label.text += f'[{tag}{suffix}]'
        if not append_to_list:
            return
        self.styles.append((tag, value))

    def _removeStyle(self, tag, remove_from_list=True):
        """Close a markup style in the current label

        @param tag(unicode): markup tag to close
        @param remove_from_list(bool): if True, the most recently added style
            using this tag is removed from self.styles too
            should most probably be set to True
        """
        self._getLabel().text += f'[/{tag}]'
        if not remove_from_list:
            return
        # search from the end, so the innermost matching style is removed
        for idx in range(len(self.styles) - 1, -1, -1):
            if self.styles[idx][0] == tag:
                del self.styles[idx]
                break

    def _getLabel(self):
        """Return the current widget if it is a Label, else add a new Label

        @return (Label): label where text can be appended
        """
        if isinstance(self.current_wid, Label):
            return self.current_wid
        self._addLabel()
        return self.current_wid

    def _addLabel(self):
        """Start a new Label and make it the current widget

        Styles in self.styles are closed in the previous label (if any), and
        opened again in the new one.
        """
        self._closeLabel()
        new_label = self._createText()
        self.current_wid = new_label
        # styles are reopened without being recorded a second time
        for tag, value in self.styles:
            self._addStyle(tag, value, append_to_list=False)
        self.add_widget(new_label)

    def _createText(self):
        """Create a text widget with markup activated

        The widget uses and follows the color of this widget, and its size
        follows the size of its texture.
        @return (SimpleXHTMLWidgetText): new widget, not added to the layout yet
        """
        text_wid = SimpleXHTMLWidgetText(color=self.color, markup=True)
        text_wid.bind(texture_size=text_wid.setter('size'))
        self.bind(color=text_wid.setter('color'))
        return text_wid

    def _closeLabel(self):
        """Close the styles currently opened in the current label

        Needed when the label is changed, to keep styles between different
        widgets. self.styles is left untouched, and nothing is done if the
        current widget is not a Label.
        """
        if not isinstance(self.current_wid, Label):
            return
        for tag, __ in reversed(self.styles):
            self._removeStyle(tag, remove_from_list=False)

    def _parseCSS(self, e):
        """parse CSS found in "style" attribute of element

        Each "property: value" declaration is handled by the method named
        css_[property] (with "-" replaced by "_") when this widget has one,
        a warning is logged otherwise.
        self._css_styles is set to the markup styles added by this method.
        @param e(ET.Element): element with a "style" attribute
        @raise KeyError: the element has no "style" attribute
        """
        styles_limit = len(self.styles)
        for declaration in e.attrib['style'].split(';'):
            if not declaration.strip():
                # empty declaration, e.g. after a trailing ";": nothing to do
                continue
            # only the first ":" is a separator, the value may contain others
            prop, separator, value = declaration.partition(':')
            if not separator:
                log.warning(f"can't parse style: {declaration}")
                continue
            prop = prop.strip().replace('-', '_')
            value = value.strip()
            try:
                method = getattr(self, f"css_{prop}")
            except AttributeError:
                log.warning(f"Unhandled CSS: {prop}")
            else:
                method(e, value)
        self._css_styles = self.styles[styles_limit:]

    def _closeCSS(self):
        """Close the markup styles which were added from CSS

        Styles listed in self._css_styles are closed starting with the last one,
        then the attribute is deleted.
        """
        for css_style in self._css_styles[::-1]:
            self._removeStyle(css_style[0])
        del self._css_styles

    def xhtml_generic(self, elem, style=True, markup=None):
        """Generic method for adding HTML elements

        this method handle content, style and children parsing
        @param elem(ET.Element): element to add
        @param style(bool): if True handle style attribute (CSS)
        @param markup(unicode, tuple[unicode, (unicode, None)], None): kivy markup
            to use, either the tag alone or a (tag, value) tuple
        """
        # we first add markup and CSS style
        if markup is not None:
            if isinstance(markup, str):
                tag, value = markup, None
            else:
                tag, value = markup
            self._addStyle(tag, value)

        css_styles = None
        if style and 'style' in elem.attrib:
            self._parseCSS(elem)
            # self._css_styles is shared by the whole widget: a descendant with
            # its own "style" attribute replaces then deletes it, so the styles
            # of this element are kept here until it is closed
            css_styles = self._css_styles

        # then content
        if elem.text:
            self._getLabel().text += escape_markup(elem.text)

        # we parse the children
        for child in elem:
            self._callParseMethod(child)

        # closing CSS style and markup
        if css_styles is not None:
            self._css_styles = css_styles
            self._closeCSS()
        if markup is not None:
            self._removeStyle(tag)

        # and the tail, which is regular text
        if elem.tail:
            self._getLabel().text += escape_markup(elem.tail)

    # method handling XHTML elements

    def xhtml_br(self, elem):
        """Handle <br>: add a line break to the current label"""
        self._getLabel().text += '\n'
        # CSS is ignored for this element, the generic method is used for
        # its content and tail
        self.xhtml_generic(elem, style=False)

    def xhtml_em(self, elem):
        """Handle <em>: content is added with the "i" markup"""
        self.xhtml_generic(elem, markup='i')

    def xhtml_img(self, elem):
        """Handle <img>: add an image widget, then the text following the element

        An element without "src" attribute is ignored with a warning.
        "height" and "width" attributes are given to the image as target size:
        0 is used when the attribute is missing, None when it can't be parsed as
        an integer.
        @param elem(ET.Element): <img> element
        """
        src = elem.get('src')
        if src is None:
            log.warning("<img> element without src: {}".format(ET.tostring(elem)))
        else:
            target_size = {}
            for attr in ('height', 'width'):
                try:
                    target_size[attr] = int(elem.get(attr, 0))
                except ValueError:
                    log.warning(f"Can't parse image {attr}: {elem.get(attr)}")
                    target_size[attr] = None

            img = SizedImage(
                source=src,
                target_height=target_size['height'],
                target_width=target_size['width'])
            self.current_wid = img
            self.add_widget(img)

        # the tail is the regular text following the element, it must not be lost
        if elem.tail:
            self._getLabel().text += escape_markup(elem.tail)

    def xhtml_p(self, elem):
        """Handle <p>: separate the paragraph from the text already displayed"""
        previous = self.current_wid
        if isinstance(previous, Label):
            # an empty line is inserted only when a label is in progress
            previous.text += "\n\n"
        self.xhtml_generic(elem)

    def xhtml_span(self, elem):
        """Handle <span>: generic parsing, with no additional markup"""
        self.xhtml_generic(elem)

    def xhtml_strong(self, elem):
        """Handle <strong>: content is added with the "b" markup"""
        self.xhtml_generic(elem, markup='b')

    # methods handling CSS properties

    def css_color(self, elem, value):
        """Handle the CSS "color" property with the "color" markup

        @param elem(ET.Element): element holding the style (unused)
        @param value(unicode): CSS value, converted with css_color.parse
        """
        # NOTE(review): the result of css_color.parse is used as is for the
        #   markup value, presumably a hexadecimal string; to be confirmed
        self._addStyle("color", css_color.parse(value))

    def css_text_decoration(self, elem, value):
        """Handle the CSS "text-decoration" property

        "underline" and "line-through" are mapped to the "u" and "s" markups,
        a warning is logged for any other value.
        @param elem(ET.Element): element holding the style (unused)
        @param value(unicode): CSS value
        """
        markup_tag = {'underline': 'u', 'line-through': 's'}.get(value)
        if markup_tag is None:
            log.warning("unhandled text decoration: {}".format(value))
        else:
            self._addStyle(markup_tag)