comparison libervia/backend/test/test_plugin_misc_text_syntaxes.py @ 4071:4b842c1fb686

refactoring: renamed `sat` package to `libervia.backend`
author Goffi <goffi@goffi.org>
date Fri, 02 Jun 2023 11:49:51 +0200
parents sat/test/test_plugin_misc_text_syntaxes.py@524856bd7b19
children 0d7bb4df2343
comparison
equal deleted inserted replaced
4070:d10748475025 4071:4b842c1fb686
1 #!/usr/bin/env python3
2
3
4 # SAT: a jabber client
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 """ Plugin text syntaxes tests """
21
22 from libervia.backend.test import helpers
23 from libervia.backend.plugins import plugin_misc_text_syntaxes
24 from twisted.trial.unittest import SkipTest
25 import re
26 import importlib
27
28
class SanitisationTest(helpers.SatTestCase):
    """Exercise the cleaning and conversion helpers of the text syntaxes plugin.

    The fixtures below are hostile HTML snippets; the tests feed them to
    ``clean_xhtml``, ``convert`` and ``_remove_markups`` and compare the
    output with hand-written expectations.
    """

    # Full HTML document mixing scripts, event handlers, forms and frames.
    EVIL_HTML1 = """
<html>
<head>
<script type="text/javascript" src="evil-site"></script>
<link rel="alternate" type="text/rss" src="evil-rss">
<style>
body {background-image: url(javascript:do_evil)};
div {color: expression(evil)};
</style>
</head>
<body onload="evil_function()">
<!-- I am interpreted for EVIL! -->
<a href="javascript:evil_function()">a link</a>
<a href="#" onclick="evil_function()">another link</a>
<p onclick="evil_function()">a paragraph</p>
<div style="display: none">secret EVIL!</div>
<object> of EVIL! </object>
<iframe src="evil-site"></iframe>
<form action="evil-site">
Password: <input type="password" name="password">
</form>
<blink>annoying EVIL!</blink>
<a href="evil-site">spam spam SPAM!</a>
<image src="evil!">
</body>
</html>"""  # example from lxml: /usr/share/doc/python-lxml-doc/html/lxmlhtml.html#cleaning-up-html

    # Fragment whose ``style`` attributes mix harmless and malicious declarations.
    EVIL_HTML2 = """<p style='display: None; test: blah; background: url(: alert()); color: blue;'>test <strong>retest</strong><br><span style="background-color: (alert('bouh')); titi; color: #cf2828; font-size: 3px; direction: !important; color: red; color: red !important; font-size: 100px !important; font-size: 100px ! important; font-size: 100%; font-size: 100ox; font-size: 100px; font-size: 100;;;; font-size: 100 %; color: 100 px 1.7em; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat; background-color: :alert(1); color: (alert('XSS')); color: (window.location='http://example.org/'); color: url(:window.location='http://example.org/'); "> toto </span></p>"""

    def setUp(self):
        """Build a fake host and a fresh ``TextSyntaxes`` instance for each test."""
        self.host = helpers.FakeSAT()
        # The plugin module is reloaded before instantiation; the original
        # author notes this avoids a conflict error.
        importlib.reload(plugin_misc_text_syntaxes)
        self.text_syntaxes = plugin_misc_text_syntaxes.TextSyntaxes(self.host)

    def test_xhtml_sanitise(self):
        """Check the result of ``clean_xhtml`` on ``EVIL_HTML1``.

        The comparison is done with ``assert_equal_xml`` and
        ``ignore_blank=True``.
        """
        expected_xml = """<div>
<style>/* deleted */</style>
<body>
<a href="">a link</a>
<a href="#">another link</a>
<p>a paragraph</p>
<div style="">secret EVIL!</div>
of EVIL!
Password:
annoying EVIL!
<a href="evil-site">spam spam SPAM!</a>
<img src="evil!">
</img></body>
</div>"""

        cleaned = self.text_syntaxes.clean_xhtml(self.EVIL_HTML1)
        cleaned.addCallback(self.assert_equal_xml, expected_xml, ignore_blank=True)
        return cleaned

    def test_styles_sanitise(self):
        """Check the ``style`` attributes left by ``clean_xhtml`` on ``EVIL_HTML2``."""
        expected_xml = """<p style="color: blue">test <strong>retest</strong><br/><span style="color: #cf2828; font-size: 3px; color: red; color: red !important; font-size: 100px !important; font-size: 100%; font-size: 100px; font-size: 100; font-size: 100 %; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat"> toto </span></p>"""

        cleaned = self.text_syntaxes.clean_xhtml(self.EVIL_HTML2)
        cleaned.addCallback(self.assert_equal_xml, expected_xml)
        return cleaned

    def test_html2text(self):
        """Check that XHTML to Markdown conversion keeps a long link on one line.

        The original author notes that html2text truncates lines after the
        79th character by default; the expected Markdown here is longer than
        that and contains no line break. The test is skipped when the
        conversion raises ``UnknownSyntax``.
        """
        source = '<img src="http://sat.goffi.org/static/images/screenshots/libervia/libervia_discussions.png" alt="sat"/>'
        expected = "![sat](http://sat.goffi.org/static/images/screenshots/libervia/libervia_discussions.png)"
        syntaxes = self.text_syntaxes
        try:
            converted = syntaxes.convert(
                source,
                syntaxes.SYNTAX_XHTML,
                syntaxes.SYNTAX_MARKDOWN,
            )
        except plugin_misc_text_syntaxes.UnknownSyntax:
            raise SkipTest("Markdown syntax is not available.")
        else:
            converted.addCallback(self.assertEqual, expected)
            return converted

    def test_remove_xhtml_markups(self):
        """Check the plain text returned by ``_remove_markups`` for both fixtures.

        Runs of whitespace in the result are collapsed to a single space and
        trailing whitespace is dropped on both sides before comparing.
        """
        cases = (
            (
                self.EVIL_HTML1,
                """ a link another link a paragraph secret EVIL! of EVIL! Password: annoying EVIL! spam spam SPAM! """,
            ),
            (self.EVIL_HTML2, """test retest toto"""),
        )
        for markup, expected in cases:
            stripped = self.text_syntaxes._remove_markups(markup)
            self.assertEqual(
                re.sub(r"\s+", " ", stripped).rstrip(), expected.rstrip()
            )