annotate sat/test/test_plugin_misc_text_syntaxes.py @ 3393:2b6f69f6df8c

tools(xml_tools): fixed `<div>` unwrapping + added `parse` instance: `<div>` unwrapping could fail when a text node was a sibling of the top element (could easily happen ith a `\n` line feed added by an editor). This is fixed by filtering on IElement with `elements()`. A `parse` instance has been added as it is not necessary to create a new `ElementParser` each time that we want to parse something.
author Goffi <goffi@goffi.org>
date Thu, 12 Nov 2020 14:53:15 +0100
parents 559a625a236b
children be6d91572633
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
1 #!/usr/bin/env python3
3137
559a625a236b fixed shebangs
Goffi <goffi@goffi.org>
parents: 3136
diff changeset
2
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
3
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SAT: a jabber client
3136
9d0df638c8b4 dates update
Goffi <goffi@goffi.org>
parents: 3028
diff changeset
5 # Copyright (C) 2009-2020 Jérôme Poisson (goffi@goffi.org)
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
6
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
11
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
16
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
19
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
20 """ Plugin text syntaxes tests """
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
21
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
22 from sat.test import helpers
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
23 from sat.plugins import plugin_misc_text_syntaxes
856
7ea7053dda88 test: skip the test for markdown syntax if the module is not installed
souliane <souliane@mailoo.org>
parents: 841
diff changeset
24 from twisted.trial.unittest import SkipTest
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
25 import re
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
26 import importlib
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
27
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
28
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
29 class SanitisationTest(helpers.SatTestCase):
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
30
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
31 EVIL_HTML1 = """
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
32 <html>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
33 <head>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
34 <script type="text/javascript" src="evil-site"></script>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
35 <link rel="alternate" type="text/rss" src="evil-rss">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
36 <style>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
37 body {background-image: url(javascript:do_evil)};
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
38 div {color: expression(evil)};
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
39 </style>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
40 </head>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
41 <body onload="evil_function()">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
42 <!-- I am interpreted for EVIL! -->
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
43 <a href="javascript:evil_function()">a link</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
44 <a href="#" onclick="evil_function()">another link</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
45 <p onclick="evil_function()">a paragraph</p>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
46 <div style="display: none">secret EVIL!</div>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
47 <object> of EVIL! </object>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
48 <iframe src="evil-site"></iframe>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
49 <form action="evil-site">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
50 Password: <input type="password" name="password">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
51 </form>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
52 <blink>annoying EVIL!</blink>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
53 <a href="evil-site">spam spam SPAM!</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
54 <image src="evil!">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
55 </body>
780
9810f22ba733 test: store the constants in constants.py + better PEP8 compliance
souliane <souliane@mailoo.org>
parents: 694
diff changeset
56 </html>""" # example from lxml: /usr/share/doc/python-lxml-doc/html/lxmlhtml.html#cleaning-up-html
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
57
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
58 EVIL_HTML2 = """<p style='display: None; test: blah; background: url(: alert()); color: blue;'>test <strong>retest</strong><br><span style="background-color: (alert('bouh')); titi; color: #cf2828; font-size: 3px; direction: !important; color: red; color: red !important; font-size: 100px !important; font-size: 100px ! important; font-size: 100%; font-size: 100ox; font-size: 100px; font-size: 100;;;; font-size: 100 %; color: 100 px 1.7em; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat; background-color: :alert(1); color: (alert('XSS')); color: (window.location='http://example.org/'); color: url(:window.location='http://example.org/'); "> toto </span></p>"""
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
59
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
60 def setUp(self):
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
61 self.host = helpers.FakeSAT()
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
62 importlib.reload(plugin_misc_text_syntaxes) # reload the plugin to avoid conflict error
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
63 self.text_syntaxes = plugin_misc_text_syntaxes.TextSyntaxes(self.host)
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
64
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
65 def test_xhtml_sanitise(self):
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
66 expected = """<div>
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
67 <style>/* deleted */</style>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
68 <body>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
69 <a href="">a link</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
70 <a href="#">another link</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
71 <p>a paragraph</p>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
72 <div style="">secret EVIL!</div>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
73 of EVIL!
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
74 Password:
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
75 annoying EVIL!
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
76 <a href="evil-site">spam spam SPAM!</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
77 <img src="evil!">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
78 </img></body>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
79 </div>"""
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
80
1812
160b0d4c6520 plugin XEP-0071, XEP-0277: method clean_xhtml has been renamed to cleanXHTML
souliane <souliane@mailoo.org>
parents: 1809
diff changeset
81 d = self.text_syntaxes.cleanXHTML(self.EVIL_HTML1)
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
82 d.addCallback(self.assertEqualXML, expected, ignore_blank=True)
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
83 return d
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
84
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
85 def test_styles_sanitise(self):
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
86 expected = """<p style="color: blue">test <strong>retest</strong><br/><span style="color: #cf2828; font-size: 3px; color: red; color: red !important; font-size: 100px !important; font-size: 100%; font-size: 100px; font-size: 100; font-size: 100 %; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat"> toto </span></p>"""
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
87
1812
160b0d4c6520 plugin XEP-0071, XEP-0277: method clean_xhtml has been renamed to cleanXHTML
souliane <souliane@mailoo.org>
parents: 1809
diff changeset
88 d = self.text_syntaxes.cleanXHTML(self.EVIL_HTML2)
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
89 d.addCallback(self.assertEqualXML, expected)
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
90 return d
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
91
841
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
92 def test_html2text(self):
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
93 """Check that html2text is not inserting \n in the middle of that link.
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
94 By default lines are truncated after the 79th characters."""
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
95 source = '<img src="http://sat.goffi.org/static/images/screenshots/libervia/libervia_discussions.png" alt="sat"/>'
841
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
96 expected = "![sat](http://sat.goffi.org/static/images/screenshots/libervia/libervia_discussions.png)"
856
7ea7053dda88 test: skip the test for markdown syntax if the module is not installed
souliane <souliane@mailoo.org>
parents: 841
diff changeset
97 try:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
98 d = self.text_syntaxes.convert(
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
99 source,
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
100 self.text_syntaxes.SYNTAX_XHTML,
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
101 self.text_syntaxes.SYNTAX_MARKDOWN,
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
102 )
856
7ea7053dda88 test: skip the test for markdown syntax if the module is not installed
souliane <souliane@mailoo.org>
parents: 841
diff changeset
103 except plugin_misc_text_syntaxes.UnknownSyntax:
7ea7053dda88 test: skip the test for markdown syntax if the module is not installed
souliane <souliane@mailoo.org>
parents: 841
diff changeset
104 raise SkipTest("Markdown syntax is not available.")
841
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
105 d.addCallback(self.assertEqual, expected)
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
106 return d
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
107
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
108 def test_removeXHTMLMarkups(self):
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
109 expected = """ a link another link a paragraph secret EVIL! of EVIL! Password: annoying EVIL! spam spam SPAM! """
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
110 result = self.text_syntaxes._removeMarkups(self.EVIL_HTML1)
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
111 self.assertEqual(re.sub(r"\s+", " ", result).rstrip(), expected.rstrip())
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
112
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
113 expected = """test retest toto"""
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
114 result = self.text_syntaxes._removeMarkups(self.EVIL_HTML2)
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
115 self.assertEqual(re.sub(r"\s+", " ", result).rstrip(), expected.rstrip())