annotate src/test/test_plugin_misc_text_syntaxes.py @ 841:831f208b4ea3

plugin text_syntaxes: html2text was breaking the long URLs
author souliane <souliane@mailoo.org>
date Thu, 13 Feb 2014 12:29:14 +0100
parents c4b22aedb7d7
children 7ea7053dda88
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
1 #!/usr/bin/python
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
3
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SAT: a jabber client
811
1fe00f0c9a91 dates update
Goffi <goffi@goffi.org>
parents: 795
diff changeset
5 # Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Jérôme Poisson (goffi@goffi.org)
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
6
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
11
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
16
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
19
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
20 """ Plugin text syntaxes tests """
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
21
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
22 from sat.test import helpers
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
23 from sat.plugins import plugin_misc_text_syntaxes
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
24 import re
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
25
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
26
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
27 class SanitisationTest(helpers.SatTestCase):
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
28
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
29 EVIL_HTML1 = """
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
30 <html>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
31 <head>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
32 <script type="text/javascript" src="evil-site"></script>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
33 <link rel="alternate" type="text/rss" src="evil-rss">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
34 <style>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
35 body {background-image: url(javascript:do_evil)};
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
36 div {color: expression(evil)};
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
37 </style>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
38 </head>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
39 <body onload="evil_function()">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
40 <!-- I am interpreted for EVIL! -->
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
41 <a href="javascript:evil_function()">a link</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
42 <a href="#" onclick="evil_function()">another link</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
43 <p onclick="evil_function()">a paragraph</p>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
44 <div style="display: none">secret EVIL!</div>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
45 <object> of EVIL! </object>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
46 <iframe src="evil-site"></iframe>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
47 <form action="evil-site">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
48 Password: <input type="password" name="password">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
49 </form>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
50 <blink>annoying EVIL!</blink>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
51 <a href="evil-site">spam spam SPAM!</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
52 <image src="evil!">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
53 </body>
780
9810f22ba733 test: store the constants in constants.py + better PEP8 compliance
souliane <souliane@mailoo.org>
parents: 694
diff changeset
54 </html>""" # example from lxml: /usr/share/doc/python-lxml-doc/html/lxmlhtml.html#cleaning-up-html
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
55
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
56 EVIL_HTML2 = """<p style='display: None; test: blah; background: url(: alert()); color: blue;'>test <strong>retest</strong><br><span style="background-color: (alert('bouh')); titi; color: #cf2828; font-size: 3px; direction: !important; color: red; color: red !important; font-size: 100px !important; font-size: 100px ! important; font-size: 100%; font-size: 100ox; font-size: 100px; font-size: 100;;;; font-size: 100 %; color: 100 px 1.7em; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat; background-color: :alert(1); color: (alert('XSS')); color: (window.location='http://example.org/'); color: url(:window.location='http://example.org/'); "> toto </span></p>"""
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
57
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
58 def setUp(self):
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
59 self.host = helpers.FakeSAT()
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
60 self.text_syntaxes = plugin_misc_text_syntaxes.TextSyntaxes(self.host)
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
61
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
62 def test_xhtml_sanitise(self):
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
63 expected = """<div>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
64 <style>/* deleted */</style>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
65 <body>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
66 <a href="">a link</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
67 <a href="#">another link</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
68 <p>a paragraph</p>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
69 <div style="">secret EVIL!</div>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
70 of EVIL!
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
71 Password:
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
72 annoying EVIL!
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
73 <a href="evil-site">spam spam SPAM!</a>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
74 <img src="evil!">
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
75 </img></body>
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
76 </div>"""
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
77
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
78 d = self.text_syntaxes.clean_xhtml(self.EVIL_HTML1)
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
79 d.addCallback(self.assertEqualXML, expected, ignore_blank=True)
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
80 return d
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
81
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
82 def test_styles_sanitise(self):
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
83 expected = """<p style="color: blue">test <strong>retest</strong><br/><span style="color: #cf2828; font-size: 3px; color: red; color: red !important; font-size: 100px !important; font-size: 100%; font-size: 100px; font-size: 100; font-size: 100 %; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat"> toto </span></p>"""
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
84
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
85 d = self.text_syntaxes.clean_xhtml(self.EVIL_HTML2)
694
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
86 d.addCallback(self.assertEqualXML, expected)
4284b6ad8aa3 tests: plugin text syntaxes sanitisation tests
Goffi <goffi@goffi.org>
parents:
diff changeset
87 return d
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
88
841
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
89 def test_html2text(self):
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
90 """Check that html2text is not inserting \n in the middle of that link.
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
91 By default lines are truncated after the 79th characters."""
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
92 source = "<img src=\"http://sat.goffi.org/static/images/screenshots/libervia/libervia_discussions.png\" alt=\"sat\"/>"
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
93 expected = "![sat](http://sat.goffi.org/static/images/screenshots/libervia/libervia_discussions.png)"
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
94 d = self.text_syntaxes.convert(source, self.text_syntaxes.SYNTAX_XHTML, self.text_syntaxes.SYNTAX_MARKDOWN)
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
95 d.addCallback(self.assertEqual, expected)
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
96 return d
831f208b4ea3 plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents: 832
diff changeset
97
832
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
98 def test_removeXHTMLMarkups(self):
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
99 expected = """ a link another link a paragraph secret EVIL! of EVIL! Password: annoying EVIL!spam spam SPAM! """
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
100 result = self.text_syntaxes._removeMarkups(self.EVIL_HTML1)
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
101 self.assertEqual(re.sub(r"\s+", " ", result).rstrip(), expected.rstrip())
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
102
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
103 expected = """test retest toto"""
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
104 result = self.text_syntaxes._removeMarkups(self.EVIL_HTML2)
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
105 self.assertEqual(re.sub(r"\s+", " ", result).rstrip(), expected.rstrip())
c4b22aedb7d7 plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents: 811
diff changeset
106