comparison src/test/test_plugin_misc_text_syntaxes.py @ 832:c4b22aedb7d7

plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title: The implementation should follow this formal specification: "title" and "content" data can be passed in raw, xhtml or rich format. When we receive a new/updated microblog item from a frontend: - keys "title" or "content" have to be escaped (HTML tags disabled) - keys "title_rich" or "content_rich" have to be converted from the current syntax to XHTML - keys "title_xhtml" or "content_xhtml" have to be cleaned of unwanted XHTML content Rules to deal with concurrent keys: - existence of both "*_xhtml" and "*_rich" keys must raise an exception - existence of both raw and ("*_xhtml" or "*_rich") is OK As the storage always needs raw data, if it is not given by the user it can be extracted from the "*_rich" or "*_xhtml" data (by removing the XHTML tags). When a frontend wants to edit a blog post that contains an XHTML title or content, the conversion is made from XHTML to the current user-defined syntax. - plugin text_syntaxes: added "text" syntax (using lxml)
author souliane <souliane@mailoo.org>
date Wed, 05 Feb 2014 16:36:51 +0100
parents 1fe00f0c9a91
children 831f208b4ea3
comparison
equal deleted inserted replaced
831:d7f9cd8a08cd 832:c4b22aedb7d7
19 19
20 """ Plugin text syntaxes tests """ 20 """ Plugin text syntaxes tests """
21 21
22 from sat.test import helpers 22 from sat.test import helpers
23 from sat.plugins import plugin_misc_text_syntaxes 23 from sat.plugins import plugin_misc_text_syntaxes
24 import re
24 25
25 26
26 class SanitisationTest(helpers.SatTestCase): 27 class SanitisationTest(helpers.SatTestCase):
27 28
28 def setUp(self): 29 EVIL_HTML1 = """
29 self.host = helpers.FakeSAT()
30 self.text_syntaxes = plugin_misc_text_syntaxes.TextSyntaxes(self.host)
31
32 def test_xhtml_sanitise(self):
33 evil_html = """
34 <html> 30 <html>
35 <head> 31 <head>
36 <script type="text/javascript" src="evil-site"></script> 32 <script type="text/javascript" src="evil-site"></script>
37 <link rel="alternate" type="text/rss" src="evil-rss"> 33 <link rel="alternate" type="text/rss" src="evil-rss">
38 <style> 34 <style>
55 <a href="evil-site">spam spam SPAM!</a> 51 <a href="evil-site">spam spam SPAM!</a>
56 <image src="evil!"> 52 <image src="evil!">
57 </body> 53 </body>
58 </html>""" # example from lxml: /usr/share/doc/python-lxml-doc/html/lxmlhtml.html#cleaning-up-html 54 </html>""" # example from lxml: /usr/share/doc/python-lxml-doc/html/lxmlhtml.html#cleaning-up-html
59 55
56 EVIL_HTML2 = """<p style='display: None; test: blah; background: url(: alert()); color: blue;'>test <strong>retest</strong><br><span style="background-color: (alert('bouh')); titi; color: #cf2828; font-size: 3px; direction: !important; color: red; color: red !important; font-size: 100px !important; font-size: 100px ! important; font-size: 100%; font-size: 100ox; font-size: 100px; font-size: 100;;;; font-size: 100 %; color: 100 px 1.7em; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat; background-color: :alert(1); color: (alert('XSS')); color: (window.location='http://example.org/'); color: url(:window.location='http://example.org/'); "> toto </span></p>"""
57
58 def setUp(self):
59 self.host = helpers.FakeSAT()
60 self.text_syntaxes = plugin_misc_text_syntaxes.TextSyntaxes(self.host)
61
62 def test_xhtml_sanitise(self):
60 expected = """<div> 63 expected = """<div>
61 <style>/* deleted */</style> 64 <style>/* deleted */</style>
62 <body> 65 <body>
63 <a href="">a link</a> 66 <a href="">a link</a>
64 <a href="#">another link</a> 67 <a href="#">another link</a>
70 <a href="evil-site">spam spam SPAM!</a> 73 <a href="evil-site">spam spam SPAM!</a>
71 <img src="evil!"> 74 <img src="evil!">
72 </img></body> 75 </img></body>
73 </div>""" 76 </div>"""
74 77
75 d = self.text_syntaxes.clean_xhtml(evil_html) 78 d = self.text_syntaxes.clean_xhtml(self.EVIL_HTML1)
76 d.addCallback(self.assertEqualXML, expected, ignore_blank=True) 79 d.addCallback(self.assertEqualXML, expected, ignore_blank=True)
77 return d 80 return d
78 81
79 def test_styles_sanitise(self): 82 def test_styles_sanitise(self):
80 evil_html = """<p style='display: None; test: blah; background: url(: alert()); color: blue;'>test <strong>retest</strong><br><span style="background-color: (alert('bouh')); titi; color: #cf2828; font-size: 3px; direction: !important; color: red; color: red !important; font-size: 100px !important; font-size: 100px ! important; font-size: 100%; font-size: 100ox; font-size: 100px; font-size: 100;;;; font-size: 100 %; color: 100 px 1.7em; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat; background-color: :alert(1); color: (alert('XSS')); color: (window.location='http://example.org/'); color: url(:window.location='http://example.org/'); "> toto </span></p>"""
81
82 expected = """<p style="color: blue">test <strong>retest</strong><br/><span style="color: #cf2828; font-size: 3px; color: red; color: red !important; font-size: 100px !important; font-size: 100%; font-size: 100px; font-size: 100; font-size: 100 %; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat"> toto </span></p>""" 83 expected = """<p style="color: blue">test <strong>retest</strong><br/><span style="color: #cf2828; font-size: 3px; color: red; color: red !important; font-size: 100px !important; font-size: 100%; font-size: 100px; font-size: 100; font-size: 100 %; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat"> toto </span></p>"""
83 84
84 d = self.text_syntaxes.clean_xhtml(evil_html) 85 d = self.text_syntaxes.clean_xhtml(self.EVIL_HTML2)
85 d.addCallback(self.assertEqualXML, expected) 86 d.addCallback(self.assertEqualXML, expected)
86 return d 87 return d
88
89 def test_removeXHTMLMarkups(self):
90 expected = """ a link another link a paragraph secret EVIL! of EVIL! Password: annoying EVIL!spam spam SPAM! """
91 result = self.text_syntaxes._removeMarkups(self.EVIL_HTML1)
92 self.assertEqual(re.sub(r"\s+", " ", result).rstrip(), expected.rstrip())
93
94 expected = """test retest toto"""
95 result = self.text_syntaxes._removeMarkups(self.EVIL_HTML2)
96 self.assertEqual(re.sub(r"\s+", " ", result).rstrip(), expected.rstrip())
97