diff src/test/test_plugin_misc_text_syntaxes.py @ 832:c4b22aedb7d7

plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title: Implementation should follow this formal specification: "title" and "content" data can be passed in raw, xhtml or rich format. When we receive a new/updated microblog item from a frontend: - keys "title" or "content" have to be escaped (disable HTML tags) - keys "title_rich" or "content_rich" have to be converted from the current syntax to XHTML - keys "title_xhtml" or "content_xhtml" have to be cleaned of unwanted XHTML content. Rules to deal with concurrent keys: - existence of both "*_xhtml" and "*_rich" keys must raise an exception - existence of both raw and ("*_xhtml" or "*_rich") is OK. As the storage always needs raw data, if it is not given by the user it can be extracted from the "*_rich" or "*_xhtml" data (remove the XHTML tags). When a frontend wants to edit a blog post that contains XHTML title or content, the conversion is made from XHTML to the current user-defined syntax. - plugin text_syntaxes: added "text" syntax (using lxml)
author souliane <souliane@mailoo.org>
date Wed, 05 Feb 2014 16:36:51 +0100
parents 1fe00f0c9a91
children 831f208b4ea3
line wrap: on
line diff
--- a/src/test/test_plugin_misc_text_syntaxes.py	Wed Jan 22 17:10:28 2014 +0100
+++ b/src/test/test_plugin_misc_text_syntaxes.py	Wed Feb 05 16:36:51 2014 +0100
@@ -21,16 +21,12 @@
 
 from sat.test import helpers
 from sat.plugins import plugin_misc_text_syntaxes
+import re
 
 
 class SanitisationTest(helpers.SatTestCase):
 
-    def setUp(self):
-        self.host = helpers.FakeSAT()
-        self.text_syntaxes = plugin_misc_text_syntaxes.TextSyntaxes(self.host)
-
-    def test_xhtml_sanitise(self):
-        evil_html = """
+    EVIL_HTML1 = """
    <html>
     <head>
       <script type="text/javascript" src="evil-site"></script>
@@ -57,6 +53,13 @@
     </body>
    </html>"""  # example from lxml: /usr/share/doc/python-lxml-doc/html/lxmlhtml.html#cleaning-up-html
 
+    EVIL_HTML2 = """<p style='display: None; test: blah; background: url(: alert()); color: blue;'>test <strong>retest</strong><br><span style="background-color: (alert('bouh')); titi; color: #cf2828; font-size: 3px; direction: !important; color: red; color: red !important; font-size: 100px       !important; font-size: 100px  ! important; font-size: 100%; font-size: 100ox; font-size: 100px; font-size: 100;;;; font-size: 100 %; color: 100 px 1.7em; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat; background-color: :alert(1); color: (alert('XSS')); color: (window.location='http://example.org/'); color: url(:window.location='http://example.org/'); "> toto </span></p>"""
+
+    def setUp(self):  # create the fake host and the TextSyntaxes plugin instance used by the tests
+        self.host = helpers.FakeSAT()
+        self.text_syntaxes = plugin_misc_text_syntaxes.TextSyntaxes(self.host)
+
+    def test_xhtml_sanitise(self):
         expected = """<div>
       <style>/* deleted */</style>
     <body>
@@ -72,15 +75,23 @@
     </img></body>
    </div>"""
 
-        d = self.text_syntaxes.clean_xhtml(evil_html)
+        d = self.text_syntaxes.clean_xhtml(self.EVIL_HTML1)
         d.addCallback(self.assertEqualXML, expected, ignore_blank=True)
         return d
 
     def test_styles_sanitise(self):
-        evil_html = """<p style='display: None; test: blah; background: url(: alert()); color: blue;'>test <strong>retest</strong><br><span style="background-color: (alert('bouh')); titi; color: #cf2828; font-size: 3px; direction: !important; color: red; color: red !important; font-size: 100px       !important; font-size: 100px  ! important; font-size: 100%; font-size: 100ox; font-size: 100px; font-size: 100;;;; font-size: 100 %; color: 100 px 1.7em; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat; background-color: :alert(1); color: (alert('XSS')); color: (window.location='http://example.org/'); color: url(:window.location='http://example.org/'); "> toto </span></p>"""
-
         expected = """<p style="color: blue">test <strong>retest</strong><br/><span style="color: #cf2828; font-size: 3px; color: red; color: red !important; font-size: 100px       !important; font-size: 100%; font-size: 100px; font-size: 100; font-size: 100 %; color: rgba(0, 0, 0, 0.1); color: rgb(35,79,255); background-color: no-repeat"> toto </span></p>"""
 
-        d = self.text_syntaxes.clean_xhtml(evil_html)
+        d = self.text_syntaxes.clean_xhtml(self.EVIL_HTML2)
         d.addCallback(self.assertEqualXML, expected)
         return d
+
+    def test_removeXHTMLMarkups(self):  # check the output of _removeMarkups on EVIL_HTML1 and EVIL_HTML2
+        expected = """ a link another link a paragraph secret EVIL! of EVIL! Password: annoying EVIL!spam spam SPAM! """
+        result = self.text_syntaxes._removeMarkups(self.EVIL_HTML1)
+        self.assertEqual(re.sub(r"\s+", " ", result).rstrip(), expected.rstrip())  # collapse whitespace runs to one space and ignore trailing whitespace before comparing
+
+        expected = """test retest toto"""
+        result = self.text_syntaxes._removeMarkups(self.EVIL_HTML2)
+        self.assertEqual(re.sub(r"\s+", " ", result).rstrip(), expected.rstrip())  # same whitespace normalisation as the first comparison
+