|
7 | 7 | except ImportError: |
8 | 8 | import simplejson as json |
9 | 9 |
|
10 | | -from support import html5lib_test_files |
11 | 10 | from html5lib import html5parser, sanitizer, constants |
12 | 11 |
|
13 | | -class SanitizeTest(unittest.TestCase): |
14 | | - def addTest(cls, name, expected, input): |
15 | | - def test(self, expected=expected, input=input): |
16 | | - expected = ''.join([token.toxml() for token in html5parser.HTMLParser(). |
17 | | - parseFragment(expected).childNodes]) |
18 | | - expected = json.loads(json.dumps(expected)) |
19 | | - self.assertEqual(expected, self.sanitize_html(input)) |
20 | | - setattr(cls, name, test) |
21 | | - addTest = classmethod(addTest) |
| 12 | +def runSanitizerTest(name, expected, input): |
| 13 | + expected = ''.join([token.toxml() for token in html5parser.HTMLParser(). |
| 14 | + parseFragment(expected).childNodes]) |
| 15 | + expected = json.loads(json.dumps(expected)) |
| 16 | + assert expected == sanitize_html(input) |
22 | 17 |
|
23 | | - def sanitize_html(self,stream): |
| 18 | +def sanitize_html(stream): |
24 | 19 | return ''.join([token.toxml() for token in |
25 | | - html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer). |
26 | | - parseFragment(stream).childNodes]) |
27 | | - |
28 | | - def test_should_handle_astral_plane_characters(self): |
29 | | - self.assertEqual(u"<p>\U0001d4b5 \U0001d538</p>", |
30 | | - self.sanitize_html("<p>𝒵 𝔸</p>")) |
31 | | - |
32 | | -for tag_name in sanitizer.HTMLSanitizer.allowed_elements: |
33 | | - if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']: continue ### TODO |
34 | | - if tag_name != tag_name.lower(): continue ### TODO |
35 | | - if tag_name == 'image': |
36 | | - SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name, |
37 | | - "<img title=\"1\"/>foo <bad>bar</bad> baz", |
38 | | - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
39 | | - elif tag_name == 'br': |
40 | | - SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name, |
41 | | - "<br title=\"1\"/>foo <bad>bar</bad> baz<br/>", |
| 20 | + html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer). |
| 21 | + parseFragment(stream).childNodes]) |
| 22 | + |
| 23 | +def test_should_handle_astral_plane_characters(): |
| 24 | + assert u"<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>𝒵 𝔸</p>") |
| 25 | + |
| 26 | +def test_sanitizer(): |
| 27 | + for tag_name in sanitizer.HTMLSanitizer.allowed_elements: |
| 28 | + if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']: |
| 29 | + continue ### TODO |
| 30 | + if tag_name != tag_name.lower(): |
| 31 | + continue ### TODO |
| 32 | + if tag_name == 'image': |
| 33 | + yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, |
| 34 | + "<img title=\"1\"/>foo <bad>bar</bad> baz", |
| 35 | + "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
| 36 | + elif tag_name == 'br': |
| 37 | + yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, |
| 38 | + "<br title=\"1\"/>foo <bad>bar</bad> baz<br/>", |
| 39 | + "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
| 40 | + elif tag_name in constants.voidElements: |
| 41 | + yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, |
| 42 | + "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, |
| 43 | + "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
| 44 | + else: |
| 45 | + yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, |
| 46 | + "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name), |
| 47 | + "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
| 48 | + |
| 49 | + for tag_name in sanitizer.HTMLSanitizer.allowed_elements: |
| 50 | + tag_name = tag_name.upper() |
| 51 | + yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name, |
| 52 | + "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name), |
42 | 53 | "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
43 | | - elif tag_name in constants.voidElements: |
44 | | - SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name, |
45 | | - "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, |
46 | | - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
47 | | - else: |
48 | | - SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name, |
49 | | - "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name), |
50 | | - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
51 | | - |
52 | | -for tag_name in sanitizer.HTMLSanitizer.allowed_elements: |
53 | | - tag_name = tag_name.upper() |
54 | | - SanitizeTest.addTest("test_should_forbid_%s_tag" % tag_name, |
55 | | - "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name), |
56 | | - "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name)) |
57 | | - |
58 | | -for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes: |
59 | | - if attribute_name != attribute_name.lower(): continue ### TODO |
60 | | - if attribute_name == 'style': continue |
61 | | - SanitizeTest.addTest("test_should_allow_%s_attribute" % attribute_name, |
62 | | - "<p %s=\"foo\">foo <bad>bar</bad> baz</p>" % attribute_name, |
63 | | - "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name) |
64 | | - |
65 | | -for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes: |
66 | | - attribute_name = attribute_name.upper() |
67 | | - SanitizeTest.addTest("test_should_forbid_%s_attribute" % attribute_name, |
68 | | - "<p>foo <bad>bar</bad> baz</p>", |
69 | | - "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name) |
70 | | - |
71 | | -for protocol in sanitizer.HTMLSanitizer.allowed_protocols: |
72 | | - SanitizeTest.addTest("test_should_allow_%s_uris" % protocol, |
73 | | - "<a href=\"%s\">foo</a>" % protocol, |
74 | | - """<a href="%s">foo</a>""" % protocol) |
75 | | - |
76 | | -for protocol in sanitizer.HTMLSanitizer.allowed_protocols: |
77 | | - SanitizeTest.addTest("test_should_allow_uppercase_%s_uris" % protocol, |
78 | | - "<a href=\"%s\">foo</a>" % protocol, |
79 | | - """<a href="%s">foo</a>""" % protocol) |
80 | | - |
81 | | -def buildTestSuite(): |
82 | | - for filename in html5lib_test_files("sanitizer"): |
83 | | - for test in json.load(file(filename)): |
84 | | - SanitizeTest.addTest('test_' + test['name'], test['output'], test['input']) |
85 | | - |
86 | | - return unittest.TestLoader().loadTestsFromTestCase(SanitizeTest) |
87 | | - |
88 | | -def sanitize_html(stream): |
89 | | - return ''.join([token.toxml() for token in |
90 | | - html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer). |
91 | | - parseFragment(stream).childNodes]) |
92 | | - |
93 | | -def main(): |
94 | | - buildTestSuite() |
95 | | - unittest.main() |
96 | 54 |
|
97 | | -if __name__ == "__main__": |
98 | | - main() |
| 55 | + for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes: |
| 56 | + if attribute_name != attribute_name.lower(): continue ### TODO |
| 57 | + if attribute_name == 'style': continue |
| 58 | + yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name, |
| 59 | + "<p %s=\"foo\">foo <bad>bar</bad> baz</p>" % attribute_name, |
| 60 | + "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name) |
| 61 | + |
| 62 | + for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes: |
| 63 | + attribute_name = attribute_name.upper() |
| 64 | + yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name, |
| 65 | + "<p>foo <bad>bar</bad> baz</p>", |
| 66 | + "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name) |
| 67 | + |
| 68 | + for protocol in sanitizer.HTMLSanitizer.allowed_protocols: |
| 69 | + yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol, |
| 70 | + "<a href=\"%s\">foo</a>" % protocol, |
| 71 | + """<a href="%s">foo</a>""" % protocol) |
| 72 | + |
| 73 | + for protocol in sanitizer.HTMLSanitizer.allowed_protocols: |
| 74 | + yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol, |
| 75 | + "<a href=\"%s\">foo</a>" % protocol, |
| 76 | + """<a href="%s">foo</a>""" % protocol) |
0 commit comments