|
2 | 2 |
|
3 | 3 | import os
|
4 | 4 | import json
|
5 |
| -import unittest |
6 | 5 |
|
7 |
| -from .support import get_data_files |
| 6 | +import pytest |
8 | 7 |
|
9 |
| -try: |
10 |
| - unittest.TestCase.assertEqual |
11 |
| -except AttributeError: |
12 |
| - unittest.TestCase.assertEqual = unittest.TestCase.assertEquals |
| 8 | +from .support import get_data_files |
13 | 9 |
|
14 |
| -import html5lib |
15 | 10 | from html5lib import constants
|
16 | 11 | from html5lib.filters.lint import Filter as Lint
|
17 | 12 | from html5lib.serializer import HTMLSerializer, serialize
|
@@ -102,70 +97,83 @@ def runSerializerTest(input, expected, options):
|
102 | 97 | assert False, "Expected: %s, Received: %s" % (expected, result)
|
103 | 98 |
|
104 | 99 |
|
105 |
| -class EncodingTestCase(unittest.TestCase): |
106 |
| - def throwsWithLatin1(self, input): |
107 |
| - self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"}) |
| 100 | +def throwsWithLatin1(input): |
| 101 | + with pytest.raises(UnicodeEncodeError): |
| 102 | + serialize_html(input, {"encoding": "iso-8859-1"}) |
| 103 | + |
| 104 | + |
| 105 | +def testDoctypeName(): |
| 106 | + throwsWithLatin1([["Doctype", "\u0101"]]) |
| 107 | + |
| 108 | + |
| 109 | +def testDoctypePublicId(): |
| 110 | + throwsWithLatin1([["Doctype", "potato", "\u0101"]]) |
| 111 | + |
| 112 | + |
| 113 | +def testDoctypeSystemId(): |
| 114 | + throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]]) |
| 115 | + |
| 116 | + |
| 117 | +def testCdataCharacters(): |
| 118 | + runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], |
| 119 | + ["<style>ā"], {"encoding": "iso-8859-1"}) |
| 120 | + |
| 121 | + |
| 122 | +def testCharacters(): |
| 123 | + runSerializerTest([["Characters", "\u0101"]], |
| 124 | + ["ā"], {"encoding": "iso-8859-1"}) |
| 125 | + |
| 126 | + |
| 127 | +def testStartTagName(): |
| 128 | + throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]]) |
| 129 | + |
108 | 130 |
|
109 |
| - def testDoctypeName(self): |
110 |
| - self.throwsWithLatin1([["Doctype", "\u0101"]]) |
| 131 | +def testAttributeName(): |
| 132 | + throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]]) |
111 | 133 |
|
112 |
| - def testDoctypePublicId(self): |
113 |
| - self.throwsWithLatin1([["Doctype", "potato", "\u0101"]]) |
114 | 134 |
|
115 |
| - def testDoctypeSystemId(self): |
116 |
| - self.throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]]) |
| 135 | +def testAttributeValue(): |
| 136 | + runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span", |
| 137 | + [{"namespace": None, "name": "potato", "value": "\u0101"}]]], |
| 138 | + ["<span potato=ā>"], {"encoding": "iso-8859-1"}) |
117 | 139 |
|
118 |
| - def testCdataCharacters(self): |
119 |
| - runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], |
120 |
| - ["<style>ā"], {"encoding": "iso-8859-1"}) |
121 | 140 |
|
122 |
| - def testCharacters(self): |
123 |
| - runSerializerTest([["Characters", "\u0101"]], |
124 |
| - ["ā"], {"encoding": "iso-8859-1"}) |
| 141 | +def testEndTagName(): |
| 142 | + throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]]) |
125 | 143 |
|
126 |
| - def testStartTagName(self): |
127 |
| - self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]]) |
128 | 144 |
|
129 |
| - def testAttributeName(self): |
130 |
| - self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]]) |
| 145 | +def testComment(): |
| 146 | + throwsWithLatin1([["Comment", "\u0101"]]) |
131 | 147 |
|
132 |
| - def testAttributeValue(self): |
133 |
| - runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span", |
134 |
| - [{"namespace": None, "name": "potato", "value": "\u0101"}]]], |
135 |
| - ["<span potato=ā>"], {"encoding": "iso-8859-1"}) |
136 | 148 |
|
137 |
| - def testEndTagName(self): |
138 |
| - self.throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]]) |
| 149 | +@pytest.fixture |
| 150 | +def lxml_parser(): |
| 151 | + return etree.XMLParser(resolve_entities=False) |
139 | 152 |
|
140 |
| - def testComment(self): |
141 |
| - self.throwsWithLatin1([["Comment", "\u0101"]]) |
142 | 153 |
|
| 154 | +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") |
| 155 | +def testEntityReplacement(lxml_parser): |
| 156 | + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' |
| 157 | + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() |
| 158 | + result = serialize(tree, tree="lxml", omit_optional_tags=False) |
| 159 | + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>' |
143 | 160 |
|
144 |
| -if "lxml" in optionals_loaded: |
145 |
| - class LxmlTestCase(unittest.TestCase): |
146 |
| - def setUp(self): |
147 |
| - self.parser = etree.XMLParser(resolve_entities=False) |
148 |
| - self.treewalker = html5lib.getTreeWalker("lxml") |
149 |
| - self.serializer = HTMLSerializer() |
150 | 161 |
|
151 |
| - def testEntityReplacement(self): |
152 |
| - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>""" |
153 |
| - tree = etree.fromstring(doc, parser=self.parser).getroottree() |
154 |
| - result = serialize(tree, tree="lxml", omit_optional_tags=False) |
155 |
| - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result) |
| 162 | +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") |
| 163 | +def testEntityXML(lxml_parser): |
| 164 | + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>' |
| 165 | + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() |
| 166 | + result = serialize(tree, tree="lxml", omit_optional_tags=False) |
| 167 | + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>' |
156 | 168 |
|
157 |
| - def testEntityXML(self): |
158 |
| - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>""" |
159 |
| - tree = etree.fromstring(doc, parser=self.parser).getroottree() |
160 |
| - result = serialize(tree, tree="lxml", omit_optional_tags=False) |
161 |
| - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>""", result) |
162 | 169 |
|
163 |
| - def testEntityNoResolve(self): |
164 |
| - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>""" |
165 |
| - tree = etree.fromstring(doc, parser=self.parser).getroottree() |
166 |
| - result = serialize(tree, tree="lxml", omit_optional_tags=False, |
167 |
| - resolve_entities=False) |
168 |
| - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>""", result) |
| 170 | +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") |
| 171 | +def testEntityNoResolve(lxml_parser): |
| 172 | + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' |
| 173 | + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() |
| 174 | + result = serialize(tree, tree="lxml", omit_optional_tags=False, |
| 175 | + resolve_entities=False) |
| 176 | + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' |
169 | 177 |
|
170 | 178 |
|
171 | 179 | def test_serializer():
|
|
0 commit comments