Commit 7357bea

Generate tokenizer tests from testdata files
1 parent ebcc17b

3 files changed: +84 -51 lines

.pytest.expect

Lines changed: 14 additions & 14 deletions
@@ -2,20 +2,20 @@ pytest-expect file v1
 (2, 7, 11, 'final', 0)
 b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL
 b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4718]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4990]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4993]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4994]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4996]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4997]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[4999]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5002]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5003]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5005]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5006]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5008]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5020]': FAIL
-b'html5lib/tests/test_tokenizer.py::testTokenizer::[5418]': FAIL
+u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::232::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::234::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::235::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::237::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::240::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::241::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::243::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::244::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::246::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::258::dataState': FAIL
+u'html5lib/tests/testdata/tokenizer/test3.test::656::dataState': FAIL
 u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::DOM::parser::namespaced': FAIL
 u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::DOM::parser::void-namespace': FAIL
 u'html5lib/tests/testdata/tree-construction/foreign-fragment.dat::0::ElementTree::parser::namespaced': FAIL
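
The expectation keys above change because failing tests are now identified by pytest node IDs built from the new collection chain (the .test file path, the test's index within that file, and its capitalized initial state) rather than by the index of a single generated test function. A small illustrative sketch of that ID shape (not part of the commit), using the collector names introduced further down:

    # Node IDs are the "::"-joined names of the collection chain:
    # TokenizerFile (the .test path) -> TokenizerTestCollector (test index)
    # -> TokenizerTest (initial state).
    path, index, state = "html5lib/tests/testdata/tokenizer/test3.test", 228, "dataState"
    node_id = "::".join([path, str(index), state])
    assert node_id == "html5lib/tests/testdata/tokenizer/test3.test::228::dataState"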

html5lib/tests/conftest.py

Lines changed: 5 additions & 0 deletions
@@ -1,10 +1,12 @@
 import os.path
 
 from .tree_construction import TreeConstructionFile
+from .tokenizer import TokenizerFile
 
 _dir = os.path.abspath(os.path.dirname(__file__))
 _testdata = os.path.join(_dir, "testdata")
 _tree_construction = os.path.join(_testdata, "tree-construction")
+_tokenizer = os.path.join(_testdata, "tokenizer")
 
 
 def pytest_collectstart():
@@ -19,3 +21,6 @@ def pytest_collect_file(path, parent):
         return
     if path.ext == ".dat":
         return TreeConstructionFile(path, parent)
+    elif dir == _tokenizer:
+        if path.ext == ".test":
+            return TokenizerFile(path, parent)
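
For reference, a minimal sketch (not the project's exact conftest.py) of the pytest_collect_file hook pattern this change relies on: pytest calls the hook for every file it discovers, and returning a pytest.File subclass turns a plain data file into a collection node. TokenizerFile comes from the renamed tokenizer.py below; the directory handling here is simplified for illustration.

    import os.path

    from .tokenizer import TokenizerFile

    _dir = os.path.abspath(os.path.dirname(__file__))
    _tokenizer = os.path.join(_dir, "testdata", "tokenizer")


    def pytest_collect_file(path, parent):
        # Collect html5lib-tests tokenizer files (*.test) as TokenizerFile nodes.
        if os.path.dirname(str(path)) == _tokenizer and path.ext == ".test":
            return TokenizerFile(path, parent)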

html5lib/tests/test_tokenizer.py renamed to html5lib/tests/tokenizer.py

Lines changed: 65 additions & 37 deletions
@@ -4,10 +4,9 @@
 import warnings
 import re
 
+import pytest
 from six import unichr
 
-from .support import get_data_files
-
 from html5lib.tokenizer import HTMLTokenizer
 from html5lib import constants, utils
 
@@ -172,27 +171,6 @@ def repl(m):
     return test
 
 
-def runTokenizerTest(test):
-    warnings.resetwarnings()
-    warnings.simplefilter("error")
-
-    expected = test['output']
-    if 'lastStartTag' not in test:
-        test['lastStartTag'] = None
-    parser = TokenizerTestParser(test['initialState'],
-                                 test['lastStartTag'])
-    tokens = parser.parse(test['input'])
-    received = normalizeTokens(tokens)
-    errorMsg = "\n".join(["\n\nInitial state:",
-                          test['initialState'],
-                          "\nInput:", test['input'],
-                          "\nExpected:", repr(expected),
-                          "\nreceived:", repr(tokens)])
-    errorMsg = errorMsg
-    ignoreErrorOrder = test.get('ignoreErrorOrder', False)
-    assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
-
-
 def _doCapitalize(match):
     return match.group(1).upper()
 
@@ -205,18 +183,68 @@ def capitalize(s):
     return s
 
 
-def testTokenizer():
-    for filename in get_data_files('tokenizer', '*.test'):
-        with open(filename) as fp:
+class TokenizerFile(pytest.File):
+    def collect(self):
+        with open(str(self.fspath), "rb") as fp:
             tests = json.load(fp)
-            if 'tests' in tests:
-                for index, test in enumerate(tests['tests']):
-                    if 'initialStates' not in test:
-                        test["initialStates"] = ["Data state"]
-                    if 'doubleEscaped' in test:
-                        test = unescape(test)
-                    if test["input"] is None:
-                        continue  # Not valid input for this platform
-                    for initialState in test["initialStates"]:
-                        test["initialState"] = capitalize(initialState)
-                        yield runTokenizerTest, test
+        if 'tests' in tests:
+            for i, test in enumerate(tests['tests']):
+                yield TokenizerTestCollector(str(i), self, testdata=test)
+
+
+class TokenizerTestCollector(pytest.Collector):
+    def __init__(self, name, parent=None, config=None, session=None, testdata=None):
+        super(TokenizerTestCollector, self).__init__(name, parent, config, session)
+        if 'initialStates' not in testdata:
+            testdata["initialStates"] = ["Data state"]
+        if 'doubleEscaped' in testdata:
+            testdata = unescape(testdata)
+        self.testdata = testdata
+
+    def collect(self):
+        for initialState in self.testdata["initialStates"]:
+            initialState = capitalize(initialState)
+            item = TokenizerTest(initialState,
+                                 self,
+                                 self.testdata,
+                                 initialState)
+            if self.testdata["input"] is None:
+                item.add_marker(pytest.mark.skipif(True, reason="Relies on lone surrogates"))
+            yield item
+
+
+class TokenizerTest(pytest.Item):
+    def __init__(self, name, parent, test, initialState):
+        super(TokenizerTest, self).__init__(name, parent)
+        self.obj = lambda: 1  # this is to hack around skipif needing a function!
+        self.test = test
+        self.initialState = initialState
+
+    def runtest(self):
+        warnings.resetwarnings()
+        warnings.simplefilter("error")
+
+        expected = self.test['output']
+        if 'lastStartTag' not in self.test:
+            self.test['lastStartTag'] = None
+        parser = TokenizerTestParser(self.initialState,
+                                     self.test['lastStartTag'])
+        tokens = parser.parse(self.test['input'])
+        received = normalizeTokens(tokens)
+        errorMsg = "\n".join(["\n\nInitial state:",
+                              self.initialState,
+                              "\nInput:", self.test['input'],
+                              "\nExpected:", repr(expected),
+                              "\nreceived:", repr(tokens)])
+        errorMsg = errorMsg
+        ignoreErrorOrder = self.test.get('ignoreErrorOrder', False)
+        assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
+
+    def repr_failure(self, excinfo):
+        traceback = excinfo.traceback
+        ntraceback = traceback.cut(path=__file__)
+        excinfo.traceback = ntraceback.filter()
+
+        return excinfo.getrepr(funcargs=True,
+                               showlocals=False,
+                               style="short", tbfilter=False)
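
For orientation, this is roughly the shape of one entry in an html5lib-tests tokenizer .test file that TokenizerFile.collect() and TokenizerTestCollector consume; the values below are made up for illustration, and only the keys referenced in the diff (plus "description") are shown:

    test_entry = {
        "description": "Example start tag followed by text",
        "input": "<h1>text</h1>",
        "output": [["StartTag", "h1", {}],
                   ["Character", "text"],
                   ["EndTag", "h1"]],
        "initialStates": ["Data state"],  # TokenizerTestCollector defaults this when absent
        "ignoreErrorOrder": False,
        # "lastStartTag" and "doubleEscaped" are optional; runtest() and
        # unescape() above handle them when present.
    }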

0 commit comments
