 import warnings
 import re
 
+import pytest
 from six import unichr
 
-from .support import get_data_files
-
 from html5lib.tokenizer import HTMLTokenizer
 from html5lib import constants, utils
 
@@ -172,27 +171,6 @@ def repl(m):
     return test
 
 
-def runTokenizerTest(test):
-    warnings.resetwarnings()
-    warnings.simplefilter("error")
-
-    expected = test['output']
-    if 'lastStartTag' not in test:
-        test['lastStartTag'] = None
-    parser = TokenizerTestParser(test['initialState'],
-                                 test['lastStartTag'])
-    tokens = parser.parse(test['input'])
-    received = normalizeTokens(tokens)
-    errorMsg = "\n".join(["\n\nInitial state:",
-                          test['initialState'],
-                          "\nInput:", test['input'],
-                          "\nExpected:", repr(expected),
-                          "\nreceived:", repr(tokens)])
-    errorMsg = errorMsg
-    ignoreErrorOrder = test.get('ignoreErrorOrder', False)
-    assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
-
-
 def _doCapitalize(match):
     return match.group(1).upper()
 
@@ -205,18 +183,68 @@ def capitalize(s):
     return s
 
 
-def testTokenizer():
-    for filename in get_data_files('tokenizer', '*.test'):
-        with open(filename) as fp:
+class TokenizerFile(pytest.File):
+    def collect(self):
+        with open(str(self.fspath), "rb") as fp:
             tests = json.load(fp)
-            if 'tests' in tests:
-                for index, test in enumerate(tests['tests']):
-                    if 'initialStates' not in test:
-                        test["initialStates"] = ["Data state"]
-                    if 'doubleEscaped' in test:
-                        test = unescape(test)
-                    if test["input"] is None:
-                        continue  # Not valid input for this platform
-                    for initialState in test["initialStates"]:
-                        test["initialState"] = capitalize(initialState)
-                        yield runTokenizerTest, test
+            if 'tests' in tests:
+                for i, test in enumerate(tests['tests']):
+                    yield TokenizerTestCollector(str(i), self, testdata=test)
+
+
+class TokenizerTestCollector(pytest.Collector):
+    def __init__(self, name, parent=None, config=None, session=None, testdata=None):
+        super(TokenizerTestCollector, self).__init__(name, parent, config, session)
+        if 'initialStates' not in testdata:
+            testdata["initialStates"] = ["Data state"]
+        if 'doubleEscaped' in testdata:
+            testdata = unescape(testdata)
+        self.testdata = testdata
+
+    def collect(self):
+        for initialState in self.testdata["initialStates"]:
+            initialState = capitalize(initialState)
+            item = TokenizerTest(initialState,
+                                 self,
+                                 self.testdata,
+                                 initialState)
+            if self.testdata["input"] is None:
+                item.add_marker(pytest.mark.skipif(True, reason="Relies on lone surrogates"))
+            yield item
+
+
+class TokenizerTest(pytest.Item):
+    def __init__(self, name, parent, test, initialState):
+        super(TokenizerTest, self).__init__(name, parent)
+        self.obj = lambda: 1  # this is to hack around skipif needing a function!
+        self.test = test
+        self.initialState = initialState
+
+    def runtest(self):
+        warnings.resetwarnings()
+        warnings.simplefilter("error")
+
+        expected = self.test['output']
+        if 'lastStartTag' not in self.test:
+            self.test['lastStartTag'] = None
+        parser = TokenizerTestParser(self.initialState,
+                                     self.test['lastStartTag'])
+        tokens = parser.parse(self.test['input'])
+        received = normalizeTokens(tokens)
+        errorMsg = "\n".join(["\n\nInitial state:",
+                              self.initialState,
+                              "\nInput:", self.test['input'],
+                              "\nExpected:", repr(expected),
+                              "\nreceived:", repr(tokens)])
+        errorMsg = errorMsg
+        ignoreErrorOrder = self.test.get('ignoreErrorOrder', False)
+        assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
+
+    def repr_failure(self, excinfo):
+        traceback = excinfo.traceback
+        ntraceback = traceback.cut(path=__file__)
+        excinfo.traceback = ntraceback.filter()
+
+        return excinfo.getrepr(funcargs=True,
+                               showlocals=False,
+                               style="short", tbfilter=False)
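Not shown in this diff: a pytest.File subclass like TokenizerFile only takes effect once a conftest.py hook hands it the matching test files. The sketch below illustrates that wiring under stated assumptions; pytest_collect_file is a real pytest hook, but the module name in the import and the "tokenizer"/"*.test" matching rule are guesses inferred from the removed get_data_files('tokenizer', '*.test') call, not part of this change.

# conftest.py -- illustrative sketch only, not part of this commit
from test_tokenizer import TokenizerFile  # assumed module name for the file edited above

def pytest_collect_file(path, parent):
    # Collect each JSON tokenizer fixture (e.g. tokenizer/*.test) as a
    # TokenizerFile node so every test inside becomes its own pytest item.
    if path.ext == ".test" and path.dirpath().basename == "tokenizer":
        return TokenizerFile(path, parent)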