@@ -147,7 +147,9 @@ def test_unsupported_dtype(c_parser_only, match, kwargs):
 
 @td.skip_if_32bit
 @pytest.mark.slow
-def test_precise_conversion(c_parser_only):
+# test numbers between 1 and 2
+@pytest.mark.parametrize("num", np.linspace(1.0, 2.0, num=21))
+def test_precise_conversion(c_parser_only, num):
     parser = c_parser_only
 
     normal_errors = []
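The hunk above replaces an in-test loop over 500 linspace values with pytest parametrization over 21 of them. As a minimal sketch (hypothetical test name, assuming pytest and numpy are installed), `@pytest.mark.parametrize` accepts any iterable, so the array expands into 21 independently collected test cases:

import numpy as np
import pytest

# Each of the 21 evenly spaced values becomes its own collected test,
# so a single bad value is reported on its own instead of aborting a
# 500-iteration loop inside one test body.
@pytest.mark.parametrize("num", np.linspace(1.0, 2.0, num=21))
def test_sketch(num):
    assert 1.0 <= num <= 2.0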
@@ -156,27 +158,23 @@ def test_precise_conversion(c_parser_only):
     def error(val: float, actual_val: Decimal) -> Decimal:
         return abs(Decimal(f"{val:.100}") - actual_val)
 
-    # test numbers between 1 and 2
-    for num in np.linspace(1.0, 2.0, num=500):
-        # 25 decimal digits of precision
-        text = f"a\n{num:.25}"
+    # 25 decimal digits of precision
+    text = f"a\n{num:.25}"
 
-        normal_val = float(
-            parser.read_csv(StringIO(text), float_precision="legacy")["a"][0]
-        )
-        precise_val = float(
-            parser.read_csv(StringIO(text), float_precision="high")["a"][0]
-        )
-        roundtrip_val = float(
-            parser.read_csv(StringIO(text), float_precision="round_trip")["a"][0]
-        )
-        actual_val = Decimal(text[2:])
+    normal_val = float(
+        parser.read_csv(StringIO(text), float_precision="legacy")["a"][0]
+    )
+    precise_val = float(parser.read_csv(StringIO(text), float_precision="high")["a"][0])
+    roundtrip_val = float(
+        parser.read_csv(StringIO(text), float_precision="round_trip")["a"][0]
+    )
+    actual_val = Decimal(text[2:])
 
-        normal_errors.append(error(normal_val, actual_val))
-        precise_errors.append(error(precise_val, actual_val))
+    normal_errors.append(error(normal_val, actual_val))
+    precise_errors.append(error(precise_val, actual_val))
 
-        # round-trip should match float()
-        assert roundtrip_val == float(text[2:])
+    # round-trip should match float()
+    assert roundtrip_val == float(text[2:])
 
     assert sum(precise_errors) <= sum(normal_errors)
     assert max(precise_errors) <= max(normal_errors)
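For reference, a standalone sketch of what one parametrized case exercises (the value of `num` is illustrative; the `float_precision` options are the ones used in the diff):

from decimal import Decimal
from io import StringIO

import pandas as pd

num = 1.05  # stand-in for one np.linspace(1.0, 2.0, num=21) value
text = f"a\n{num:.25}"  # 25 decimal digits of precision

high_val = float(pd.read_csv(StringIO(text), float_precision="high")["a"][0])
roundtrip_val = float(
    pd.read_csv(StringIO(text), float_precision="round_trip")["a"][0]
)
actual_val = Decimal(text[2:])

# "round_trip" must reproduce float() exactly; "high" should stay close.
assert roundtrip_val == float(text[2:])
print(abs(Decimal(f"{high_val:.100}") - actual_val))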
@@ -287,7 +285,8 @@ def test_tokenize_CR_with_quoting(c_parser_only):
 
 
 @pytest.mark.slow
-def test_grow_boundary_at_cap(c_parser_only):
+@pytest.mark.parametrize("count", [3 * 2**n for n in range(6)])
+def test_grow_boundary_at_cap(c_parser_only, count):
     # See gh-12494
     #
     # Cause of error was that the C parser
@@ -296,19 +295,18 @@ def test_grow_boundary_at_cap(c_parser_only):
     # to capacity, which would later cause a
     # buffer overflow error when checking the
     # EOF terminator of the CSV stream.
+    # 3 * 2^n commas was observed to break the parser
     parser = c_parser_only
 
-    def test_empty_header_read(count):
-        with StringIO("," * count) as s:
-            expected = DataFrame(columns=[f"Unnamed: {i}" for i in range(count + 1)])
-            df = parser.read_csv(s)
-            tm.assert_frame_equal(df, expected)
-
-    for cnt in range(1, 101):
-        test_empty_header_read(cnt)
+    with StringIO("," * count) as s:
+        expected = DataFrame(columns=[f"Unnamed: {i}" for i in range(count + 1)])
+        df = parser.read_csv(s)
+        tm.assert_frame_equal(df, expected)
 
 
-def test_parse_trim_buffers(c_parser_only):
+@pytest.mark.slow
+@pytest.mark.parametrize("encoding", [None, "utf-8"])
+def test_parse_trim_buffers(c_parser_only, encoding):
     # This test is part of a bugfix for gh-13703. It attempts to
     # to stress the system memory allocator, to cause it to move the
     # stream buffer and either let the OS reclaim the region, or let
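The rewrite above also shrinks the search space: instead of calling a nested helper for every count in range(1, 101), the test is parametrized over doubling comma counts that straddle the tokenizer's buffer-growth boundaries. A self-contained sketch of the largest such case, drawn directly from the diff (assuming pandas with its `pandas._testing` helpers):

from io import StringIO

import pandas as pd
import pandas._testing as tm
from pandas import DataFrame

counts = [3 * 2**n for n in range(6)]
assert counts == [3, 6, 12, 24, 48, 96]  # the parametrized cases

count = counts[-1]
with StringIO("," * count) as s:
    # A header-only row of N commas yields N + 1 unnamed columns.
    expected = DataFrame(columns=[f"Unnamed: {i}" for i in range(count + 1)])
    df = pd.read_csv(s)
    tm.assert_frame_equal(df, expected)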
@@ -319,6 +317,9 @@ def test_parse_trim_buffers(c_parser_only):
     # times it fails due to memory corruption, which causes the
     # loaded DataFrame to differ from the expected one.
 
+    # Also force 'utf-8' encoding, so that `_string_convert` would take
+    # a different execution branch.
+
     parser = c_parser_only
 
     # Generate a large mixed-type CSV file on-the-fly (one record is
@@ -374,25 +375,16 @@ def test_parse_trim_buffers(c_parser_only):
     )
 
     # Iterate over the CSV file in chunks of `chunksize` lines
-    with parser.read_csv(
-        StringIO(csv_data), header=None, dtype=object, chunksize=chunksize
-    ) as chunks_:
-        result = concat(chunks_, axis=0, ignore_index=True)
-
-    # Check for data corruption if there was no segfault
-    tm.assert_frame_equal(result, expected)
-
-    # This extra test was added to replicate the fault in gh-5291.
-    # Force 'utf-8' encoding, so that `_string_convert` would take
-    # a different execution branch.
     with parser.read_csv(
         StringIO(csv_data),
         header=None,
         dtype=object,
         chunksize=chunksize,
-        encoding="utf_8",
+        encoding=encoding,
     ) as chunks_:
         result = concat(chunks_, axis=0, ignore_index=True)
+
+    # Check for data corruption if there was no segfault
     tm.assert_frame_equal(result, expected)
 
 
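Parametrizing over `encoding` folds the gh-5291 replication (explicit 'utf-8', which sends `_string_convert` down a different branch) into the same body as the default-encoding run, rather than duplicating the whole chunked-read block. A minimal sketch of the chunked read that both cases now share (the CSV payload here is a stand-in for the generated mixed-type data):

from io import StringIO

import pandas as pd
from pandas import concat

csv_data = "\n".join(["x,1,2.5"] * 10)  # stand-in payload

for encoding in (None, "utf-8"):  # mirrors the parametrized cases
    with pd.read_csv(
        StringIO(csv_data), header=None, dtype=object, chunksize=3, encoding=encoding
    ) as chunks_:
        result = concat(chunks_, axis=0, ignore_index=True)
    assert result.shape == (10, 3)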