@@ -439,15 +439,24 @@ def test_parse_trim_buffers(self):
 
         # Generate the expected output: manually create the dataframe
         # by splitting by comma and repeating the `n_lines` times.
-        row = tuple(val_ if val_ else float("nan")
+        row = tuple(val_ if val_ else np.nan
                     for val_ in record_.split(","))
         expected = pd.DataFrame([row for _ in range(n_lines)],
                                 dtype=object, columns=None, index=None)
 
         # Iterate over the CSV file in chunks of `chunksize` lines
         chunks_ = self.read_csv(StringIO(csv_data), header=None,
                                 dtype=object, chunksize=chunksize)
-        result = pd.concat(chunks_, axis=0, ignore_index=True)
+        result1 = pd.concat(chunks_, axis=0, ignore_index=True)
 
         # Check for data corruption if there was no segfault
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result1, expected)
+
+        # This extra test was added to replicate the fault in #5291.
+        # Force 'utf-8' encoding, so that `_string_convert` would take
+        # a different execution branch.
+        chunks_ = self.read_csv(StringIO(csv_data), header=None,
+                                dtype=object, chunksize=chunksize,
+                                encoding='utf_8')
+        result2 = pd.concat(chunks_, axis=0, ignore_index=True)
+        tm.assert_frame_equal(result2, expected)
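For context, here is a standalone sketch (not part of this commit) of the scenario the hunk exercises: the same chunked read is performed twice, once with the default encoding and once with an explicit encoding='utf_8', and both concatenated results are compared against the expected frame. The sample record, n_lines, and chunksize values are invented for illustration; pd.read_csv and pd.testing.assert_frame_equal stand in for the test class's self.read_csv and tm.assert_frame_equal.

from io import StringIO

import numpy as np
import pandas as pd

# Illustrative values only -- not the data used by test_parse_trim_buffers.
record_ = "a,,1.5,ZZ,,-9.99"      # empty fields should surface as NaN
n_lines, chunksize = 173, 19      # chosen so the chunks do not divide evenly
csv_data = "\n".join([record_] * n_lines) + "\n"

# Expected frame: the record split on commas, repeated n_lines times.
row = tuple(val_ if val_ else np.nan
            for val_ in record_.split(","))
expected = pd.DataFrame([row for _ in range(n_lines)],
                        dtype=object, columns=None, index=None)

# First pass uses the default encoding; the second mirrors the added
# assertions by forcing 'utf_8' (per the commit comment, this makes
# `_string_convert` take a different execution branch).
for encoding in (None, "utf_8"):
    chunks_ = pd.read_csv(StringIO(csv_data), header=None, dtype=object,
                          chunksize=chunksize, encoding=encoding)
    result = pd.concat(chunks_, axis=0, ignore_index=True)
    pd.testing.assert_frame_equal(result, expected)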