
Commit 5befb0e

TST: expanded 'test_parse_trim_buffers' to cover issue pandas-dev#5291
1 parent: d4f95fd


1 file changed: +12 −3 lines changed


pandas/io/tests/parser/c_parser_only.py (+12 −3)
@@ -439,15 +439,24 @@ def test_parse_trim_buffers(self):
 
         # Generate the expected output: manually create the dataframe
         # by splitting by comma and repeating the `n_lines` times.
-        row = tuple(val_ if val_ else float("nan")
+        row = tuple(val_ if val_ else np.nan
                     for val_ in record_.split(","))
         expected = pd.DataFrame([row for _ in range(n_lines)],
                                 dtype=object, columns=None, index=None)
 
         # Iterate over the CSV file in chunks of `chunksize` lines
         chunks_ = self.read_csv(StringIO(csv_data), header=None,
                                 dtype=object, chunksize=chunksize)
-        result = pd.concat(chunks_, axis=0, ignore_index=True)
+        result1 = pd.concat(chunks_, axis=0, ignore_index=True)
 
         # Check for data corruption if there was no segfault
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result1, expected)
+
+        # This extra test was added to replicate the fault in #5291.
+        # Force 'utf-8' encoding, so that `_string_convert` would take
+        # a different execution branch.
+        chunks_ = self.read_csv(StringIO(csv_data), header=None,
+                                dtype=object, chunksize=chunksize,
+                                encoding='utf_8')
+        result2 = pd.concat(chunks_, axis=0, ignore_index=True)
+        tm.assert_frame_equal(result2, expected)
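
For context, the sketch below shows, outside the test harness, the pattern the expanded test exercises: the same CSV text is parsed in chunks twice, once with the default encoding and once with an explicit 'utf-8' encoding (the case that sends `_string_convert` down the other execution branch), and the concatenated results are compared. It is a simplified, assumption-laden sketch, not the test itself: `csv_data` and `chunksize` are small illustrative placeholders rather than the values the real test builds from `record_` and `n_lines`, the public `pd.read_csv` stands in for the test class's `self.read_csv`, `pandas.testing` stands in for the `pandas.util.testing` module imported as `tm`, and the two results are checked against each other instead of against a hand-built `expected` frame.

from io import StringIO

import pandas as pd
import pandas.testing as pdt  # the test module itself uses pandas.util.testing as tm

# Illustrative placeholders; the real test builds csv_data by repeating a
# long `record_` string `n_lines` times so the C parser has to trim buffers.
csv_data = "\n".join(["a,b,,c,d"] * 10) + "\n"
chunksize = 3

# Chunked read with the default encoding, glued back into a single frame.
chunks = pd.read_csv(StringIO(csv_data), header=None,
                     dtype=object, chunksize=chunksize)
result1 = pd.concat(chunks, axis=0, ignore_index=True)

# Same read with an explicit utf-8 encoding, the variant added for #5291,
# which routes string conversion through the other _string_convert branch.
chunks = pd.read_csv(StringIO(csv_data), header=None,
                     dtype=object, chunksize=chunksize,
                     encoding='utf-8')
result2 = pd.concat(chunks, axis=0, ignore_index=True)

# If neither read segfaults or corrupts data, the frames are identical.
pdt.assert_frame_equal(result1, result2)

The direct result1-vs-result2 comparison is only for a self-contained demo; the committed test keeps the manually constructed expected frame as the ground truth for both reads.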
