Skip to content

Commit 2bd1311

Browse files
committed
TST: Add another test for segfault in C engine
xref pandas-devgh-13833. Closes pandas-devgh-5291.
1 parent f7f214b commit 2bd1311

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

pandas/tests/io/parser/c_parser_only.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -290,11 +290,11 @@ def test_empty_header_read(count):
290290
test_empty_header_read(count)
291291

292292
def test_parse_trim_buffers(self):
293-
# This test is part of a bugfix for issue #13703. It attmepts to
293+
# This test is part of a bugfix for issue #13703. It attempts
294294
# to stress the system memory allocator, to cause it to move the
295295
# stream buffer and either let the OS reclaim the region, or let
296296
# other memory requests of parser otherwise modify the contents
297-
# of memory space, where it was formely located.
297+
# of memory space, where it was formerly located.
298298
# This test is designed to cause a `segfault` with unpatched
299299
# `tokenizer.c`. Sometimes the test fails on `segfault`, other
300300
# times it fails due to memory corruption, which causes the
@@ -346,7 +346,7 @@ def test_parse_trim_buffers(self):
346346

347347
# Generate the expected output: manually create the dataframe
348348
# by splitting by comma and repeating the row `n_lines` times.
349-
row = tuple(val_ if val_ else float("nan")
349+
row = tuple(val_ if val_ else np.nan
350350
for val_ in record_.split(","))
351351
expected = pd.DataFrame([row for _ in range(n_lines)],
352352
dtype=object, columns=None, index=None)
@@ -359,6 +359,15 @@ def test_parse_trim_buffers(self):
359359
# Check for data corruption if there was no segfault
360360
tm.assert_frame_equal(result, expected)
361361

362+
# This extra test was added to replicate the fault in gh-5291.
363+
# Force 'utf-8' encoding, so that `_string_convert` would take
364+
# a different execution branch.
365+
chunks_ = self.read_csv(StringIO(csv_data), header=None,
366+
dtype=object, chunksize=chunksize,
367+
encoding='utf_8')
368+
result = pd.concat(chunks_, axis=0, ignore_index=True)
369+
tm.assert_frame_equal(result, expected)
370+
362371
def test_internal_null_byte(self):
363372
# see gh-14012
364373
#

0 commit comments

Comments
 (0)