Skip to content

Commit c72d1aa

Browse files
committed
Fix pandas-dev#25099: set na_rep values before converting to string to prevent data truncation
1 parent c6a7cc1 commit c72d1aa

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

pandas/core/internals/blocks.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -712,7 +712,10 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None,
712712
mask = isna(values)
713713

714714
if not self.is_object and not quoting:
715-
values = values.astype(str)
715+
if na_rep and isinstance(na_rep, str):
716+
values = values.astype("<U{length}".format(length=len(na_rep)))
717+
else:
718+
values = values.astype(str)
716719
else:
717720
values = np.array(values, dtype='object')
718721

pandas/tests/io/formats/test_to_csv.py

+16
Original file line number | Diff line number | Diff line change
@@ -534,3 +534,19 @@ def test_to_csv_compression(self, compression_only,
534534
result = pd.read_csv(path, index_col=0,
535535
compression=read_compression)
536536
tm.assert_frame_equal(result, df)
537+
538+
def test_to_csv_na_rep_long_string(self, capsys):
539+
# see gh-25099
540+
df = pd.DataFrame({"c": [float('nan')] * 3})
541+
df = df.astype("Int64")
542+
expected_rows = ['c',
543+
'mynull',
544+
'mynull',
545+
'mynull']
546+
expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows)
547+
548+
df.to_csv(sys.stdout, index=False, na_rep='mynull', encoding='ascii')
549+
captured = capsys.readouterr()
550+
551+
assert captured.out == expected_ascii
552+
assert not sys.stdout.closed

0 commit comments

Comments
 (0)