Skip to content

Commit 789a98b

Browse files
kinow (Bruno P. Kinoshita) authored and Bruno P. Kinoshita committed
Fix pandas-dev#25099 set na_rep values before converting to string to prevent data truncation
1 parent 2d65e38 commit 789a98b

File tree

2 files changed: +20 -1 lines changed

pandas/core/internals/blocks.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,10 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
709709
mask = isna(values)
710710

711711
if not self.is_object and not quoting:
712-
values = values.astype(str)
712+
if na_rep and isinstance(na_rep, str):
713+
values = values.astype("<U{length}".format(length=len(na_rep)))
714+
else:
715+
values = values.astype(str)
713716
else:
714717
values = np.array(values, dtype="object")
715718

pandas/tests/io/formats/test_to_csv.py

+16
Original file line numberDiff line numberDiff line change
@@ -555,3 +555,19 @@ def test_to_csv_zip_arguments(self, compression, archive_name):
555555
assert len(zp.filelist) == 1
556556
archived_file = os.path.basename(zp.filelist[0].filename)
557557
assert archived_file == expected_arcname
558+
559+
def test_to_csv_na_rep_long_string(self, capsys):
    """Check that ``na_rep`` is written in full for an all-NA Int64 column.

    Regression test for gh-25099: a three-row frame holding only missing
    values is cast to ``Int64`` and written to ``sys.stdout`` with
    ``na_rep='mynull'``; every data row must come out as the complete
    ``mynull`` string.
    """
    na_label = "mynull"
    n_rows = 3
    frame = pd.DataFrame({"c": [float("nan")] * n_rows}).astype("Int64")

    # Header line followed by one na_rep entry per data row.
    wanted = tm.convert_rows_list_to_csv_str(["c"] + [na_label] * n_rows)

    frame.to_csv(sys.stdout, index=False, na_rep=na_label, encoding="ascii")
    written = capsys.readouterr().out

    assert written == wanted
    # Writing to a caller-supplied handle must leave it open.
    assert not sys.stdout.closed

0 commit comments

Comments
 (0)