Skip to content

Commit 1c94026

Browse files
committed
BUG: Don't overflow PeriodIndex in to_csv
Closes gh-15982.
1 parent c4d71ce commit 1c94026

File tree

3 files changed

+33
-4
lines changed

3 files changed

+33
-4
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1260,6 +1260,7 @@ I/O
12601260
- Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`)
12611261
- Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`)
12621262
- Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`)
1263+
- Bug in ``DataFrame.to_csv()`` in which there was numeric overflow when a ``PeriodIndex`` was being written (:issue:`15982`)
12631264
- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
12641265
- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`)
12651266
- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`)

pandas/formats/format.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -1564,10 +1564,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
15641564
self.chunksize = int(chunksize)
15651565

15661566
self.data_index = obj.index
1567-
if isinstance(obj.index, PeriodIndex):
1568-
self.data_index = obj.index.to_timestamp()
1569-
1570-
if (isinstance(self.data_index, DatetimeIndex) and
1567+
if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and
15711568
date_format is not None):
15721569
self.data_index = Index([x.strftime(date_format) if notnull(x) else
15731570
'' for x in self.data_index])

pandas/tests/frame/test_to_csv.py

+31
Original file line numberDiff line numberDiff line change
@@ -1143,3 +1143,34 @@ def test_to_csv_quoting(self):
11431143
df = df.set_index(['a', 'b'])
11441144
expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
11451145
self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)
1146+
1147+
def test_period_index_date_overflow(self):
1148+
# see gh-15982: to_csv must write a PeriodIndex containing far-future dates (e.g. 3005-01-01) without overflowing
1149+
1150+
dates = ["1990-01-01", "2000-01-01", "3005-01-01"]
1151+
index = pd.PeriodIndex(dates, freq="D")
1152+
df = pd.DataFrame([4, 5, 6], index=index)
1153+
1154+
buf = StringIO()
1155+
df.to_csv(buf)
1156+
1157+
expected = ',0\n1990-01-01,4\n2000-01-01,5\n3005-01-01,6\n'
1158+
self.assertEqual(buf.getvalue(), expected)
1159+
1160+
buf = StringIO()
1161+
date_format = "%m-%d-%Y"
1162+
df.to_csv(buf, date_format=date_format)
1163+
1164+
expected = ',0\n01-01-1990,4\n01-01-2000,5\n01-01-3005,6\n'
1165+
self.assertEqual(buf.getvalue(), expected)
1166+
1167+
# Same far-future date alongside a missing period (pd.NaT); the NaT row is expected to have an empty index field
1168+
dates = ["1990-01-01", pd.NaT, "3005-01-01"]
1169+
index = pd.PeriodIndex(dates, freq="D")
1170+
df = pd.DataFrame([4, 5, 6], index=index)
1171+
1172+
buf = StringIO()
1173+
df.to_csv(buf)
1174+
1175+
expected = ',0\n1990-01-01,4\n,5\n3005-01-01,6\n'
1176+
self.assertEqual(buf.getvalue(), expected)

0 commit comments

Comments
 (0)