Skip to content

Commit 2126601

Browse files
committed
BUG: Don't overflow PeriodIndex in to_csv
Closes pandas-dev gh-15982.
1 parent f2ed595 commit 2126601

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1344,6 +1344,7 @@ I/O
13441344
- Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`)
13451345
- Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`)
13461346
- Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`)
1347+
- Bug in ``DataFrame.to_csv()`` in which there was numeric overflow when a ``PeriodIndex`` containing dates outside the ``Timestamp`` range was being written (:issue:`15982`)
13471348
- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
13481349
- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`)
13491350
- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`)

pandas/formats/format.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -1564,10 +1564,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
15641564
self.chunksize = int(chunksize)
15651565

15661566
self.data_index = obj.index
1567-
if isinstance(obj.index, PeriodIndex):
1568-
self.data_index = obj.index.to_timestamp()
1569-
1570-
if (isinstance(self.data_index, DatetimeIndex) and
1567+
if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and
15711568
date_format is not None):
15721569
self.data_index = Index([x.strftime(date_format) if notnull(x) else
15731570
'' for x in self.data_index])

pandas/tests/frame/test_to_csv.py

+28
Original file line numberDiff line numberDiff line change
@@ -1143,3 +1143,31 @@ def test_to_csv_quoting(self):
11431143
df = df.set_index(['a', 'b'])
11441144
expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
11451145
self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)
1146+
1147+
def test_period_index_date_overflow(self):
1148+
# see gh-15982
1149+
1150+
dates = ["1990-01-01", "2000-01-01", "3005-01-01"]
1151+
index = pd.PeriodIndex(dates, freq="D")
1152+
1153+
df = pd.DataFrame([4, 5, 6], index=index)
1154+
result = df.to_csv()
1155+
1156+
expected = ',0\n1990-01-01,4\n2000-01-01,5\n3005-01-01,6\n'
1157+
assert result == expected
1158+
1159+
date_format = "%m-%d-%Y"
1160+
result = df.to_csv(date_format=date_format)
1161+
1162+
expected = ',0\n01-01-1990,4\n01-01-2000,5\n01-01-3005,6\n'
1163+
assert result == expected
1164+
1165+
# Overflow with pd.NaT
1166+
dates = ["1990-01-01", pd.NaT, "3005-01-01"]
1167+
index = pd.PeriodIndex(dates, freq="D")
1168+
1169+
df = pd.DataFrame([4, 5, 6], index=index)
1170+
result = df.to_csv()
1171+
1172+
expected = ',0\n1990-01-01,4\n,5\n3005-01-01,6\n'
1173+
assert result == expected

0 commit comments

Comments
 (0)