diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 07f393a814f8b..a105a6801fb61 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1344,6 +1344,7 @@ I/O - Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`) - Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) - Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`) +- Bug in ``DataFrame.to_csv()`` in which there was numeric overflow when a ``PeriodIndex`` was converted to timestamps while being written (:issue:`15982`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 2665f5aea145d..66a81aadc4213 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1564,10 +1564,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.chunksize = int(chunksize) self.data_index = obj.index - if isinstance(obj.index, PeriodIndex): - self.data_index = obj.index.to_timestamp() - - if (isinstance(self.data_index, DatetimeIndex) and + if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and date_format is not None): self.data_index = Index([x.strftime(date_format) if notnull(x) else '' for x in self.data_index]) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index bf7975bcdb964..ab5c01388e652 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1820,7 +1820,26 @@ def _format_with_header(self, header, 
na_rep='NaN', **kwargs): return header + result def to_native_types(self, slicer=None, **kwargs): - """ slice and dice then format """ + """ + Format specified values of `self` and return them. + + Parameters + ---------- + slicer : int, array-like + An indexer into `self` that specifies which values + are used in the formatting process. + kwargs : dict + Options for specifying how the values should be formatted. + These options include the following: + + 1) na_rep : str + The value that serves as a placeholder for NULL values + 2) quoting : bool or None + Whether or not there are quoted values in `self` + 3) date_format : str + The format used to represent date-like values + """ + values = self if slicer is not None: values = values[slicer] diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 927b9f6a48718..2d2dfa9a3d849 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -1143,3 +1143,31 @@ def test_to_csv_quoting(self): df = df.set_index(['a', 'b']) expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n' self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected) + + def test_period_index_date_overflow(self): + # see gh-15982 + + dates = ["1990-01-01", "2000-01-01", "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = pd.DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected = ',0\n1990-01-01,4\n2000-01-01,5\n3005-01-01,6\n' + assert result == expected + + date_format = "%m-%d-%Y" + result = df.to_csv(date_format=date_format) + + expected = ',0\n01-01-1990,4\n01-01-2000,5\n01-01-3005,6\n' + assert result == expected + + # Overflow with pd.NaT + dates = ["1990-01-01", pd.NaT, "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = pd.DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected = ',0\n1990-01-01,4\n,5\n3005-01-01,6\n' + assert result == expected diff --git a/pandas/tests/indexes/datetimes/test_formats.py 
b/pandas/tests/indexes/datetimes/test_formats.py new file mode 100644 index 0000000000000..ea2731f66f0ef --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -0,0 +1,47 @@ +from pandas import DatetimeIndex + +import numpy as np + +import pandas.util.testing as tm +import pandas as pd + + +def test_to_native_types(): + index = DatetimeIndex(freq='1D', periods=3, start='2017-01-01') + + # First, with no arguments. + expected = np.array(['2017-01-01', '2017-01-02', + '2017-01-03'], dtype=object) + + result = index.to_native_types() + tm.assert_numpy_array_equal(result, expected) + + # No NaN values, so na_rep has no effect + result = index.to_native_types(na_rep='pandas') + tm.assert_numpy_array_equal(result, expected) + + # Make sure slicing works + expected = np.array(['2017-01-01', '2017-01-03'], dtype=object) + + result = index.to_native_types([0, 2]) + tm.assert_numpy_array_equal(result, expected) + + # Make sure date formatting works + expected = np.array(['01-2017-01', '01-2017-02', + '01-2017-03'], dtype=object) + + result = index.to_native_types(date_format='%m-%Y-%d') + tm.assert_numpy_array_equal(result, expected) + + # NULL object handling should work + index = DatetimeIndex(['2017-01-01', pd.NaT, '2017-01-03']) + expected = np.array(['2017-01-01', 'NaT', '2017-01-03'], dtype=object) + + result = index.to_native_types() + tm.assert_numpy_array_equal(result, expected) + + expected = np.array(['2017-01-01', 'pandas', + '2017-01-03'], dtype=object) + + result = index.to_native_types(na_rep='pandas') + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py new file mode 100644 index 0000000000000..533481ce051f7 --- /dev/null +++ b/pandas/tests/indexes/period/test_formats.py @@ -0,0 +1,48 @@ +from pandas import PeriodIndex + +import numpy as np + +import pandas.util.testing as tm +import pandas as pd + + +def test_to_native_types(): + index = 
PeriodIndex(['2017-01-01', '2017-01-02', + '2017-01-03'], freq='D') + + # First, with no arguments. + expected = np.array(['2017-01-01', '2017-01-02', + '2017-01-03'], dtype='