diff --git a/.gitignore b/.gitignore index 6c3c275c48fb7..f236f897637a3 100644 --- a/.gitignore +++ b/.gitignore @@ -118,3 +118,4 @@ doc/build/html/index.html doc/tmp.sv env/ doc/source/savefig/ +venv/ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index bd47bef397aa7..fafb9abdade8c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -985,6 +985,7 @@ I/O - Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. (:issue:`32207`) - Bug in :meth:`read_json` was raising ``TypeError`` when reading a list of booleans into a Series. (:issue:`31464`) - Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`) +- Bug in :meth:`to_json` was raising ``AttributeError`` with column or Series of `PeriodDtype` (:issue:`31917`) - :func:`pandas.read_hdf` has a more explicit error message when loading an unsupported HDF file (:issue:`9539`) - Bug in :meth:`~DataFrame.read_feather` was raising an `ArrowIOError` when reading an s3 or http file path (:issue:`29055`) diff --git a/pandas/conftest.py b/pandas/conftest.py index e4cb3270b9acf..229d2e3894b62 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -477,6 +477,15 @@ def _create_series(index): } +@pytest.fixture +def period_series(): + """Fixture for Series with Period-type index. 
+ """ + s = tm.makePeriodSeries() + s.name = "ps" + return s + + @pytest.fixture def series_with_simple_index(indices): """ @@ -588,6 +597,35 @@ def datetime_frame(): return DataFrame(tm.getTimeSeriesData()) +@pytest.fixture +def period_frame(): + """ + Fixture for DataFrame of floats with PeriodIndex + + Columns are ['A', 'B', 'C', 'D'] + + A B C D + 2000-01-03 -1.122153 0.468535 0.122226 1.693711 + 2000-01-04 0.189378 0.486100 0.007864 -1.216052 + 2000-01-05 0.041401 -0.835752 -0.035279 -0.414357 + 2000-01-06 0.430050 0.894352 0.090719 0.036939 + 2000-01-07 -0.620982 -0.668211 -0.706153 1.466335 + 2000-01-10 -0.752633 0.328434 -0.815325 0.699674 + 2000-01-11 -2.236969 0.615737 -0.829076 -1.196106 + ... ... ... ... ... + 2000-02-03 1.642618 -0.579288 0.046005 1.385249 + 2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351 + 2000-02-07 -2.656149 -0.601387 1.410148 0.444150 + 2000-02-08 -1.201881 -1.289040 0.772992 -1.445300 + 2000-02-09 1.377373 0.398619 1.008453 -0.928207 + 2000-02-10 0.473194 -0.636677 0.984058 0.511519 + 2000-02-11 -0.965556 0.408313 -1.312844 -0.381948 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getPeriodData()) + + @pytest.fixture def float_frame(): """ diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 56b854bee77d7..2a3d614674b8f 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -42,23 +42,6 @@ def setup(self): yield - @pytest.fixture - def datetime_series(self): - # Same as usual datetime_series, but with index freq set to None, - # since that doesnt round-trip, see GH#33711 - ser = tm.makeTimeSeries() - ser.name = "ts" - ser.index = ser.index._with_freq(None) - return ser - - @pytest.fixture - def datetime_frame(self): - # Same as usual datetime_frame, but with index freq set to None, - # since that doesnt round-trip, see GH#33711 - df = DataFrame(tm.getTimeSeriesData()) - df.index = df.index._with_freq(None) - return df - def 
test_frame_double_encoded_labels(self, orient): df = DataFrame( [["a", "b"], ["c", "d"]], @@ -382,6 +365,42 @@ def test_frame_to_json_except(self): with pytest.raises(ValueError, match=msg): df.to_json(orient="garbage") + def test_frame_roundtrip_period_index(self, orient, period_frame): + # GH32665: Fix to_json when converting Period column/series + if orient == "split": + pytest.skip("skipping orient=split due to different conversion schema") + + data = period_frame.to_json(orient=orient) + result = pd.read_json(data, typ="frame", orient=orient) + + expected = period_frame.copy() + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient == "values": + # drop column names as well + expected = expected.T.reset_index(drop=True).T + if orient in ("index", "columns"): + result.index = result.index.to_period(freq=expected.index.freq) + expected.name = None + + tm.assert_frame_equal(result, expected) + + @pytest.mark.skip(reason="Conversion of Period-like column in dict-like format") + def test_frame_roundtrip_period_columns(self, orient, period_frame): + # GH32665: Fix to_json when converting Period column/series + + test_frame = period_frame.reset_index() + data = test_frame.to_json(orient=orient) + result = pd.read_json(data, typ="frame", orient=orient) + + expected = test_frame + if orient == "values": + expected.columns = range(len(expected.columns)) + if orient != "split": + expected.name = None + + tm.assert_frame_equal(result, expected) + def test_frame_empty(self): df = DataFrame(columns=["jim", "joe"]) assert not df._is_mixed_type @@ -433,9 +452,6 @@ def test_frame_mixedtype_orient(self): # GH10289 tm.assert_frame_equal(left, right) def test_v12_compat(self, datapath): - dti = pd.date_range("2000-01-03", "2000-01-07") - # freq doesnt roundtrip - dti = pd.DatetimeIndex(np.asarray(dti), freq=None) df = DataFrame( [ [1.56808523, 0.65727391, 1.81021139, -0.17251653], @@ -445,7 +461,7 @@ def test_v12_compat(self, datapath): 
[0.05951614, -2.69652057, 1.28163262, 0.34703478], ], columns=["A", "B", "C", "D"], - index=dti, + index=pd.date_range("2000-01-03", "2000-01-07"), ) df["date"] = pd.Timestamp("19920106 18:21:32.12") df.iloc[3, df.columns.get_loc("date")] = pd.Timestamp("20130101") @@ -464,9 +480,6 @@ def test_v12_compat(self, datapath): def test_blocks_compat_GH9037(self): index = pd.date_range("20000101", periods=10, freq="H") - # freq doesnt round-trip - index = pd.DatetimeIndex(list(index), freq=None) - df_mixed = DataFrame( OrderedDict( float_1=[ @@ -673,6 +686,23 @@ def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series): tm.assert_series_equal(result, expected) + def test_series_roundtrip_periodseries(self, orient, period_series): + # GH32665: Fix to_json when converting Period column/series + if orient == "split": + pytest.skip("skipping orient=split due to different conversion schema") + + data = period_series.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient) + + expected = period_series + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient in ("index", "columns"): + result.index = result.index.to_period(freq=expected.index.freq) + expected.name = None + + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("dtype", [np.float64, int]) @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_numeric(self, orient, numpy, dtype):