From 0ce7afbbdef0e97191ad8320496e97974019c84c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 May 2023 07:44:07 -0700 Subject: [PATCH] BUG: Resampler.ohlc with empty data --- doc/source/whatsnew/v2.1.0.rst | 2 ++ pandas/core/resample.py | 18 ++++++++++++++++ pandas/tests/resample/test_base.py | 33 +++++++++++++++++++----------- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 1c0798e6cf9b1..cd1e1cda2382d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -420,6 +420,8 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`) +- Bug in :meth:`Resampler.ohlc` with empty object returning a :class:`Series` instead of empty :class:`DataFrame` (:issue:`42902`) +- Reshaping ^^^^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f8adb2332609b..1e43f86269634 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -57,6 +57,7 @@ ) from pandas.core.groupby.grouper import Grouper from pandas.core.groupby.ops import BinGrouper +from pandas.core.indexes.api import MultiIndex from pandas.core.indexes.datetimes import ( DatetimeIndex, date_range, @@ -1211,6 +1212,23 @@ def ohlc( ): maybe_warn_args_and_kwargs(type(self), "ohlc", args, kwargs) nv.validate_resampler_func("ohlc", args, kwargs) + + ax = self.ax + obj = self._obj_with_exclusions + if len(ax) == 0: + # GH#42902 + obj = obj.copy() + obj.index = _asfreq_compat(obj.index, self.freq) + if obj.ndim == 1: + obj = obj.to_frame() + obj = obj.reindex(["open", "high", "low", "close"], axis=1) + else: + mi = MultiIndex.from_product( + [obj.columns, ["open", "high", "low", "close"]] + ) + obj = obj.reindex(mi, axis=1) + return obj + return self._downsample("ohlc") @doc(SeriesGroupBy.nunique) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 9514ccd24c1ca..709c3d65064d6 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -5,6 +5,7 @@ from pandas import ( DataFrame, + MultiIndex, NaT, PeriodIndex, Series, @@ -100,16 +101,9 @@ def test_raises_on_non_datetimelike_index(): @all_ts @pytest.mark.parametrize("freq", ["M", "D", "H"]) -def test_resample_empty_series(freq, empty_series_dti, resample_method, request): +def test_resample_empty_series(freq, empty_series_dti, resample_method): # GH12771 & GH12868 - if resample_method == "ohlc" and isinstance(empty_series_dti.index, PeriodIndex): - request.node.add_marker( - pytest.mark.xfail( - reason=f"GH13083: {resample_method} fails for PeriodIndex" - ) - ) - ser = empty_series_dti if freq == "M" and isinstance(ser.index, TimedeltaIndex): msg = ( @@ -123,12 +117,19 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method, request) rs = ser.resample(freq) result = getattr(rs, resample_method)() - expected = ser.copy() - expected.index = _asfreq_compat(ser.index, freq) + if resample_method == "ohlc": + expected = DataFrame( + [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"] + ) + expected.index = _asfreq_compat(ser.index, freq) + tm.assert_frame_equal(result, expected, check_dtype=False) + else: + expected = ser.copy() + expected.index = _asfreq_compat(ser.index, freq) + tm.assert_series_equal(result, expected, check_dtype=False) tm.assert_index_equal(result.index, expected.index) assert result.index.freq == expected.index.freq - tm.assert_series_equal(result, expected, check_dtype=False) @all_ts @@ -199,7 +200,15 @@ def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): rs = df.resample(freq, group_keys=False) result = getattr(rs, resample_method)() - if resample_method != "size": + if resample_method == "ohlc": + # TODO: no tests with len(df.columns) > 0 + mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]]) + expected = DataFrame( + [], index=df.index[:0].copy(), columns=mi, dtype=np.float64 + ) + expected.index = _asfreq_compat(df.index, freq) + + elif resample_method != "size": expected = df.copy() else: # GH14962