From 029173a6a99177c0099cdd0dd28d66bd9606aa3c Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Tue, 31 Oct 2023 12:22:57 +0800 Subject: [PATCH 1/7] add test to hit the issue --- pandas/tests/resample/test_base.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 42e741119b0a1..6ef6a0f69f359 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -287,6 +287,33 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti): tm.assert_series_equal(result, expected) +@all_ts +@pytest.mark.parametrize("freq", ["ME", "D", "h"]) +@pytest.mark.parametrize( + "method", ["ffill", "bfill", "nearest", "asfreq", "interpolate"] +) +def test_resample_upsample_empty_dataframe(freq, method, empty_frame_dti): + # GH#55572 + if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex): + msg = ( + "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " + "e.g. 
'24h' or '3D', not " + ) + with pytest.raises(ValueError, match=msg): + empty_frame_dti.resample(freq) + return + elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): + # index is PeriodIndex, so convert to corresponding Period freq + freq = "M" + rs = empty_frame_dti.resample(freq) + result = getattr(rs, method)() + + index = _asfreq_compat(empty_frame_dti.index, freq) + expected = DataFrame([], index=index) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0)) @pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"]) From 9cb8324659f6a896ef6dfbaea78cb9f7667399fb Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Tue, 31 Oct 2023 12:26:14 +0800 Subject: [PATCH 2/7] move changes specific to groupby resample down to groupby mixin --- pandas/core/resample.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index d648a0afb8ce4..a11f11c9acdf2 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -492,22 +492,12 @@ def _wrap_result(self, result): """ Potentially wrap any results. 
""" - # GH 47705 - obj = self.obj - if ( - isinstance(result, ABCDataFrame) - and len(result) == 0 - and not isinstance(result.index, PeriodIndex) - ): - result = result.set_index( - _asfreq_compat(obj.index[:0], freq=self.freq), append=True - ) - if isinstance(result, ABCSeries) and self._selection is not None: result.name = self._selection if isinstance(result, ABCSeries) and result.empty: # When index is all NaT, result is empty but index is not + obj = self.obj result.index = _asfreq_compat(obj.index[:0], freq=self.freq) result.name = getattr(obj, "name", None) @@ -1675,6 +1665,17 @@ def func(x): return x.apply(f, *args, **kwargs) result = _apply(self._groupby, func, include_groups=self.include_groups) + + # GH 47705 + if ( + isinstance(result, ABCDataFrame) + and len(result) == 0 + and not isinstance(result.index, PeriodIndex) + ): + result = result.set_index( + _asfreq_compat(self.obj.index[:0], freq=self.freq), append=True + ) + return self._wrap_result(result) _upsample = _apply From 955c61d9c12e703b2e994ece49adb1eed72f7e1d Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Tue, 31 Oct 2023 12:31:08 +0800 Subject: [PATCH 3/7] changelog added --- doc/source/whatsnew/v2.1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst index 1359123ef153e..1b5fd2113a720 100644 --- a/doc/source/whatsnew/v2.1.3.rst +++ b/doc/source/whatsnew/v2.1.3.rst @@ -21,7 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`) - .. 
--------------------------------------------------------------------------- From eda108b831179ee04a9c8a04a7e8dd02585a9347 Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Tue, 31 Oct 2023 12:35:20 +0800 Subject: [PATCH 4/7] retrigger checks From ed9804407944bcf3195b65c18c69ff3837cd7cf3 Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Wed, 13 Dec 2023 21:59:37 +0800 Subject: [PATCH 5/7] move changelog version --- doc/source/whatsnew/v2.1.3.rst | 1 - doc/source/whatsnew/v2.1.4.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst index e38b892854820..af626895a9e0e 100644 --- a/doc/source/whatsnew/v2.1.3.rst +++ b/doc/source/whatsnew/v2.1.3.rst @@ -20,7 +20,6 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- Bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`) - Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`) - Bug in :meth:`Index.isin` raising for Arrow backed string and ``None`` value (:issue:`55821`) - Fix :func:`read_parquet` and :func:`read_feather` for `CVE-2023-47248 <https://www.cve.org/CVERecord?id=CVE-2023-47248>`__ (:issue:`55894`) diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst index 57b83a294963b..4106001298c88 100644 --- a/doc/source/whatsnew/v2.1.4.rst +++ b/doc/source/whatsnew/v2.1.4.rst @@ -23,6 +23,7 @@ Bug fixes - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`) - Bug in :class:`Series` when trying to cast date-like string inputs to :class:`ArrowDtype` of ``pyarrow.timestamp`` (:issue:`56266`) - Bug in :class:`Timestamp` construction with ``ts_input="now"`` or ``ts_input="today"`` giving a different unit from :meth:`Timestamp.now` or :meth:`Timestamp.today` (:issue:`55879`) +- Bug in :meth:`DataFrame.resample` 
changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`) - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`) - Fixed bug in :func:`read_csv` not respecting object dtype when ``infer_string`` option is set (:issue:`56047`) - Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`) From b4033a5cd85682937ea3583d75bd8b701223e503 Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Sun, 17 Dec 2023 14:43:32 +0800 Subject: [PATCH 6/7] move changelog to 2.2.0 --- doc/source/whatsnew/v2.1.4.rst | 1 - doc/source/whatsnew/v2.2.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst index 4106001298c88..57b83a294963b 100644 --- a/doc/source/whatsnew/v2.1.4.rst +++ b/doc/source/whatsnew/v2.1.4.rst @@ -23,7 +23,6 @@ Bug fixes - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`) - Bug in :class:`Series` when trying to cast date-like string inputs to :class:`ArrowDtype` of ``pyarrow.timestamp`` (:issue:`56266`) - Bug in :class:`Timestamp` construction with ``ts_input="now"`` or ``ts_input="today"`` giving a different unit from :meth:`Timestamp.now` or :meth:`Timestamp.today` (:issue:`55879`) -- Bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`) - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`) - Fixed bug in :func:`read_csv` not respecting object dtype when ``infer_string`` option is set (:issue:`56047`) - Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`) diff --git 
a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 0c4fb6d3d1164..2204d02ad8932 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -658,6 +658,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.value_counts` and :meth:`.SeriesGroupBy.value_count` would sort by proportions rather than frequencies when ``sort=True`` and ``normalize=True`` (:issue:`55951`) - Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`) - Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`) +- Bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`) - Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`) - Bug in :meth:`DataFrame.resample` when resampling on a :class:`ArrowDtype` of ``pyarrow.timestamp`` or ``pyarrow.duration`` type (:issue:`55989`) - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`) From 5aeb180ac83aa6a5a85ad54247af6862a825cbca Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:43:16 +0800 Subject: [PATCH 7/7] Fix according to cleanup of fixtures and deprecation of resampling with PeriodIndex --- pandas/tests/resample/test_base.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index d948e0be8c8e5..7a6c7eba1193d 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -337,12 +337,17 @@ def 
test_resample_size_empty_dataframe(freq, index): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "index", [DatetimeIndex([]), TimedeltaIndex([]), PeriodIndex([], freq="D")] +) @pytest.mark.parametrize("freq", ["ME", "D", "h"]) @pytest.mark.parametrize( "method", ["ffill", "bfill", "nearest", "asfreq", "interpolate"] ) -def test_resample_upsample_empty_dataframe(freq, method, empty_frame_dti): +def test_resample_upsample_empty_dataframe(index, freq, method): # GH#55572 + empty_frame_dti = DataFrame(index=index) + if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " @@ -354,11 +359,18 @@ def test_resample_upsample_empty_dataframe(freq, method, empty_frame_dti): elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): # index is PeriodIndex, so convert to corresponding Period freq freq = "M" - rs = empty_frame_dti.resample(freq) + + msg = "Resampling with a PeriodIndex" + warn = None + if isinstance(empty_frame_dti.index, PeriodIndex): + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + rs = empty_frame_dti.resample(freq) result = getattr(rs, method)() - index = _asfreq_compat(empty_frame_dti.index, freq) - expected = DataFrame([], index=index) + expected_index = _asfreq_compat(empty_frame_dti.index, freq) + expected = DataFrame([], index=expected_index) tm.assert_frame_equal(result, expected)