diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 2dea25d6f10f4..e74629e63f2c7 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -200,6 +200,7 @@ Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in :meth:`DataFrameGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmax` return wrong dtype when used on empty DataFrameGroupBy or SeriesGroupBy (:issue:`51423`) - Bug in weighted rolling aggregations when specifying ``min_periods=0`` (:issue:`51449`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` incorrectly allowing non-fixed ``freq`` when resampling on a :class:`TimedeltaIndex` (:issue:`51896`) - Reshaping @@ -226,6 +227,7 @@ Styler Other ^^^^^ +- .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 5295f89626e04..a0008ad0015ed 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1851,6 +1851,8 @@ def __init__( values = values.copy() if freq: freq = to_offset(freq) + if values.dtype.kind == "m" and not isinstance(freq, Tick): + raise TypeError("TimedeltaArray/Index freq must be a Tick") NDArrayBacked.__init__(self, values=values, dtype=dtype) self._freq = freq @@ -1874,6 +1876,8 @@ def freq(self, value) -> None: if value is not None: value = to_offset(value) self._validate_frequency(self, value) + if self.dtype.kind == "m" and not isinstance(value, Tick): + raise TypeError("TimedeltaArray/Index freq must be a Tick") if self.ndim > 1: raise ValueError("Cannot set freq with ndim > 1") @@ -2067,9 +2071,9 @@ def _with_freq(self, freq): # Always valid pass elif len(self) == 0 and isinstance(freq, BaseOffset): - # Always valid. In the TimedeltaArray case, we assume this - # is a Tick offset. - pass + # Always valid. 
In the TimedeltaArray case, we require a Tick offset + if self.dtype.kind == "m" and not isinstance(freq, Tick): + raise TypeError("TimedeltaArray/Index freq must be a Tick") else: # As an internal method, we can ensure this assertion always holds assert freq == "infer" diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 179de3925e371..e52df2a118151 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -202,6 +202,7 @@ def _simple_new( # type: ignore[override] assert not tslibs.is_unitless(dtype) assert isinstance(values, np.ndarray), type(values) assert dtype == values.dtype + assert freq is None or isinstance(freq, Tick) result = super()._simple_new(values=values, dtype=dtype) result._freq = freq diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 0ca01efe0c855..50eae11be99eb 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1826,6 +1826,13 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): f"an instance of {type(ax).__name__}" ) + if not isinstance(self.freq, Tick): + # GH#51896 + raise ValueError( + "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " + f"e.g. 
'24H' or '3D', not {self.freq}" + ) + if not len(ax): binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name) return binner, [], labels diff --git a/pandas/tests/indexes/timedeltas/test_freq_attr.py b/pandas/tests/indexes/timedeltas/test_freq_attr.py index 39b9c11aa833c..868da4329dccf 100644 --- a/pandas/tests/indexes/timedeltas/test_freq_attr.py +++ b/pandas/tests/indexes/timedeltas/test_freq_attr.py @@ -6,6 +6,7 @@ DateOffset, Day, Hour, + MonthEnd, ) @@ -25,6 +26,16 @@ def test_freq_setter(self, values, freq): idx._data.freq = None assert idx.freq is None + def test_with_freq_empty_requires_tick(self): + idx = TimedeltaIndex([]) + + off = MonthEnd(1) + msg = "TimedeltaArray/Index freq must be a Tick" + with pytest.raises(TypeError, match=msg): + idx._with_freq(off) + with pytest.raises(TypeError, match=msg): + idx._data._with_freq(off) + def test_freq_setter_errors(self): # GH#20678 idx = TimedeltaIndex(["0 days", "2 days", "4 days"]) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 19445c35f0bb6..28e99bd3c0cc0 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -8,6 +8,7 @@ NaT, PeriodIndex, Series, + TimedeltaIndex, ) import pandas._testing as tm from pandas.core.groupby.groupby import DataError @@ -110,7 +111,17 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method, request) ) ser = empty_series_dti - result = getattr(ser.resample(freq), resample_method)() + if freq == "M" and isinstance(ser.index, TimedeltaIndex): + msg = ( + "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " + "e.g. 
'24H' or '3D', not " + ) + with pytest.raises(ValueError, match=msg): + ser.resample(freq) + return + + rs = ser.resample(freq) + result = getattr(rs, resample_method)() expected = ser.copy() expected.index = _asfreq_compat(ser.index, freq) @@ -150,11 +161,23 @@ def test_resample_nat_index_series(request, freq, series, resample_method): @pytest.mark.parametrize("resample_method", ["count", "size"]) def test_resample_count_empty_series(freq, empty_series_dti, resample_method): # GH28427 - result = getattr(empty_series_dti.resample(freq), resample_method)() + ser = empty_series_dti + if freq == "M" and isinstance(ser.index, TimedeltaIndex): + msg = ( + "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " + "e.g. '24H' or '3D', not " + ) + with pytest.raises(ValueError, match=msg): + ser.resample(freq) + return + + rs = ser.resample(freq) + + result = getattr(rs, resample_method)() - index = _asfreq_compat(empty_series_dti.index, freq) + index = _asfreq_compat(ser.index, freq) - expected = Series([], dtype="int64", index=index, name=empty_series_dti.name) + expected = Series([], dtype="int64", index=index, name=ser.name) tm.assert_series_equal(result, expected) @@ -165,7 +188,17 @@ def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): # GH13212 df = empty_frame_dti # count retains dimensions too - result = getattr(df.resample(freq, group_keys=False), resample_method)() + if freq == "M" and isinstance(df.index, TimedeltaIndex): + msg = ( + "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " + "e.g. 
'24H' or '3D', not " + ) + with pytest.raises(ValueError, match=msg): + df.resample(freq, group_keys=False) + return + + rs = df.resample(freq, group_keys=False) + result = getattr(rs, resample_method)() if resample_method != "size": expected = df.copy() else: @@ -188,6 +221,15 @@ def test_resample_count_empty_dataframe(freq, empty_frame_dti): empty_frame_dti["a"] = [] + if freq == "M" and isinstance(empty_frame_dti.index, TimedeltaIndex): + msg = ( + "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " + "e.g. '24H' or '3D', not " + ) + with pytest.raises(ValueError, match=msg): + empty_frame_dti.resample(freq) + return + result = empty_frame_dti.resample(freq).count() index = _asfreq_compat(empty_frame_dti.index, freq) @@ -204,6 +246,15 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti): empty_frame_dti["a"] = [] + if freq == "M" and isinstance(empty_frame_dti.index, TimedeltaIndex): + msg = ( + "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " + "e.g. '24H' or '3D', not " + ) + with pytest.raises(ValueError, match=msg): + empty_frame_dti.resample(freq) + return + result = empty_frame_dti.resample(freq).size() index = _asfreq_compat(empty_frame_dti.index, freq) @@ -233,6 +284,16 @@ def test_resample_empty_dtypes(index, dtype, resample_method): def test_apply_to_empty_series(empty_series_dti, freq): # GH 14313 ser = empty_series_dti + + if freq == "M" and isinstance(empty_series_dti.index, TimedeltaIndex): + msg = ( + "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " + "e.g. '24H' or '3D', not " + ) + with pytest.raises(ValueError, match=msg): + empty_series_dti.resample(freq) + return + result = ser.resample(freq, group_keys=False).apply(lambda x: 1) expected = ser.resample(freq).apply(np.sum)