Skip to content

Commit 707dda0

Browse files
authored
BUG: disallow resample with non-Tick on TimedeltaIndex (#51896)
* BUG: disallow resample with non-Tick on TimedeltaIndex * GH ref * misplaced whatsnew
1 parent b07a388 commit 707dda0

File tree

6 files changed

+94
-8
lines changed

6 files changed

+94
-8
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ Groupby/resample/rolling
201201
^^^^^^^^^^^^^^^^^^^^^^^^
202202
- Bug in :meth:`DataFrameGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmax` return wrong dtype when used on empty DataFrameGroupBy or SeriesGroupBy (:issue:`51423`)
203203
- Bug in weighted rolling aggregations when specifying ``min_periods=0`` (:issue:`51449`)
204+
- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` incorrectly allowing non-fixed ``freq`` when resampling on a :class:`TimedeltaIndex` (:issue:`51896`)
204205
-
205206

206207
Reshaping
@@ -227,6 +228,7 @@ Styler
227228

228229
Other
229230
^^^^^
231+
-
230232

231233
.. ***DO NOT USE THIS SECTION***
232234

pandas/core/arrays/datetimelike.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -1851,6 +1851,8 @@ def __init__(
18511851
values = values.copy()
18521852
if freq:
18531853
freq = to_offset(freq)
1854+
if values.dtype.kind == "m" and not isinstance(freq, Tick):
1855+
raise TypeError("TimedeltaArray/Index freq must be a Tick")
18541856

18551857
NDArrayBacked.__init__(self, values=values, dtype=dtype)
18561858
self._freq = freq
@@ -1874,6 +1876,8 @@ def freq(self, value) -> None:
18741876
if value is not None:
18751877
value = to_offset(value)
18761878
self._validate_frequency(self, value)
1879+
if self.dtype.kind == "m" and not isinstance(value, Tick):
1880+
raise TypeError("TimedeltaArray/Index freq must be a Tick")
18771881

18781882
if self.ndim > 1:
18791883
raise ValueError("Cannot set freq with ndim > 1")
@@ -2067,9 +2071,9 @@ def _with_freq(self, freq):
20672071
# Always valid
20682072
pass
20692073
elif len(self) == 0 and isinstance(freq, BaseOffset):
2070-
# Always valid. In the TimedeltaArray case, we assume this
2071-
# is a Tick offset.
2072-
pass
2074+
# Any offset is valid on an empty array, except that in the TimedeltaArray case we require a Tick offset
2075+
if self.dtype.kind == "m" and not isinstance(freq, Tick):
2076+
raise TypeError("TimedeltaArray/Index freq must be a Tick")
20732077
else:
20742078
# As an internal method, we can ensure this assertion always holds
20752079
assert freq == "infer"

pandas/core/arrays/timedeltas.py

+1
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ def _simple_new( # type: ignore[override]
202202
assert not tslibs.is_unitless(dtype)
203203
assert isinstance(values, np.ndarray), type(values)
204204
assert dtype == values.dtype
205+
assert freq is None or isinstance(freq, Tick)
205206

206207
result = super()._simple_new(values=values, dtype=dtype)
207208
result._freq = freq

pandas/core/resample.py

+7
Original file line numberDiff line numberDiff line change
@@ -1826,6 +1826,13 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex):
18261826
f"an instance of {type(ax).__name__}"
18271827
)
18281828

1829+
if not isinstance(self.freq, Tick):
1830+
# GH#51896
1831+
raise ValueError(
1832+
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
1833+
f"e.g. '24H' or '3D', not {self.freq}"
1834+
)
1835+
18291836
if not len(ax):
18301837
binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name)
18311838
return binner, [], labels

pandas/tests/indexes/timedeltas/test_freq_attr.py

+11
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
DateOffset,
77
Day,
88
Hour,
9+
MonthEnd,
910
)
1011

1112

@@ -25,6 +26,16 @@ def test_freq_setter(self, values, freq):
2526
idx._data.freq = None
2627
assert idx.freq is None
2728

29+
def test_with_freq_empty_requires_tick(self):
30+
idx = TimedeltaIndex([])
31+
32+
off = MonthEnd(1)
33+
msg = "TimedeltaArray/Index freq must be a Tick"
34+
with pytest.raises(TypeError, match=msg):
35+
idx._with_freq(off)
36+
with pytest.raises(TypeError, match=msg):
37+
idx._data._with_freq(off)
38+
2839
def test_freq_setter_errors(self):
2940
# GH#20678
3041
idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

pandas/tests/resample/test_base.py

+66-5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
NaT,
99
PeriodIndex,
1010
Series,
11+
TimedeltaIndex,
1112
)
1213
import pandas._testing as tm
1314
from pandas.core.groupby.groupby import DataError
@@ -110,7 +111,17 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method, request)
110111
)
111112

112113
ser = empty_series_dti
113-
result = getattr(ser.resample(freq), resample_method)()
114+
if freq == "M" and isinstance(ser.index, TimedeltaIndex):
115+
msg = (
116+
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
117+
"e.g. '24H' or '3D', not <MonthEnd>"
118+
)
119+
with pytest.raises(ValueError, match=msg):
120+
ser.resample(freq)
121+
return
122+
123+
rs = ser.resample(freq)
124+
result = getattr(rs, resample_method)()
114125

115126
expected = ser.copy()
116127
expected.index = _asfreq_compat(ser.index, freq)
@@ -150,11 +161,23 @@ def test_resample_nat_index_series(request, freq, series, resample_method):
150161
@pytest.mark.parametrize("resample_method", ["count", "size"])
151162
def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
152163
# GH28427
153-
result = getattr(empty_series_dti.resample(freq), resample_method)()
164+
ser = empty_series_dti
165+
if freq == "M" and isinstance(ser.index, TimedeltaIndex):
166+
msg = (
167+
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
168+
"e.g. '24H' or '3D', not <MonthEnd>"
169+
)
170+
with pytest.raises(ValueError, match=msg):
171+
ser.resample(freq)
172+
return
173+
174+
rs = ser.resample(freq)
175+
176+
result = getattr(rs, resample_method)()
154177

155-
index = _asfreq_compat(empty_series_dti.index, freq)
178+
index = _asfreq_compat(ser.index, freq)
156179

157-
expected = Series([], dtype="int64", index=index, name=empty_series_dti.name)
180+
expected = Series([], dtype="int64", index=index, name=ser.name)
158181

159182
tm.assert_series_equal(result, expected)
160183

@@ -165,7 +188,17 @@ def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method):
165188
# GH13212
166189
df = empty_frame_dti
167190
# count retains dimensions too
168-
result = getattr(df.resample(freq, group_keys=False), resample_method)()
191+
if freq == "M" and isinstance(df.index, TimedeltaIndex):
192+
msg = (
193+
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
194+
"e.g. '24H' or '3D', not <MonthEnd>"
195+
)
196+
with pytest.raises(ValueError, match=msg):
197+
df.resample(freq, group_keys=False)
198+
return
199+
200+
rs = df.resample(freq, group_keys=False)
201+
result = getattr(rs, resample_method)()
169202
if resample_method != "size":
170203
expected = df.copy()
171204
else:
@@ -188,6 +221,15 @@ def test_resample_count_empty_dataframe(freq, empty_frame_dti):
188221

189222
empty_frame_dti["a"] = []
190223

224+
if freq == "M" and isinstance(empty_frame_dti.index, TimedeltaIndex):
225+
msg = (
226+
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
227+
"e.g. '24H' or '3D', not <MonthEnd>"
228+
)
229+
with pytest.raises(ValueError, match=msg):
230+
empty_frame_dti.resample(freq)
231+
return
232+
191233
result = empty_frame_dti.resample(freq).count()
192234

193235
index = _asfreq_compat(empty_frame_dti.index, freq)
@@ -204,6 +246,15 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti):
204246

205247
empty_frame_dti["a"] = []
206248

249+
if freq == "M" and isinstance(empty_frame_dti.index, TimedeltaIndex):
250+
msg = (
251+
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
252+
"e.g. '24H' or '3D', not <MonthEnd>"
253+
)
254+
with pytest.raises(ValueError, match=msg):
255+
empty_frame_dti.resample(freq)
256+
return
257+
207258
result = empty_frame_dti.resample(freq).size()
208259

209260
index = _asfreq_compat(empty_frame_dti.index, freq)
@@ -233,6 +284,16 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
233284
def test_apply_to_empty_series(empty_series_dti, freq):
234285
# GH 14313
235286
ser = empty_series_dti
287+
288+
if freq == "M" and isinstance(empty_series_dti.index, TimedeltaIndex):
289+
msg = (
290+
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
291+
"e.g. '24H' or '3D', not <MonthEnd>"
292+
)
293+
with pytest.raises(ValueError, match=msg):
294+
empty_series_dti.resample(freq)
295+
return
296+
236297
result = ser.resample(freq, group_keys=False).apply(lambda x: 1)
237298
expected = ser.resample(freq).apply(np.sum)
238299

0 commit comments

Comments
 (0)