Skip to content

Commit 1ec59e6

Browse files
committed
BUG: fix pandas-dev#30353 invalid end
1 parent ef88d21 commit 1ec59e6

File tree

3 files changed

+30
-15
lines changed

3 files changed

+30
-15
lines changed

pandas/core/arrays/timedeltas.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from pandas.core import nanops
3939
from pandas.core.algorithms import checked_add_with_arr
4040
from pandas.core.arrays import datetimelike as dtl
41+
from pandas.core.arrays._ranges import _generate_range_overflow_safe
4142
import pandas.core.common as com
4243
from pandas.core.construction import extract_array
4344

@@ -1061,14 +1062,15 @@ def _generate_regular_range(start, end, periods, offset):
10611062
stride = offset.nanos
10621063
if periods is None:
10631064
b = Timedelta(start).value
1064-
e = Timedelta(end).value
1065-
e += stride - e % stride
1065+
# cannot just use e = Timedelta(end) + 1 because arange breaks when
1066+
# stride is too large, see GH 10887 & GH 30353
1067+
e = b + (Timedelta(end).value - b) // stride * stride + stride // 2 + 1
10661068
elif start is not None:
10671069
b = Timedelta(start).value
1068-
e = b + periods * stride
1070+
e = _generate_range_overflow_safe(b, periods, stride, side="start")
10691071
elif end is not None:
10701072
e = Timedelta(end).value + stride
1071-
b = e - periods * stride
1073+
b = _generate_range_overflow_safe(e, periods, stride, side="end")
10721074
else:
10731075
raise ValueError(
10741076
"at least 'start' or 'end' should be specified if a 'period' is given."

pandas/tests/resample/test_base.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas.core.groupby.grouper import Grouper
1111
from pandas.core.indexes.datetimes import date_range
1212
from pandas.core.indexes.period import PeriodIndex, period_range
13-
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
13+
from pandas.core.indexes.timedeltas import timedelta_range
1414
from pandas.core.resample import _asfreq_compat
1515

1616
# a fixture value can be overridden by the test parameter value. Note that the
@@ -182,7 +182,6 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti):
182182
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
183183
@pytest.mark.parametrize("dtype", [np.float, np.int, np.object, "datetime64[ns]"])
184184
def test_resample_empty_dtypes(index, dtype, resample_method):
185-
186185
# Empty series were sometimes causing a segfault (for the functions
187186
# with Cython bounds-checking disabled) or an IndexError. We just run
188187
# them to ensure they no longer do. (GH #10228)
@@ -215,13 +214,7 @@ def test_resample_loffset_arg_type(frame, create_index, arg):
215214
if isinstance(arg, list):
216215
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
217216

218-
# GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
219-
if isinstance(expected.index, TimedeltaIndex):
220-
msg = "DataFrame are different"
221-
with pytest.raises(AssertionError, match=msg):
222-
tm.assert_frame_equal(result_agg, expected)
223-
else:
224-
tm.assert_frame_equal(result_agg, expected)
217+
tm.assert_frame_equal(result_agg, expected)
225218

226219

227220
@all_ts

pandas/tests/resample/test_timedelta.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from datetime import timedelta
22

33
import numpy as np
4+
import pytest
45

56
import pandas as pd
67
from pandas import DataFrame, Series
@@ -114,14 +115,33 @@ def test_resample_timedelta_values():
114115
# check that timedelta dtype is preserved when NaT values are
115116
# introduced by the resampling
116117

117-
times = timedelta_range("1 day", "4 day", freq="4D")
118+
times = timedelta_range("1 day", "6 day", freq="4D")
118119
df = DataFrame({"time": times}, index=times)
119120

120-
times2 = timedelta_range("1 day", "4 day", freq="2D")
121+
times2 = timedelta_range("1 day", "6 day", freq="2D")
121122
exp = Series(times2, index=times2, name="time")
122123
exp.iloc[1] = pd.NaT
123124

124125
res = df.resample("2D").first()["time"]
125126
tm.assert_series_equal(res, exp)
126127
res = df["time"].resample("2D").first()
127128
tm.assert_series_equal(res, exp)
129+
130+
131+
@pytest.mark.parametrize(
132+
"freq, resample_freq, start, periods, expected_resample_end",
133+
[("10S", "3H", "8H", 5040, "20H")],
134+
)
135+
def test_resample_timedelta_end_already_included_in_bins(
136+
freq, resample_freq, start, periods, expected_resample_end,
137+
):
138+
# GH 30353
139+
# check that the timedelta bins do not contain an extra bin
140+
idx = pd.timedelta_range(start=start, freq=freq, periods=periods)
141+
s = pd.Series(np.arange(periods), index=idx)
142+
result = s.resample(resample_freq).min()
143+
expected_index = pd.timedelta_range(
144+
freq=resample_freq, start=start, end=expected_resample_end
145+
)
146+
tm.assert_index_equal(result.index, expected_index)
147+
assert not np.isnan(result[-1])

0 commit comments

Comments
 (0)