Skip to content

Commit f3d4113

Browse files
authored
BUG: can't resample with non-nano dateindex, out-of-nanosecond-bounds (#51274)
* fix resample non-nano out-of-nano-bounds
* use periods_per_second and abbrev_to_npy_unit
* autotyping
* require without freq.nanos

---------

Co-authored-by: MarcoGorelli <>
1 parent 081167d commit f3d4113

File tree

2 files changed

+53
-22
lines changed

2 files changed

+53
-22
lines changed

pandas/core/resample.py

+27-21
Original file line number | Diff line number | Diff line change
@@ -1741,6 +1741,7 @@ def _get_time_bins(self, ax: DatetimeIndex):
17411741
ax.min(),
17421742
ax.max(),
17431743
self.freq,
1744+
unit=ax.unit,
17441745
closed=self.closed,
17451746
origin=self.origin,
17461747
offset=self.offset,
@@ -1760,7 +1761,8 @@ def _get_time_bins(self, ax: DatetimeIndex):
17601761
name=ax.name,
17611762
ambiguous=True,
17621763
nonexistent="shift_forward",
1763-
).as_unit(ax.unit)
1764+
unit=ax.unit,
1765+
)
17641766

17651767
ax_values = ax.asi8
17661768
binner, bin_edges = self._adjust_bin_edges(binner, ax_values)
@@ -1970,6 +1972,7 @@ def _get_timestamp_range_edges(
19701972
first: Timestamp,
19711973
last: Timestamp,
19721974
freq: BaseOffset,
1975+
unit: str,
19731976
closed: Literal["right", "left"] = "left",
19741977
origin: TimeGrouperOrigin = "start_day",
19751978
offset: Timedelta | None = None,
@@ -2025,7 +2028,7 @@ def _get_timestamp_range_edges(
20252028
origin = origin.tz_localize(None)
20262029

20272030
first, last = _adjust_dates_anchored(
2028-
first, last, freq, closed=closed, origin=origin, offset=offset
2031+
first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit
20292032
)
20302033
if isinstance(freq, Day):
20312034
first = first.tz_localize(index_tz)
@@ -2092,7 +2095,7 @@ def _get_period_range_edges(
20922095
adjust_last = freq.is_on_offset(last_ts)
20932096

20942097
first_ts, last_ts = _get_timestamp_range_edges(
2095-
first_ts, last_ts, freq, closed=closed, origin=origin, offset=offset
2098+
first_ts, last_ts, freq, unit="ns", closed=closed, origin=origin, offset=offset
20962099
)
20972100

20982101
first = (first_ts + int(adjust_first) * freq).to_period(freq)
@@ -2125,32 +2128,35 @@ def _adjust_dates_anchored(
21252128
closed: Literal["right", "left"] = "right",
21262129
origin: TimeGrouperOrigin = "start_day",
21272130
offset: Timedelta | None = None,
2131+
unit: str = "ns",
21282132
) -> tuple[Timestamp, Timestamp]:
21292133
# First and last offsets should be calculated from the start day to fix an
21302134
# error caused by resampling across multiple days when a one day period is
21312135
# not a multiple of the frequency. See GH 8683
21322136
# To handle frequencies that are neither a multiple nor a divisor of a day,
21332137
# we allow a fixed origin timestamp to be defined. See GH 31809
2134-
first = first.as_unit("ns")
2135-
last = last.as_unit("ns")
2138+
first = first.as_unit(unit)
2139+
last = last.as_unit(unit)
21362140
if offset is not None:
2137-
offset = offset.as_unit("ns")
2141+
offset = offset.as_unit(unit)
2142+
2143+
freq_value = Timedelta(freq).as_unit(unit)._value
21382144

2139-
origin_nanos = 0 # origin == "epoch"
2145+
origin_timestamp = 0 # origin == "epoch"
21402146
if origin == "start_day":
2141-
origin_nanos = first.normalize()._value
2147+
origin_timestamp = first.normalize()._value
21422148
elif origin == "start":
2143-
origin_nanos = first._value
2149+
origin_timestamp = first._value
21442150
elif isinstance(origin, Timestamp):
2145-
origin_nanos = origin.as_unit("ns")._value
2151+
origin_timestamp = origin.as_unit(unit)._value
21462152
elif origin in ["end", "end_day"]:
21472153
origin_last = last if origin == "end" else last.ceil("D")
2148-
sub_freq_times = (origin_last._value - first._value) // freq.nanos
2154+
sub_freq_times = (origin_last._value - first._value) // freq_value
21492155
if closed == "left":
21502156
sub_freq_times += 1
21512157
first = origin_last - sub_freq_times * freq
2152-
origin_nanos = first._value
2153-
origin_nanos += offset._value if offset else 0
2158+
origin_timestamp = first._value
2159+
origin_timestamp += offset._value if offset else 0
21542160

21552161
# GH 10117 & GH 19375. If first and last contain timezone information,
21562162
# perform the calculation in UTC in order to avoid localizing on an
@@ -2162,19 +2168,19 @@ def _adjust_dates_anchored(
21622168
if last_tzinfo is not None:
21632169
last = last.tz_convert("UTC")
21642170

2165-
foffset = (first._value - origin_nanos) % freq.nanos
2166-
loffset = (last._value - origin_nanos) % freq.nanos
2171+
foffset = (first._value - origin_timestamp) % freq_value
2172+
loffset = (last._value - origin_timestamp) % freq_value
21672173

21682174
if closed == "right":
21692175
if foffset > 0:
21702176
# roll back
21712177
fresult_int = first._value - foffset
21722178
else:
2173-
fresult_int = first._value - freq.nanos
2179+
fresult_int = first._value - freq_value
21742180

21752181
if loffset > 0:
21762182
# roll forward
2177-
lresult_int = last._value + (freq.nanos - loffset)
2183+
lresult_int = last._value + (freq_value - loffset)
21782184
else:
21792185
# already the end of the road
21802186
lresult_int = last._value
@@ -2187,11 +2193,11 @@ def _adjust_dates_anchored(
21872193

21882194
if loffset > 0:
21892195
# roll forward
2190-
lresult_int = last._value + (freq.nanos - loffset)
2196+
lresult_int = last._value + (freq_value - loffset)
21912197
else:
2192-
lresult_int = last._value + freq.nanos
2193-
fresult = Timestamp(fresult_int)
2194-
lresult = Timestamp(lresult_int)
2198+
lresult_int = last._value + freq_value
2199+
fresult = Timestamp(fresult_int, unit=unit)
2200+
lresult = Timestamp(lresult_int, unit=unit)
21952201
if first_tzinfo is not None:
21962202
fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo)
21972203
if last_tzinfo is not None:

pandas/tests/resample/test_datetime_index.py

+26-1
Original file line number | Diff line number | Diff line change
@@ -1838,7 +1838,7 @@ def test_get_timestamp_range_edges(first, last, freq, exp_first, exp_last, unit)
18381838
exp_last = Timestamp(exp_last)
18391839

18401840
freq = pd.tseries.frequencies.to_offset(freq)
1841-
result = _get_timestamp_range_edges(first, last, freq)
1841+
result = _get_timestamp_range_edges(first, last, freq, unit="ns")
18421842
expected = (exp_first, exp_last)
18431843
assert result == expected
18441844

@@ -1949,3 +1949,28 @@ def test_resample_unsigned_int(any_unsigned_int_numpy_dtype, unit):
19491949
),
19501950
)
19511951
tm.assert_frame_equal(result, expected)
1952+
1953+
1954+
def test_long_rule_non_nano():
1955+
# https://github.com/pandas-dev/pandas/issues/51024
1956+
idx = date_range("0300-01-01", "2000-01-01", unit="s", freq="100Y")
1957+
ser = Series([1, 4, 2, 8, 5, 7, 1, 4, 2, 8, 5, 7, 1, 4, 2, 8, 5], index=idx)
1958+
result = ser.resample("200Y").mean()
1959+
expected_idx = DatetimeIndex(
1960+
np.array(
1961+
[
1962+
"0300-12-31",
1963+
"0500-12-31",
1964+
"0700-12-31",
1965+
"0900-12-31",
1966+
"1100-12-31",
1967+
"1300-12-31",
1968+
"1500-12-31",
1969+
"1700-12-31",
1970+
"1900-12-31",
1971+
]
1972+
).astype("datetime64[s]"),
1973+
freq="200A-DEC",
1974+
)
1975+
expected = Series([1.0, 3.0, 6.5, 4.0, 3.0, 6.5, 4.0, 3.0, 6.5], index=expected_idx)
1976+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)