diff --git a/pandas/core/resample.py b/pandas/core/resample.py index cb129ec272ff3..96982f8727188 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1731,6 +1731,7 @@ def _get_time_bins(self, ax: DatetimeIndex): ax.min(), ax.max(), self.freq, + unit=ax.unit, closed=self.closed, origin=self.origin, offset=self.offset, @@ -1750,7 +1751,8 @@ def _get_time_bins(self, ax: DatetimeIndex): name=ax.name, ambiguous=True, nonexistent="shift_forward", - ).as_unit(ax.unit) + unit=ax.unit, + ) ax_values = ax.asi8 binner, bin_edges = self._adjust_bin_edges(binner, ax_values) @@ -1960,6 +1962,7 @@ def _get_timestamp_range_edges( first: Timestamp, last: Timestamp, freq: BaseOffset, + unit: str, closed: Literal["right", "left"] = "left", origin: TimeGrouperOrigin = "start_day", offset: Timedelta | None = None, @@ -2015,7 +2018,7 @@ def _get_timestamp_range_edges( origin = origin.tz_localize(None) first, last = _adjust_dates_anchored( - first, last, freq, closed=closed, origin=origin, offset=offset + first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit ) if isinstance(freq, Day): first = first.tz_localize(index_tz) @@ -2082,7 +2085,7 @@ def _get_period_range_edges( adjust_last = freq.is_on_offset(last_ts) first_ts, last_ts = _get_timestamp_range_edges( - first_ts, last_ts, freq, closed=closed, origin=origin, offset=offset + first_ts, last_ts, freq, unit="ns", closed=closed, origin=origin, offset=offset ) first = (first_ts + int(adjust_first) * freq).to_period(freq) @@ -2115,32 +2118,35 @@ def _adjust_dates_anchored( closed: Literal["right", "left"] = "right", origin: TimeGrouperOrigin = "start_day", offset: Timedelta | None = None, + unit: str = "ns", ) -> tuple[Timestamp, Timestamp]: # First and last offsets should be calculated from the start day to fix an # error cause by resampling across multiple days when a one day period is # not a multiple of the frequency. See GH 8683 # To handle frequencies that are not multiple or divisible by a day we let # the possibility to define a fixed origin timestamp. See GH 31809 - first = first.as_unit("ns") - last = last.as_unit("ns") + first = first.as_unit(unit) + last = last.as_unit(unit) if offset is not None: - offset = offset.as_unit("ns") + offset = offset.as_unit(unit) + + freq_value = Timedelta(freq).as_unit(unit)._value - origin_nanos = 0 # origin == "epoch" + origin_timestamp = 0 # origin == "epoch" if origin == "start_day": - origin_nanos = first.normalize()._value + origin_timestamp = first.normalize()._value elif origin == "start": - origin_nanos = first._value + origin_timestamp = first._value elif isinstance(origin, Timestamp): - origin_nanos = origin.as_unit("ns")._value + origin_timestamp = origin.as_unit(unit)._value elif origin in ["end", "end_day"]: origin_last = last if origin == "end" else last.ceil("D") - sub_freq_times = (origin_last._value - first._value) // freq.nanos + sub_freq_times = (origin_last._value - first._value) // freq_value if closed == "left": sub_freq_times += 1 first = origin_last - sub_freq_times * freq - origin_nanos = first._value - origin_nanos += offset._value if offset else 0 + origin_timestamp = first._value + origin_timestamp += offset._value if offset else 0 # GH 10117 & GH 19375. If first and last contain timezone information, # Perform the calculation in UTC in order to avoid localizing on an @@ -2152,19 +2158,19 @@ def _adjust_dates_anchored( if last_tzinfo is not None: last = last.tz_convert("UTC") - foffset = (first._value - origin_nanos) % freq.nanos - loffset = (last._value - origin_nanos) % freq.nanos + foffset = (first._value - origin_timestamp) % freq_value + loffset = (last._value - origin_timestamp) % freq_value if closed == "right": if foffset > 0: # roll back fresult_int = first._value - foffset else: - fresult_int = first._value - freq.nanos + fresult_int = first._value - freq_value if loffset > 0: # roll forward - lresult_int = last._value + (freq.nanos - loffset) + lresult_int = last._value + (freq_value - loffset) else: # already the end of the road lresult_int = last._value @@ -2177,11 +2183,11 @@ def _adjust_dates_anchored( if loffset > 0: # roll forward - lresult_int = last._value + (freq.nanos - loffset) + lresult_int = last._value + (freq_value - loffset) else: - lresult_int = last._value + freq.nanos - fresult = Timestamp(fresult_int) - lresult = Timestamp(lresult_int) + lresult_int = last._value + freq_value + fresult = Timestamp(fresult_int, unit=unit) + lresult = Timestamp(lresult_int, unit=unit) if first_tzinfo is not None: fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo) if last_tzinfo is not None: diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index d18db6ab5f643..13041a81dadcf 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1838,7 +1838,7 @@ def test_get_timestamp_range_edges(first, last, freq, exp_first, exp_last, unit) exp_last = Timestamp(exp_last) freq = pd.tseries.frequencies.to_offset(freq) - result = _get_timestamp_range_edges(first, last, freq) + result = _get_timestamp_range_edges(first, last, freq, unit="ns") expected = (exp_first, exp_last) assert result == expected @@ -1949,3 +1949,28 @@ def test_resample_unsigned_int(any_unsigned_int_numpy_dtype, unit): ), ) tm.assert_frame_equal(result, expected) + + +def test_long_rule_non_nano(): + # https://github.com/pandas-dev/pandas/issues/51024 + idx = date_range("0300-01-01", "2000-01-01", unit="s", freq="100Y") + ser = Series([1, 4, 2, 8, 5, 7, 1, 4, 2, 8, 5, 7, 1, 4, 2, 8, 5], index=idx) + result = ser.resample("200Y").mean() + expected_idx = DatetimeIndex( + np.array( + [ + "0300-12-31", + "0500-12-31", + "0700-12-31", + "0900-12-31", + "1100-12-31", + "1300-12-31", + "1500-12-31", + "1700-12-31", + "1900-12-31", + ] + ).astype("datetime64[s]"), + freq="200A-DEC", + ) + expected = Series([1.0, 3.0, 6.5, 4.0, 3.0, 6.5, 4.0, 3.0, 6.5], index=expected_idx) + tm.assert_series_equal(result, expected)