Skip to content

Commit 3e30241

Browse files
hkhojasteh and meeseeksmachine
authored and committed
Backport PR pandas-dev#55173: BUG: .rolling() returns incorrect values when ts index is not nano seconds
1 parent 0a44346 commit 3e30241

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

doc/source/whatsnew/v2.1.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ including other versions of pandas.
1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
1616
- Fixed regression in :meth:`DataFrame.join` where result has missing values and dtype is arrow backed string (:issue:`55348`)
17+
- Fixed regression in :meth:`~DataFrame.rolling` where non-nanosecond index or ``on`` column would produce incorrect results (:issue:`55026`, :issue:`55106`, :issue:`55299`)
1718
- Fixed regression in :meth:`DataFrame.resample` which was extrapolating back to ``origin`` when ``origin`` was outside its bounds (:issue:`55064`)
1819
- Fixed regression in :meth:`DataFrame.sort_index` which was not sorting correctly when the index was a sliced :class:`MultiIndex` (:issue:`55379`)
1920
- Fixed regression in :meth:`DataFrameGroupBy.agg` and :meth:`SeriesGroupBy.agg` where if the option ``compute.use_numba`` was set to True, groupby methods not supported by the numba engine would raise a ``TypeError`` (:issue:`55520`)

pandas/core/window/rolling.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from pandas._libs.tslibs import (
2323
BaseOffset,
24+
Timedelta,
2425
to_offset,
2526
)
2627
import pandas._libs.window.aggregations as window_aggregations
@@ -112,6 +113,8 @@
112113
from pandas.core.generic import NDFrame
113114
from pandas.core.groupby.ops import BaseGrouper
114115

116+
from pandas.core.arrays.datetimelike import dtype_to_unit
117+
115118

116119
class BaseWindow(SelectionMixin):
117120
"""Provides utilities for performing windowing operations."""
@@ -1882,7 +1885,12 @@ def _validate(self):
18821885
self._on.freq.nanos / self._on.freq.n
18831886
)
18841887
else:
1885-
self._win_freq_i8 = freq.nanos
1888+
try:
1889+
unit = dtype_to_unit(self._on.dtype) # type: ignore[arg-type]
1890+
except TypeError:
1891+
# if not a datetime dtype, eg for empty dataframes
1892+
unit = "ns"
1893+
self._win_freq_i8 = Timedelta(freq.nanos).as_unit(unit)._value
18861894

18871895
# min_periods must be an integer
18881896
if self.min_periods is None:

pandas/tests/window/test_rolling.py

+32
Original file line numberDiff line numberDiff line change
@@ -1878,3 +1878,35 @@ def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
18781878
op2 = getattr(rolling2, kernel)
18791879
expected = op2(*arg2, numeric_only=numeric_only)
18801880
tm.assert_series_equal(result, expected)
1881+
1882+
1883+
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
@pytest.mark.parametrize("tz", [None, "UTC", "Europe/Prague"])
def test_rolling_timedelta_window_non_nanoseconds(unit, tz):
    """Check that time-based rolling agrees across index resolutions.

    For each ``unit``/``tz`` combination the rolling result computed on an
    index stored in ``unit`` is compared, after converting its index back to
    nanoseconds, with the result computed on a nanosecond index.
    """
    # --- rolling sum over a "1s" string window (GH#55106) ---
    second_index = date_range("2013-01-01", freq="1s", periods=5, tz=tz)
    frame = DataFrame({"A": range(5)}, index=second_index)
    expected_sum = frame.rolling("1s").sum()

    # Re-express the very same frame's index in the unit under test; the
    # rolling sum must be unaffected by the coarser/finer resolution.
    frame.index = frame.index.as_unit(unit)
    result_sum = frame.rolling("1s").sum()
    # Bring the index back to nanoseconds so only the values can differ.
    result_sum.index = result_sum.index.as_unit("ns")
    tm.assert_frame_equal(expected_sum, result_sum)

    # --- rolling max over a Timedelta window (GH#55026) ---
    window = Timedelta(days=4)

    # Reference: nanosecond-resolution index, a single 1 followed by zeros.
    expected_index = date_range("2023-01-01", "2023-01-10", unit="ns", tz=tz)
    expected_input = Series(0, index=expected_index)
    expected_input.iloc[0] = 1
    expected_max = DataFrame(expected_input.rolling(window).max())

    # Same data, but the index is created directly in the unit under test.
    result_index = date_range("2023-01-01", "2023-01-10", unit=unit, tz=tz)
    result_input = Series(0, index=result_index)
    result_input.iloc[0] = 1
    result_max = DataFrame(result_input.rolling(window).max())
    result_max.index = result_max.index.as_unit("ns")

    tm.assert_frame_equal(expected_max, result_max)

0 commit comments

Comments
 (0)