Skip to content

Commit 98f5a78

Browse files
authored
BUG: df.resample('MS', closed='right') incorrectly places bins (#55283)
* hardcode allowed freqs for hack
* whatsnew
* tests for business day issues
* move whatsnew to 2.1.2
1 parent 1f16762 commit 98f5a78

File tree

3 files changed

+78
-3
lines changed

3 files changed

+78
-3
lines changed

doc/source/whatsnew/v2.1.2.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16+
- Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
1617
- Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
1718
-
1819

@@ -21,7 +22,8 @@ Fixed regressions
2122

2223
Bug fixes
2324
~~~~~~~~~
24-
-
25+
- Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
26+
- Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
2527
-
2628

2729
.. ---------------------------------------------------------------------------

pandas/core/resample.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -2297,7 +2297,17 @@ def _adjust_bin_edges(
22972297
) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]:
22982298
# Some hacks for > daily data, see #1471, #1458, #1483
22992299

2300-
if self.freq != "D" and is_superperiod(self.freq, "D"):
2300+
if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in (
2301+
"BQ",
2302+
"BA",
2303+
"Q",
2304+
"A",
2305+
"W",
2306+
):
2307+
# If the right end-point is on the last day of the month, roll forwards
2308+
# until the last moment of that day. Note that we only do this for offsets
2309+
# which correspond to the end of a super-daily period - "month start", for
2310+
# example, is excluded.
23012311
if self.closed == "right":
23022312
# GH 21459, GH 9119: Adjust the bins relative to the wall time
23032313
edges_dti = binner.tz_localize(None)

pandas/tests/resample/test_datetime_index.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ def test_resample_reresample(unit):
660660
s = Series(np.random.default_rng(2).random(len(dti)), dti)
661661
bs = s.resample("B", closed="right", label="right").mean()
662662
result = bs.resample("8H").mean()
663-
assert len(result) == 22
663+
assert len(result) == 25
664664
assert isinstance(result.index.freq, offsets.DateOffset)
665665
assert result.index.freq == offsets.Hour(8)
666666

@@ -2051,3 +2051,66 @@ def test_resample_M_deprecated():
20512051
with tm.assert_produces_warning(UserWarning, match=depr_msg):
20522052
result = s.resample("2M").mean()
20532053
tm.assert_series_equal(result, expected)
2054+
2055+
2056+
def test_resample_ms_closed_right():
2057+
# https://github.com/pandas-dev/pandas/issues/55271
2058+
dti = date_range(start="2020-01-31", freq="1min", periods=6000)
2059+
df = DataFrame({"ts": dti}, index=dti)
2060+
grouped = df.resample("MS", closed="right")
2061+
result = grouped.last()
2062+
expected = DataFrame(
2063+
{"ts": [datetime(2020, 2, 1), datetime(2020, 2, 4, 3, 59)]},
2064+
index=DatetimeIndex([datetime(2020, 1, 1), datetime(2020, 2, 1)], freq="MS"),
2065+
)
2066+
tm.assert_frame_equal(result, expected)
2067+
2068+
2069+
@pytest.mark.parametrize("freq", ["B", "C"])
2070+
def test_resample_c_b_closed_right(freq: str):
2071+
# https://github.com/pandas-dev/pandas/issues/55281
2072+
dti = date_range(start="2020-01-31", freq="1min", periods=6000)
2073+
df = DataFrame({"ts": dti}, index=dti)
2074+
grouped = df.resample(freq, closed="right")
2075+
result = grouped.last()
2076+
expected = DataFrame(
2077+
{
2078+
"ts": [
2079+
datetime(2020, 1, 31),
2080+
datetime(2020, 2, 3),
2081+
datetime(2020, 2, 4),
2082+
datetime(2020, 2, 4, 3, 59),
2083+
]
2084+
},
2085+
index=DatetimeIndex(
2086+
[
2087+
datetime(2020, 1, 30),
2088+
datetime(2020, 1, 31),
2089+
datetime(2020, 2, 3),
2090+
datetime(2020, 2, 4),
2091+
],
2092+
freq=freq,
2093+
),
2094+
)
2095+
tm.assert_frame_equal(result, expected)
2096+
2097+
2098+
def test_resample_b_55282():
2099+
# https://github.com/pandas-dev/pandas/issues/55282
2100+
s = Series(
2101+
[1, 2, 3, 4, 5, 6], index=date_range("2023-09-26", periods=6, freq="12H")
2102+
)
2103+
result = s.resample("B", closed="right", label="right").mean()
2104+
expected = Series(
2105+
[1.0, 2.5, 4.5, 6.0],
2106+
index=DatetimeIndex(
2107+
[
2108+
datetime(2023, 9, 26),
2109+
datetime(2023, 9, 27),
2110+
datetime(2023, 9, 28),
2111+
datetime(2023, 9, 29),
2112+
],
2113+
freq="B",
2114+
),
2115+
)
2116+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)