Skip to content

Commit 104676c

Browse files
committed
BUG: fix a bug when resampling in DST context
1 parent 079c7ce commit 104676c

File tree

2 files changed

+64 -7 lines changed

2 files changed

+64 -7 lines changed

pandas/core/resample.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -1356,7 +1356,7 @@ def __init__(
13561356
self.fill_method = fill_method
13571357
self.limit = limit
13581358

1359-
if origin in {"epoch", "start", "start_day"}:
1359+
if origin in ("epoch", "start", "start_day"):
13601360
self.origin = origin
13611361
else:
13621362
try:
@@ -1705,6 +1705,8 @@ def _get_timestamp_range_edges(
17051705
# So "pretend" the dates are naive when adjusting the endpoints
17061706
first = first.tz_localize(None)
17071707
last = last.tz_localize(None)
1708+
if isinstance(origin, Timestamp):
1709+
origin = origin.tz_localize(None)
17081710

17091711
first, last = _adjust_dates_anchored(
17101712
first, last, freq, closed=closed, origin=origin, offset=offset,

pandas/tests/resample/test_datetime_index.py

+61 -6
Original file line number | Diff line number | Diff line change
@@ -764,21 +764,21 @@ def test_resample_origin():
764764

765765

766766
@pytest.mark.parametrize(
767-
"origin",
768-
["invalid_value", "epch", "startday", "startt", "2000-30-30", object()],
767+
"origin", ["invalid_value", "epch", "startday", "startt", "2000-30-30", object()],
769768
)
770769
def test_resample_bad_origin(origin):
771770
rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s")
772771
ts = Series(np.random.randn(len(rng)), index=rng)
773-
msg = ("'origin' should be equal to 'epoch', 'start', 'start_day' or "
774-
f"should be a Timestamp convertible type. Got '{origin}' instead.")
772+
msg = (
773+
"'origin' should be equal to 'epoch', 'start', 'start_day' or "
774+
f"should be a Timestamp convertible type. Got '{origin}' instead."
775+
)
775776
with pytest.raises(ValueError, match=msg):
776777
ts.resample("5min", origin=origin)
777778

778779

779780
@pytest.mark.parametrize(
780-
"offset",
781-
["invalid_value", "12dayys", "2000-30-30", object()],
781+
"offset", ["invalid_value", "12dayys", "2000-30-30", object()],
782782
)
783783
def test_resample_bad_offset(offset):
784784
rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s")
@@ -846,6 +846,61 @@ def test_resample_origin_with_tz():
846846
ts.resample("5min", origin="12/31/1999 23:57:00+03:00").mean()
847847

848848

849+
def test_resample_origin_with_day_freq_on_dst():
850+
# GH 31809
851+
tz = "dateutil//usr/share/zoneinfo/America/Chicago"
852+
853+
def _create_series(values, timestamps, freq="D"):
854+
return pd.Series(
855+
values,
856+
index=pd.DatetimeIndex(
857+
[Timestamp(t, tz=tz) for t in timestamps], freq=freq, ambiguous=True
858+
),
859+
)
860+
861+
# test classical behavior of origin in a DST context
862+
start = pd.Timestamp("2013-11-02", tz=tz)
863+
end = pd.Timestamp("2013-11-03 23:59", tz=tz)
864+
rng = pd.date_range(start, end, freq="1h")
865+
ts = pd.Series(np.ones(len(rng)), index=rng)
866+
867+
expected = _create_series([24.0, 25.0], ["2013-11-02", "2013-11-03"])
868+
for origin in ["epoch", "start", "start_day", start, None]:
869+
result = ts.resample("D", origin=origin).sum()
870+
tm.assert_series_equal(result, expected)
871+
872+
# test complex behavior of origin/offset in a DST context
873+
start = pd.Timestamp("2013-11-03", tz=tz)
874+
end = pd.Timestamp("2013-11-03 23:59", tz=tz)
875+
rng = pd.date_range(start, end, freq="1h")
876+
ts = pd.Series(np.ones(len(rng)), index=rng)
877+
878+
expected_ts = ["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]
879+
expected = _create_series([23.0, 2.0], expected_ts)
880+
result = ts.resample("D", origin="start", offset="-2H").sum()
881+
tm.assert_series_equal(result, expected)
882+
883+
expected_ts = ["2013-11-02 22:00-05:00", "2013-11-03 21:00-06:00"]
884+
expected = _create_series([22.0, 3.0], expected_ts, freq="24H")
885+
result = ts.resample("24H", origin="start", offset="-2H").sum()
886+
tm.assert_series_equal(result, expected)
887+
888+
expected_ts = ["2013-11-02 02:00-05:00", "2013-11-03 02:00-06:00"]
889+
expected = _create_series([3.0, 22.0], expected_ts)
890+
result = ts.resample("D", origin="start", offset="2H").sum()
891+
tm.assert_series_equal(result, expected)
892+
893+
expected_ts = ["2013-11-02 23:00-05:00", "2013-11-03 23:00-06:00"]
894+
expected = _create_series([24.0, 1.0], expected_ts)
895+
result = ts.resample("D", origin="start", offset="-1H").sum()
896+
tm.assert_series_equal(result, expected)
897+
898+
expected_ts = ["2013-11-02 01:00-05:00", "2013-11-03 01:00:00-0500"]
899+
expected = _create_series([1.0, 24.0], expected_ts)
900+
result = ts.resample("D", origin="start", offset="1H").sum()
901+
tm.assert_series_equal(result, expected)
902+
903+
849904
def test_resample_daily_anchored():
850905
rng = date_range("1/1/2000 0:00:00", periods=10000, freq="T")
851906
ts = Series(np.random.randn(len(rng)), index=rng)

0 commit comments

Comments
 (0)