Skip to content

Commit 3970153

Browse files
authored
BUG: fix origin epoch when freq is Day and harmonize epoch between timezones (#34474)
1 parent 4743cc9 commit 3970153

File tree

2 files changed

+36
-4
lines changed

2 files changed

+36
-4
lines changed

pandas/core/resample.py

+8 -4
Original file line numberDiff line numberDiff line change
@@ -1693,11 +1693,15 @@ def _get_timestamp_range_edges(
16931693
-------
16941694
A tuple of length 2, containing the adjusted pd.Timestamp objects.
16951695
"""
1696-
index_tz = first.tz
1697-
if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None):
1698-
raise ValueError("The origin must have the same timezone as the index.")
1699-
17001696
if isinstance(freq, Tick):
1697+
index_tz = first.tz
1698+
if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None):
1699+
raise ValueError("The origin must have the same timezone as the index.")
1700+
elif origin == "epoch":
1701+
# set the epoch based on the timezone to have similar bin results when
1702+
# resampling on the same kind of indexes on different timezones
1703+
origin = Timestamp("1970-01-01", tz=index_tz)
1704+
17011705
if isinstance(freq, Day):
17021706
# _adjust_dates_anchored assumes 'D' means 24H, but first/last
17031707
# might contain a DST transition (23H, 24H, or 25H).

pandas/tests/resample/test_datetime_index.py

+28
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,34 @@ def test_resample_origin_with_tz():
846846
ts.resample("5min", origin="12/31/1999 23:57:00+03:00").mean()
847847

848848

849+
def test_resample_origin_epoch_with_tz_day_vs_24h():
850+
# GH 34474
851+
start, end = "2000-10-01 23:30:00+0500", "2000-12-02 00:30:00+0500"
852+
rng = pd.date_range(start, end, freq="7min")
853+
random_values = np.random.randn(len(rng))
854+
ts_1 = pd.Series(random_values, index=rng)
855+
856+
result_1 = ts_1.resample("D", origin="epoch").mean()
857+
result_2 = ts_1.resample("24H", origin="epoch").mean()
858+
tm.assert_series_equal(result_1, result_2)
859+
860+
# check that we have the same behavior with epoch even if we are not timezone aware
861+
ts_no_tz = ts_1.tz_localize(None)
862+
result_3 = ts_no_tz.resample("D", origin="epoch").mean()
863+
result_4 = ts_no_tz.resample("24H", origin="epoch").mean()
864+
tm.assert_series_equal(result_1, result_3.tz_localize(rng.tz), check_freq=False)
865+
tm.assert_series_equal(result_1, result_4.tz_localize(rng.tz), check_freq=False)
866+
867+
# check that we have similar results with two different timezones (+2H and +5H)
868+
start, end = "2000-10-01 23:30:00+0200", "2000-12-02 00:30:00+0200"
869+
rng = pd.date_range(start, end, freq="7min")
870+
ts_2 = pd.Series(random_values, index=rng)
871+
result_5 = ts_2.resample("D", origin="epoch").mean()
872+
result_6 = ts_2.resample("24H", origin="epoch").mean()
873+
tm.assert_series_equal(result_1.tz_localize(None), result_5.tz_localize(None))
874+
tm.assert_series_equal(result_1.tz_localize(None), result_6.tz_localize(None))
875+
876+
849877
def test_resample_origin_with_day_freq_on_dst():
850878
# GH 31809
851879
tz = "America/Chicago"

0 commit comments

Comments
 (0)