From 277138126427b52d36e2cf6ccc58c2bf9f6945e7 Mon Sep 17 00:00:00 2001 From: Mathis Felardos Date: Sat, 30 May 2020 01:17:57 +0200 Subject: [PATCH 1/2] BUG: fix origin epoch when freq is Day and harmonize epoch between timezones --- pandas/core/resample.py | 3 +++ pandas/tests/resample/test_datetime_index.py | 28 ++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4a4c9a1d7434b..8bcc7a6d5fb9d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1696,6 +1696,9 @@ def _get_timestamp_range_edges( index_tz = first.tz if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): raise ValueError("The origin must have the same timezone as the index.") + if origin == "epoch": + # set the epoch based on the timezone to have similar result between timezones + origin = Timestamp("1970-01-01", tz=index_tz) if isinstance(freq, Tick): if isinstance(freq, Day): diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index fe005801aaa53..9909e554aa14d 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -846,6 +846,34 @@ def test_resample_origin_with_tz(): ts.resample("5min", origin="12/31/1999 23:57:00+03:00").mean() +def test_resample_origin_epoch_with_tz_day_vs_24h(): + # GH 34474 + start, end = "2000-10-01 23:30:00+0500", "2000-12-02 00:30:00+0500" + rng = pd.date_range(start, end, freq="7min") + random_values = np.random.randn(len(rng)) + ts_1 = pd.Series(random_values, index=rng) + + result_1 = ts_1.resample("D", origin="epoch").mean() + result_2 = ts_1.resample("24H", origin="epoch").mean() + tm.assert_series_equal(result_1, result_2) + + # check that we have the same behavior with epoch even if we are not timezone aware + ts_no_tz = ts_1.tz_localize(None) + result_3 = ts_no_tz.resample("D", origin="epoch").mean() + result_4 = ts_no_tz.resample("24H", 
origin="epoch").mean() + tm.assert_series_equal(result_1, result_3.tz_localize(rng.tz), check_freq=False) + tm.assert_series_equal(result_1, result_4.tz_localize(rng.tz), check_freq=False) + + # check that we have similar results with two different timezones (+2H and +5H) + start, end = "2000-10-01 23:30:00+0200", "2000-12-02 00:30:00+0200" + rng = pd.date_range(start, end, freq="7min") + ts_2 = pd.Series(random_values, index=rng) + result_5 = ts_2.resample("D", origin="epoch").mean() + result_6 = ts_2.resample("24H", origin="epoch").mean() + tm.assert_series_equal(result_1.tz_localize(None), result_5.tz_localize(None)) + tm.assert_series_equal(result_1.tz_localize(None), result_6.tz_localize(None)) + + def test_resample_origin_with_day_freq_on_dst(): # GH 31809 tz = "America/Chicago" From c78e29b33df54f83c4c838a27a38fe3df9e0c335 Mon Sep 17 00:00:00 2001 From: Mathis Felardos Date: Mon, 1 Jun 2020 08:47:06 +0200 Subject: [PATCH 2/2] CLN: small refactor in resample.py --- pandas/core/resample.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 8bcc7a6d5fb9d..5df80645c2b5d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1693,14 +1693,15 @@ def _get_timestamp_range_edges( ------- A tuple of length 2, containing the adjusted pd.Timestamp objects. 
""" - index_tz = first.tz - if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): - raise ValueError("The origin must have the same timezone as the index.") - if origin == "epoch": - # set the epoch based on the timezone to have similar result between timezones - origin = Timestamp("1970-01-01", tz=index_tz) - if isinstance(freq, Tick): + index_tz = first.tz + if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): + raise ValueError("The origin must have the same timezone as the index.") + elif origin == "epoch": + # set the epoch based on the timezone to have similar bins results when + # resampling on the same kind of indexes on different timezones + origin = Timestamp("1970-01-01", tz=index_tz) + if isinstance(freq, Day): # _adjust_dates_anchored assumes 'D' means 24H, but first/last # might contain a DST transition (23H, 24H, or 25H).