From 7730b266a7bbfa883f3b4930cda320346ac7ce66 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 28 Jun 2019 04:02:34 +0300 Subject: [PATCH 1/5] ENH: allow non-Tick offsets in DatetimeIndex.ceil/floor --- pandas/core/arrays/datetimelike.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 93166759d8dbd..f5c098605d18f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -299,7 +299,28 @@ class TimelikeOps: def _round(self, freq, mode, ambiguous, nonexistent): # round the local times values = _ensure_datetimelike_to_i8(self) - result = round_nsint64(values, mode, freq) + try: + result = round_nsint64(values, mode, freq) + except ValueError as e: + # non-fixed offset, cannot do ns calculation. + # user freq.rollforward/back machinery instead + offset = frequencies.to_offset(freq) + if "non-fixed" in str(e): + if mode == RoundTo.PLUS_INFTY: + result = (self + offset).asi8 + elif mode == RoundTo.MINUS_INFTY: + result = (self - offset).asi8 + elif mode == RoundTo.NEAREST_HALF_EVEN: + msg = ("round only supported fixed offsets " + "(i.e. 'Day' is ok, 'MonthEnd' is not). " + "You may use snap or floor/ceil if applicable.") + raise ValueError(msg) + # upper = (self + offset).asi8 + # lower = (self - offset).asi8 + # mask = (upper-values) <= (values-lower) + # result = np.where(mask, lower, upper).asi8 + else: + raise e result = self._maybe_mask_results(result, fill_value=NaT) dtype = self.dtype From e48c5b099d52fc6fc4e83d89d4ee24d585f77abf Mon Sep 17 00:00:00 2001 From: pilkibun Date: Sun, 30 Jun 2019 20:25:40 +0300 Subject: [PATCH 2/5] Cleanup --- pandas/core/arrays/datetimelike.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f5c098605d18f..26a4a14cd8706 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -311,14 +311,11 @@ def _round(self, freq, mode, ambiguous, nonexistent): elif mode == RoundTo.MINUS_INFTY: result = (self - offset).asi8 elif mode == RoundTo.NEAREST_HALF_EVEN: - msg = ("round only supported fixed offsets " + msg = ("round only supports fixed offsets " "(i.e. 'Day' is ok, 'MonthEnd' is not). " - "You may use snap or floor/ceil if applicable.") + "You can use dti.snap or floor/ceil if " + "applicable.") raise ValueError(msg) - # upper = (self + offset).asi8 - # lower = (self - offset).asi8 - # mask = (upper-values) <= (values-lower) - # result = np.where(mask, lower, upper).asi8 else: raise e result = self._maybe_mask_results(result, fill_value=NaT) From 49852ee94fbf1afc67827b1c6b9477802a1902d0 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Sun, 30 Jun 2019 20:53:30 +0300 Subject: [PATCH 3/5] Use to_period(offset).to_timestamp --- pandas/core/arrays/datetimelike.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 26a4a14cd8706..6e3c579ed7b17 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -302,14 +302,14 @@ def _round(self, freq, mode, ambiguous, nonexistent): try: result = round_nsint64(values, mode, freq) except ValueError as e: - # non-fixed offset, cannot do ns calculation. - # user freq.rollforward/back machinery instead - offset = frequencies.to_offset(freq) if "non-fixed" in str(e): + offset = frequencies.to_offset(freq) if mode == RoundTo.PLUS_INFTY: - result = (self + offset).asi8 + result = self.to_period(offset) \ + .to_timestamp(how='end').asi8 elif mode == RoundTo.MINUS_INFTY: - result = (self - offset).asi8 + result = self.to_period(offset) \ + .to_timestamp(how='start').asi8 elif mode == RoundTo.NEAREST_HALF_EVEN: msg = ("round only supports fixed offsets " "(i.e. 'Day' is ok, 'MonthEnd' is not). " @@ -318,6 +318,7 @@ def _round(self, freq, mode, ambiguous, nonexistent): raise ValueError(msg) else: raise e + result = self._maybe_mask_results(result, fill_value=NaT) dtype = self.dtype From 831cda66bc4a076966361a06b3c24ebfa336784e Mon Sep 17 00:00:00 2001 From: pilkibun Date: Sun, 30 Jun 2019 20:53:38 +0300 Subject: [PATCH 4/5] Add tests --- pandas/tests/arrays/test_datetimes.py | 63 +++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index c7c0e1180ce46..df57182da3d3a 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -301,3 +301,66 @@ def test_min_max_empty(self, skipna, tz): result = arr.max(skipna=skipna) assert result is pd.NaT + + @pytest.mark.parametrize( + 'op, freq, dates, expected_dates', + [ + ( + 'floor', + 'M', + ("2001-02-01", + pd.Timestamp("2001-02-14 12:00") - pd.offsets.Nano(), + "2001-02-14 12:00", + "2001-02-15", + pd.Timestamp("2001-03-01") - pd.offsets.Nano(), + "2001-03-01" + ), + [ + "2001-02-01", + "2001-02-01", + "2001-02-01", + "2001-02-01", + "2001-02-01", + "2001-03-01" + ] + ), + ( + 'ceil', + 'M', + ("2001-02-01", + pd.Timestamp( + "2001-02-14 12:00") - pd.offsets.Nano(), + "2001-02-14 12:00", + "2001-02-15", + pd.Timestamp("2001-03-01") - pd.offsets.Nano(), + "2001-03-01" + ), + [ + pd.Timestamp("2001-03-01") - + pd.offsets.Nano(), + pd.Timestamp("2001-03-01") - + pd.offsets.Nano(), + pd.Timestamp("2001-03-01") - + pd.offsets.Nano(), + pd.Timestamp("2001-03-01") - + pd.offsets.Nano(), + pd.Timestamp("2001-03-01") - + pd.offsets.Nano(), + pd.Timestamp("2001-04-01") - + pd.offsets.Nano(), + ] + ) + ] + ) + def test_ceil_floor(self, op, freq, dates, expected_dates): + dta = DatetimeArray._from_sequence(dates) + dta[1] -= pd.offsets.Nano() + dta[-2] -= pd.offsets.Nano() + dti = pd.DatetimeIndex(dta) + result = getattr(dti, op)(freq) + expected = pd.DatetimeIndex(expected_dates) + tm.assert_index_equal(result, expected) + + # Idempotent + result = getattr(result, op)(freq) + tm.assert_index_equal(result, expected) From 46bafc84fd6a7383fd634b06dc4e14bfadb8e955 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Sun, 30 Jun 2019 21:11:00 +0300 Subject: [PATCH 5/5] Work on tests --- pandas/tests/arrays/test_datetimes.py | 71 ++++++++++++++++++++------- 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index df57182da3d3a..1c1990a037ee7 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -11,9 +11,10 @@ import pandas as pd from pandas.core.arrays import DatetimeArray from pandas.core.arrays.datetimes import sequence_to_dt64ns +from pandas.tseries.offsets import Nano +from pandas import Timestamp import pandas.util.testing as tm - class TestDatetimeArrayConstructor: def test_only_1dim_accepted(self): @@ -309,10 +310,49 @@ def test_min_max_empty(self, skipna, tz): 'floor', 'M', ("2001-02-01", - pd.Timestamp("2001-02-14 12:00") - pd.offsets.Nano(), + Timestamp("2001-02-14 12:00") - Nano(), + "2001-02-14 12:00", + "2001-02-15", + Timestamp("2001-03-01") - Nano(), + "2001-03-01" + ), + [ + Timestamp("2001-02-01") - Nano(), + Timestamp("2001-02-01") - Nano(), + Timestamp("2001-02-01") - Nano(), + Timestamp("2001-02-01") - Nano(), + Timestamp("2001-03-01") - Nano(), + Timestamp("2001-03-01") - Nano(), + ] + ), + ( + 'ceil', + 'M', + ("2001-02-01", + Timestamp( + "2001-02-14 12:00") - Nano(), "2001-02-14 12:00", "2001-02-15", - pd.Timestamp("2001-03-01") - pd.offsets.Nano(), + Timestamp("2001-03-01") - Nano(), + "2001-03-01" + ), + [ + Timestamp("2001-03-01") - Nano(), + Timestamp("2001-03-01") - Nano(), + Timestamp("2001-03-01") - Nano(), + Timestamp("2001-03-01") - Nano(), + Timestamp("2001-03-01") - Nano(), + Timestamp("2001-04-01") - Nano(), + ] + ), + ( + 'floor', + 'MS', + ("2001-02-01", + Timestamp("2001-02-14 12:00") - Nano(), + "2001-02-14 12:00", + "2001-02-15", + Timestamp("2001-03-01") - Nano(), "2001-03-01" ), [ @@ -326,28 +366,21 @@ def test_min_max_empty(self, skipna, tz): ), ( 'ceil', - 'M', + 'MS', ("2001-02-01", - pd.Timestamp( - "2001-02-14 12:00") - pd.offsets.Nano(), + Timestamp("2001-02-14 12:00") - Nano(), "2001-02-14 12:00", "2001-02-15", - pd.Timestamp("2001-03-01") - pd.offsets.Nano(), + Timestamp("2001-03-01") - Nano(), "2001-03-01" ), [ - pd.Timestamp("2001-03-01") - - pd.offsets.Nano(), - pd.Timestamp("2001-03-01") - - pd.offsets.Nano(), - pd.Timestamp("2001-03-01") - - pd.offsets.Nano(), - pd.Timestamp("2001-03-01") - - pd.offsets.Nano(), - pd.Timestamp("2001-03-01") - - pd.offsets.Nano(), - pd.Timestamp("2001-04-01") - - pd.offsets.Nano(), + "2001-02-01", + "2001-03-01", + "2001-03-01", + "2001-03-01", + "2001-03-01", + "2001-03-01", ] ) ]