From 0bc93b03e26c1be8661ab127ae468968e7b62f53 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 23:11:56 -0700 Subject: [PATCH 1/6] BUG: Handle AmbiguousTimeError in date rounding --- pandas/core/indexes/datetimelike.py | 33 ++++--- pandas/tests/series/test_datetime_values.py | 95 +++++++++++++-------- 2 files changed, 82 insertions(+), 46 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3f8c07fe7cd21..bc6e7c10221e0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -99,6 +99,16 @@ class TimelikeOps(object): frequency like 'S' (second) not 'ME' (month end). See :ref:`frequency aliases ` for a list of possible `freq` values. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times + Only relevant for DatetimeIndex Returns ------- @@ -168,7 +178,7 @@ class TimelikeOps(object): """ ) - def _round(self, freq, rounder): + def _round(self, freq, rounder, ambiguous): # round the local times values = _ensure_datetimelike_to_i8(self) result = round_ns(values, rounder, freq) @@ -180,19 +190,20 @@ def _round(self, freq, rounder): if 'tz' in attribs: attribs['tz'] = None return self._ensure_localized( - self._shallow_copy(result, **attribs)) + self._shallow_copy(result, **attribs), ambiguous + ) @Appender((_round_doc + _round_example).format(op="round")) - def round(self, freq, *args, **kwargs): - return self._round(freq, np.round) + def round(self, freq, ambiguous='raise'): + return self._round(freq, np.round, ambiguous) @Appender((_round_doc + _floor_example).format(op="floor")) - def floor(self, freq): - 
return self._round(freq, np.floor) + def floor(self, freq, ambiguous='raise'): + return self._round(freq, np.floor, ambiguous) @Appender((_round_doc + _ceil_example).format(op="ceil")) - def ceil(self, freq): - return self._round(freq, np.ceil) + def ceil(self, freq, ambiguous='raise'): + return self._round(freq, np.ceil, ambiguous) class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): @@ -264,7 +275,7 @@ def _evaluate_compare(self, other, op): except TypeError: return result - def _ensure_localized(self, result): + def _ensure_localized(self, result, ambiguous='raise'): """ ensure that we are re-localized @@ -274,6 +285,8 @@ def _ensure_localized(self, result): Parameters ---------- result : DatetimeIndex / i8 ndarray + ambiguous : str, bool, or bool-ndarray + default 'raise' Returns ------- @@ -284,7 +297,7 @@ def _ensure_localized(self, result): if getattr(self, 'tz', None) is not None: if not isinstance(result, ABCIndexClass): result = self._simple_new(result) - result = result.tz_localize(self.tz) + result = result.tz_localize(self.tz, ambiguous=ambiguous) return result def _box_values_as_index(self): diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 5b45c6003a005..fee2323310b9c 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -5,6 +5,7 @@ import calendar import unicodedata import pytest +import pytz from datetime import datetime, time, date @@ -95,42 +96,6 @@ def compare(s, name): expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) - # round - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.round('D') - expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', - '2012-01-01']), name='xxx') - tm.assert_series_equal(result, expected) - - # round with tz - result = (s.dt.tz_localize('UTC') - 
.dt.tz_convert('US/Eastern') - .dt.round('D')) - exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', - '2012-01-01']).tz_localize('US/Eastern') - expected = Series(exp_values, name='xxx') - tm.assert_series_equal(result, expected) - - # floor - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.floor('D') - expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01', - '2012-01-01']), name='xxx') - tm.assert_series_equal(result, expected) - - # ceil - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.ceil('D') - expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', - '2012-01-02']), name='xxx') - tm.assert_series_equal(result, expected) - # datetimeindex with tz s = Series(date_range('20130101', periods=5, tz='US/Eastern'), name='xxx') @@ -261,6 +226,64 @@ def get_dir(s): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5 + @pytest.mark.parametrize('method, dates', [ + ['round', ['2012-01-02', '2012-01-02', '2012-01-01']], + ['floor', ['2012-01-01', '2012-01-01', '2012-01-01']], + ['ceil', ['2012-01-02', '2012-01-02', '2012-01-02']] + ]) + def test_dt_round(self, method, dates): + # round + s = Series(pd.to_datetime(['2012-01-01 13:00:00', + '2012-01-01 12:01:00', + '2012-01-01 08:00:00']), name='xxx') + result = getattr(s.dt, method)('D') + expected = Series(pd.to_datetime(dates), name='xxx') + tm.assert_series_equal(result, expected) + + def test_dt_round_tz(self): + s = Series(pd.to_datetime(['2012-01-01 13:00:00', + '2012-01-01 12:01:00', + '2012-01-01 08:00:00']), name='xxx') + result = (s.dt.tz_localize('UTC') + .dt.tz_convert('US/Eastern') + .dt.round('D')) + + exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', + '2012-01-01']).tz_localize('US/Eastern') + expected = Series(exp_values, name='xxx') + tm.assert_series_equal(result, expected) + + 
@pytest.mark.parametrize('method', ['ceil', 'round', 'floor']) + def test_dt_round_tz_ambiguous(self, method): + # GH 18946 round near DST + df1 = pd.DataFrame([ + pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True), + pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True), + pd.to_datetime('2017-10-29 03:00:00+01:00', utc=True) + ], + columns=['date']) + df1['date'] = df1['date'].dt.tz_convert('Europe/Madrid') + # infer + result = getattr(df1.date.dt, method)('H', ambiguous='infer') + expected = df1['date'] + tm.assert_series_equal(result, expected) + + # bool-array + result = getattr(df1.date.dt, method)( + 'H', ambiguous=[True, False, False] + ) + tm.assert_series_equal(result, expected) + + # NaT + result = getattr(df1.date.dt, method)('H', ambiguous='NaT') + expected = df1['date'].copy() + expected.iloc[0:2] = pd.NaT + tm.assert_series_equal(result, expected) + + # raise + with pytest.raises(pytz.AmbiguousTimeError): + getattr(df1.date.dt, method)('H', ambiguous='raise') + def test_dt_namespace_accessor_categorical(self): # GH 19468 dti = DatetimeIndex(['20171111', '20181212']).repeat(2) From bba384d0d0e590f8d4519669e8a3ecc11788abeb Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 23:37:09 -0700 Subject: [PATCH 2/6] fix timestamp --- pandas/_libs/tslibs/nattype.pyx | 23 +++++++++++ pandas/_libs/tslibs/timestamps.pyx | 39 +++++++++++++++---- .../tests/scalar/timestamp/test_unary_ops.py | 22 +++++++++++ 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 08d9128ff660c..4bb59efa0de1c 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -478,6 +478,11 @@ class NaTType(_NaT): Parameters ---------- freq : a freq string indicating the rounding resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' 
will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time Raises ------ @@ -490,6 +495,15 @@ class NaTType(_NaT): Parameters ---------- freq : a freq string indicating the flooring resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + Raises + ------ + ValueError if the freq cannot be converted """) ceil = _make_nat_func('ceil', # noqa:E128 """ @@ -498,6 +512,15 @@ class NaTType(_NaT): Parameters ---------- freq : a freq string indicating the ceiling resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + Raises + ------ + ValueError if the freq cannot be converted """) tz_convert = _make_nat_func('tz_convert', # noqa:E128 diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 52343593d1cc1..8e1976ed371a8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -656,7 +656,7 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) - def _round(self, freq, rounder): + def _round(self, freq, rounder, ambiguous='raise'): if self.tz is not None: value = self.tz_localize(None).value else: @@ -668,10 +668,10 @@ class Timestamp(_Timestamp): r = round_ns(value, rounder, freq)[0] result = Timestamp(r, unit='ns') if self.tz is not None: - result = result.tz_localize(self.tz) + result = result.tz_localize(self.tz, ambiguous=ambiguous) return result - def round(self, freq): + def round(self, freq, ambiguous='raise'): """ Round the 
Timestamp to the specified resolution @@ -682,32 +682,55 @@ class Timestamp(_Timestamp): Parameters ---------- freq : a freq string indicating the rounding resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time Raises ------ ValueError if the freq cannot be converted """ - return self._round(freq, np.round) + return self._round(freq, np.round, ambiguous) - def floor(self, freq): + def floor(self, freq, ambiguous='raise'): """ return a new Timestamp floored to this resolution Parameters ---------- freq : a freq string indicating the flooring resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + Raises + ------ + ValueError if the freq cannot be converted """ - return self._round(freq, np.floor) + return self._round(freq, np.floor, ambiguous) - def ceil(self, freq): + def ceil(self, freq, ambiguous='raise'): """ return a new Timestamp ceiled to this resolution Parameters ---------- freq : a freq string indicating the ceiling resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + Raises + ------ + ValueError if the freq cannot be converted """ - return self._round(freq, np.ceil) + return self._round(freq, np.ceil, ambiguous) @property def tz(self): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py 
b/pandas/tests/scalar/timestamp/test_unary_ops.py index bf41840c58ded..f83aa31edf95a 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -132,6 +132,28 @@ def test_floor(self): expected = Timestamp('20130101') assert result == expected + @pytest.mark.parametrize('method', ['ceil', 'round', 'floor']) + def test_round_dst_border(self, method): + # GH 18946 round near DST + ts = Timestamp('2017-10-29 00:00:00', tz='UTC').tz_convert( + 'Europe/Madrid' + ) + # + result = getattr(ts, method)('H', ambiguous=True) + assert result == ts + + result = getattr(ts, method)('H', ambiguous=False) + expected = Timestamp('2017-10-29 01:00:00', tz='UTC').tz_convert( + 'Europe/Madrid' + ) + assert result == expected + + result = getattr(ts, method)('H', ambiguous='NaT') + assert result is NaT + + with pytest.raises(pytz.AmbiguousTimeError): + getattr(ts, method)('H', ambiguous='raise') + # -------------------------------------------------------------- # Timestamp.replace From a71afb9ae027be5d80e4b9bc89d4bbe596329b02 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 23:43:41 -0700 Subject: [PATCH 3/6] Add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fb7af00f61534..5f0f34bf6e259 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -182,6 +182,7 @@ Other Enhancements - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. 
(:issue:`21358`) +- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). From 1c3d193b22b903eb915dcd0a68032cdb34ded2de Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 19 Sep 2018 09:31:25 -0700 Subject: [PATCH 4/6] Add versionadded --- pandas/_libs/tslibs/nattype.pyx | 6 ++++++ pandas/_libs/tslibs/timestamps.pyx | 6 ++++++ pandas/core/indexes/datetimelike.py | 2 ++ 3 files changed, 14 insertions(+) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 4bb59efa0de1c..09b5dab956341 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -483,6 +483,8 @@ class NaTType(_NaT): that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + .. versionadded:: 0.24.0 Raises ------ @@ -500,6 +502,8 @@ class NaTType(_NaT): that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + .. versionadded:: 0.24.0 Raises ------ @@ -517,6 +521,8 @@ class NaTType(_NaT): that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + ..
versionadded:: 0.24.0 Raises ------ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8e1976ed371a8..e985a519c3046 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -688,6 +688,8 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + .. versionadded:: 0.24.0 + Raises ------ ValueError if the freq cannot be converted @@ -707,6 +709,8 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + .. versionadded:: 0.24.0 + Raises ------ ValueError if the freq cannot be converted @@ -726,6 +730,8 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + .. versionadded:: 0.24.0 + Raises ------ ValueError if the freq cannot be converted diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index bc6e7c10221e0..03941c3844317 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -109,6 +109,8 @@ class TimelikeOps(object): - 'raise' will raise an AmbiguousTimeError if there are ambiguous times Only relevant for DatetimeIndex + + .. 
versionadded:: 0.24.0 Returns ------- From 71a0ddb3b6373464c9f28c9a2880c0927293f340 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 19 Sep 2018 11:02:16 -0700 Subject: [PATCH 5/6] Remove whitespace --- pandas/_libs/tslibs/nattype.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 5b5e8f98f28a5..ae4f9c821b5d1 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -482,7 +482,7 @@ class NaTType(_NaT): that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - + .. versionadded:: 0.24.0 Raises @@ -501,7 +501,7 @@ class NaTType(_NaT): that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - + .. versionadded:: 0.24.0 Raises @@ -520,7 +520,7 @@ class NaTType(_NaT): that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - + .. versionadded:: 0.24.0 Raises From ba7eddde9652c2ed58dae2413e9bc8c4f0a0aa86 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 19 Sep 2018 13:18:44 -0700 Subject: [PATCH 6/6] remove whitespace --- pandas/core/indexes/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 03941c3844317..578167a7db500 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -109,7 +109,7 @@ class TimelikeOps(object): - 'raise' will raise an AmbiguousTimeError if there are ambiguous times Only relevant for DatetimeIndex - + .. versionadded:: 0.24.0 Returns