diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 487d5d0d2accd..de4d33789105a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -182,6 +182,7 @@ Other Enhancements - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) +- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index fd8486f690745..ae4f9c821b5d1 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -477,6 +477,13 @@ class NaTType(_NaT): Parameters ---------- freq : a freq string indicating the rounding resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + .. 
versionadded:: 0.24.0 Raises ------ @@ -489,6 +496,17 @@ class NaTType(_NaT): Parameters ---------- freq : a freq string indicating the flooring resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + .. versionadded:: 0.24.0 + + Raises + ------ + ValueError if the freq cannot be converted """) ceil = _make_nat_func('ceil', # noqa:E128 """ @@ -497,6 +515,17 @@ class NaTType(_NaT): Parameters ---------- freq : a freq string indicating the ceiling resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + .. 
versionadded:: 0.24.0 + + Raises + ------ + ValueError if the freq cannot be converted """) tz_convert = _make_nat_func('tz_convert', # noqa:E128 diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 52343593d1cc1..e985a519c3046 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -656,7 +656,7 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) - def _round(self, freq, rounder): + def _round(self, freq, rounder, ambiguous='raise'): if self.tz is not None: value = self.tz_localize(None).value else: @@ -668,10 +668,10 @@ class Timestamp(_Timestamp): r = round_ns(value, rounder, freq)[0] result = Timestamp(r, unit='ns') if self.tz is not None: - result = result.tz_localize(self.tz) + result = result.tz_localize(self.tz, ambiguous=ambiguous) return result - def round(self, freq): + def round(self, freq, ambiguous='raise'): """ Round the Timestamp to the specified resolution @@ -682,32 +682,61 @@ class Timestamp(_Timestamp): Parameters ---------- freq : a freq string indicating the rounding resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + .. 
versionadded:: 0.24.0 Raises ------ ValueError if the freq cannot be converted """ - return self._round(freq, np.round) + return self._round(freq, np.round, ambiguous) - def floor(self, freq): + def floor(self, freq, ambiguous='raise'): """ return a new Timestamp floored to this resolution Parameters ---------- freq : a freq string indicating the flooring resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + .. versionadded:: 0.24.0 + + Raises + ------ + ValueError if the freq cannot be converted """ - return self._round(freq, np.floor) + return self._round(freq, np.floor, ambiguous) - def ceil(self, freq): + def ceil(self, freq, ambiguous='raise'): """ return a new Timestamp ceiled to this resolution Parameters ---------- freq : a freq string indicating the ceiling resolution + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + + .. versionadded:: 0.24.0 + + Raises + ------ + ValueError if the freq cannot be converted """ - return self._round(freq, np.ceil) + return self._round(freq, np.ceil, ambiguous) @property def tz(self): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3f8c07fe7cd21..578167a7db500 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -99,6 +99,18 @@ class TimelikeOps(object): frequency like 'S' (second) not 'ME' (month end). See :ref:`frequency aliases ` for a list of possible `freq` values. 
+ ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times + Only relevant for DatetimeIndex + + .. versionadded:: 0.24.0 Returns ------- @@ -168,7 +180,7 @@ class TimelikeOps(object): """ ) - def _round(self, freq, rounder): + def _round(self, freq, rounder, ambiguous): # round the local times values = _ensure_datetimelike_to_i8(self) result = round_ns(values, rounder, freq) @@ -180,19 +192,20 @@ def _round(self, freq, rounder): if 'tz' in attribs: attribs['tz'] = None return self._ensure_localized( - self._shallow_copy(result, **attribs)) + self._shallow_copy(result, **attribs), ambiguous + ) @Appender((_round_doc + _round_example).format(op="round")) - def round(self, freq, *args, **kwargs): - return self._round(freq, np.round) + def round(self, freq, ambiguous='raise'): + return self._round(freq, np.round, ambiguous) @Appender((_round_doc + _floor_example).format(op="floor")) - def floor(self, freq): - return self._round(freq, np.floor) + def floor(self, freq, ambiguous='raise'): + return self._round(freq, np.floor, ambiguous) @Appender((_round_doc + _ceil_example).format(op="ceil")) - def ceil(self, freq): - return self._round(freq, np.ceil) + def ceil(self, freq, ambiguous='raise'): + return self._round(freq, np.ceil, ambiguous) class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): @@ -264,7 +277,7 @@ def _evaluate_compare(self, other, op): except TypeError: return result - def _ensure_localized(self, result): + def _ensure_localized(self, result, ambiguous='raise'): """ ensure that we are re-localized @@ -274,6 +287,8 @@ def _ensure_localized(self, result): Parameters ---------- result : DatetimeIndex / i8 
ndarray + ambiguous : str, bool, or bool-ndarray + default 'raise' Returns ------- @@ -284,7 +299,7 @@ def _ensure_localized(self, result): if getattr(self, 'tz', None) is not None: if not isinstance(result, ABCIndexClass): result = self._simple_new(result) - result = result.tz_localize(self.tz) + result = result.tz_localize(self.tz, ambiguous=ambiguous) return result def _box_values_as_index(self): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index bf41840c58ded..f83aa31edf95a 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -132,6 +132,28 @@ def test_floor(self): expected = Timestamp('20130101') assert result == expected + @pytest.mark.parametrize('method', ['ceil', 'round', 'floor']) + def test_round_dst_border(self, method): + # GH 18946 round near DST + ts = Timestamp('2017-10-29 00:00:00', tz='UTC').tz_convert( + 'Europe/Madrid' + ) + # + result = getattr(ts, method)('H', ambiguous=True) + assert result == ts + + result = getattr(ts, method)('H', ambiguous=False) + expected = Timestamp('2017-10-29 01:00:00', tz='UTC').tz_convert( + 'Europe/Madrid' + ) + assert result == expected + + result = getattr(ts, method)('H', ambiguous='NaT') + assert result is NaT + + with pytest.raises(pytz.AmbiguousTimeError): + getattr(ts, method)('H', ambiguous='raise') + # -------------------------------------------------------------- # Timestamp.replace diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 5b45c6003a005..fee2323310b9c 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -5,6 +5,7 @@ import calendar import unicodedata import pytest +import pytz from datetime import datetime, time, date @@ -95,42 +96,6 @@ def compare(s, name): expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, 
expected) - # round - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.round('D') - expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', - '2012-01-01']), name='xxx') - tm.assert_series_equal(result, expected) - - # round with tz - result = (s.dt.tz_localize('UTC') - .dt.tz_convert('US/Eastern') - .dt.round('D')) - exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', - '2012-01-01']).tz_localize('US/Eastern') - expected = Series(exp_values, name='xxx') - tm.assert_series_equal(result, expected) - - # floor - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.floor('D') - expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01', - '2012-01-01']), name='xxx') - tm.assert_series_equal(result, expected) - - # ceil - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.ceil('D') - expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', - '2012-01-02']), name='xxx') - tm.assert_series_equal(result, expected) - # datetimeindex with tz s = Series(date_range('20130101', periods=5, tz='US/Eastern'), name='xxx') @@ -261,6 +226,64 @@ def get_dir(s): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5 + @pytest.mark.parametrize('method, dates', [ + ['round', ['2012-01-02', '2012-01-02', '2012-01-01']], + ['floor', ['2012-01-01', '2012-01-01', '2012-01-01']], + ['ceil', ['2012-01-02', '2012-01-02', '2012-01-02']] + ]) + def test_dt_round(self, method, dates): + # round + s = Series(pd.to_datetime(['2012-01-01 13:00:00', + '2012-01-01 12:01:00', + '2012-01-01 08:00:00']), name='xxx') + result = getattr(s.dt, method)('D') + expected = Series(pd.to_datetime(dates), name='xxx') + tm.assert_series_equal(result, expected) + + def test_dt_round_tz(self): + s = Series(pd.to_datetime(['2012-01-01 13:00:00', + '2012-01-01 
12:01:00', + '2012-01-01 08:00:00']), name='xxx') + result = (s.dt.tz_localize('UTC') + .dt.tz_convert('US/Eastern') + .dt.round('D')) + + exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', + '2012-01-01']).tz_localize('US/Eastern') + expected = Series(exp_values, name='xxx') + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('method', ['ceil', 'round', 'floor']) + def test_dt_round_tz_ambiguous(self, method): + # GH 18946 round near DST + df1 = pd.DataFrame([ + pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True), + pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True), + pd.to_datetime('2017-10-29 03:00:00+01:00', utc=True) + ], + columns=['date']) + df1['date'] = df1['date'].dt.tz_convert('Europe/Madrid') + # infer + result = getattr(df1.date.dt, method)('H', ambiguous='infer') + expected = df1['date'] + tm.assert_series_equal(result, expected) + + # bool-array + result = getattr(df1.date.dt, method)( + 'H', ambiguous=[True, False, False] + ) + tm.assert_series_equal(result, expected) + + # NaT + result = getattr(df1.date.dt, method)('H', ambiguous='NaT') + expected = df1['date'].copy() + expected.iloc[0:2] = pd.NaT + tm.assert_series_equal(result, expected) + + # raise + with pytest.raises(pytz.AmbiguousTimeError): + getattr(df1.date.dt, method)('H', ambiguous='raise') + def test_dt_namespace_accessor_categorical(self): # GH 19468 dti = DatetimeIndex(['20171111', '20181212']).repeat(2)