Skip to content

Commit f67b90d

Browse files
mroeschkejreback
authored and committed
BUG/ENH: Handle AmbiguousTimeError in date rounding (#22647)
1 parent 945bf75 commit f67b90d

File tree

6 files changed

+173
-54
lines changed

6 files changed

+173
-54
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ Other Enhancements
186186
- :func:`to_timedelta` now supports iso-formatted timedelta strings (:issue:`21877`)
187187
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
188188
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
189+
- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`)
189190
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
190191
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
191192
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).

pandas/_libs/tslibs/nattype.pyx

+29
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,13 @@ class NaTType(_NaT):
477477
Parameters
478478
----------
479479
freq : a freq string indicating the rounding resolution
480+
ambiguous : bool, 'NaT', default 'raise'
481+
- bool contains flags to determine if time is dst or not (note
482+
that this flag is only applicable for ambiguous fall dst dates)
483+
- 'NaT' will return NaT for an ambiguous time
484+
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
485+
486+
.. versionadded:: 0.24.0
480487
481488
Raises
482489
------
@@ -489,6 +496,17 @@ class NaTType(_NaT):
489496
Parameters
490497
----------
491498
freq : a freq string indicating the flooring resolution
499+
ambiguous : bool, 'NaT', default 'raise'
500+
- bool contains flags to determine if time is dst or not (note
501+
that this flag is only applicable for ambiguous fall dst dates)
502+
- 'NaT' will return NaT for an ambiguous time
503+
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
504+
505+
.. versionadded:: 0.24.0
506+
507+
Raises
508+
------
509+
ValueError if the freq cannot be converted
492510
""")
493511
ceil = _make_nat_func('ceil', # noqa:E128
494512
"""
@@ -497,6 +515,17 @@ class NaTType(_NaT):
497515
Parameters
498516
----------
499517
freq : a freq string indicating the ceiling resolution
518+
ambiguous : bool, 'NaT', default 'raise'
519+
- bool contains flags to determine if time is dst or not (note
520+
that this flag is only applicable for ambiguous fall dst dates)
521+
- 'NaT' will return NaT for an ambiguous time
522+
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
523+
524+
.. versionadded:: 0.24.0
525+
526+
Raises
527+
------
528+
ValueError if the freq cannot be converted
500529
""")
501530

502531
tz_convert = _make_nat_func('tz_convert', # noqa:E128

pandas/_libs/tslibs/timestamps.pyx

+37-8
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ class Timestamp(_Timestamp):
656656

657657
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
658658

659-
def _round(self, freq, rounder):
659+
def _round(self, freq, rounder, ambiguous='raise'):
660660
if self.tz is not None:
661661
value = self.tz_localize(None).value
662662
else:
@@ -668,10 +668,10 @@ class Timestamp(_Timestamp):
668668
r = round_ns(value, rounder, freq)[0]
669669
result = Timestamp(r, unit='ns')
670670
if self.tz is not None:
671-
result = result.tz_localize(self.tz)
671+
result = result.tz_localize(self.tz, ambiguous=ambiguous)
672672
return result
673673

674-
def round(self, freq):
674+
def round(self, freq, ambiguous='raise'):
675675
"""
676676
Round the Timestamp to the specified resolution
677677
@@ -682,32 +682,61 @@ class Timestamp(_Timestamp):
682682
Parameters
683683
----------
684684
freq : a freq string indicating the rounding resolution
685+
ambiguous : bool, 'NaT', default 'raise'
686+
- bool contains flags to determine if time is dst or not (note
687+
that this flag is only applicable for ambiguous fall dst dates)
688+
- 'NaT' will return NaT for an ambiguous time
689+
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
690+
691+
.. versionadded:: 0.24.0
685692
686693
Raises
687694
------
688695
ValueError if the freq cannot be converted
689696
"""
690-
return self._round(freq, np.round)
697+
return self._round(freq, np.round, ambiguous)
691698

692-
def floor(self, freq):
699+
def floor(self, freq, ambiguous='raise'):
693700
"""
694701
return a new Timestamp floored to this resolution
695702
696703
Parameters
697704
----------
698705
freq : a freq string indicating the flooring resolution
706+
ambiguous : bool, 'NaT', default 'raise'
707+
- bool contains flags to determine if time is dst or not (note
708+
that this flag is only applicable for ambiguous fall dst dates)
709+
- 'NaT' will return NaT for an ambiguous time
710+
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
711+
712+
.. versionadded:: 0.24.0
713+
714+
Raises
715+
------
716+
ValueError if the freq cannot be converted
699717
"""
700-
return self._round(freq, np.floor)
718+
return self._round(freq, np.floor, ambiguous)
701719

702-
def ceil(self, freq):
720+
def ceil(self, freq, ambiguous='raise'):
703721
"""
704722
return a new Timestamp ceiled to this resolution
705723
706724
Parameters
707725
----------
708726
freq : a freq string indicating the ceiling resolution
727+
ambiguous : bool, 'NaT', default 'raise'
728+
- bool contains flags to determine if time is dst or not (note
729+
that this flag is only applicable for ambiguous fall dst dates)
730+
- 'NaT' will return NaT for an ambiguous time
731+
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
732+
733+
.. versionadded:: 0.24.0
734+
735+
Raises
736+
------
737+
ValueError if the freq cannot be converted
709738
"""
710-
return self._round(freq, np.ceil)
739+
return self._round(freq, np.ceil, ambiguous)
711740

712741
@property
713742
def tz(self):

pandas/core/indexes/datetimelike.py

+25-10
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,18 @@ class TimelikeOps(object):
9999
frequency like 'S' (second) not 'ME' (month end). See
100100
:ref:`frequency aliases <timeseries.offset_aliases>` for
101101
a list of possible `freq` values.
102+
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
103+
- 'infer' will attempt to infer fall dst-transition hours based on
104+
order
105+
- bool-ndarray where True signifies a DST time, False designates
106+
a non-DST time (note that this flag is only applicable for
107+
ambiguous times)
108+
- 'NaT' will return NaT where there are ambiguous times
109+
- 'raise' will raise an AmbiguousTimeError if there are ambiguous
110+
times
111+
Only relevant for DatetimeIndex
112+
113+
.. versionadded:: 0.24.0
102114
103115
Returns
104116
-------
@@ -168,7 +180,7 @@ class TimelikeOps(object):
168180
"""
169181
)
170182

171-
def _round(self, freq, rounder):
183+
def _round(self, freq, rounder, ambiguous):
172184
# round the local times
173185
values = _ensure_datetimelike_to_i8(self)
174186
result = round_ns(values, rounder, freq)
@@ -180,19 +192,20 @@ def _round(self, freq, rounder):
180192
if 'tz' in attribs:
181193
attribs['tz'] = None
182194
return self._ensure_localized(
183-
self._shallow_copy(result, **attribs))
195+
self._shallow_copy(result, **attribs), ambiguous
196+
)
184197

185198
@Appender((_round_doc + _round_example).format(op="round"))
186-
def round(self, freq, *args, **kwargs):
187-
return self._round(freq, np.round)
199+
def round(self, freq, ambiguous='raise'):
200+
return self._round(freq, np.round, ambiguous)
188201

189202
@Appender((_round_doc + _floor_example).format(op="floor"))
190-
def floor(self, freq):
191-
return self._round(freq, np.floor)
203+
def floor(self, freq, ambiguous='raise'):
204+
return self._round(freq, np.floor, ambiguous)
192205

193206
@Appender((_round_doc + _ceil_example).format(op="ceil"))
194-
def ceil(self, freq):
195-
return self._round(freq, np.ceil)
207+
def ceil(self, freq, ambiguous='raise'):
208+
return self._round(freq, np.ceil, ambiguous)
196209

197210

198211
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
@@ -264,7 +277,7 @@ def _evaluate_compare(self, other, op):
264277
except TypeError:
265278
return result
266279

267-
def _ensure_localized(self, result):
280+
def _ensure_localized(self, result, ambiguous='raise'):
268281
"""
269282
ensure that we are re-localized
270283
@@ -274,6 +287,8 @@ def _ensure_localized(self, result):
274287
Parameters
275288
----------
276289
result : DatetimeIndex / i8 ndarray
290+
ambiguous : str, bool, or bool-ndarray
291+
default 'raise'
277292
278293
Returns
279294
-------
@@ -284,7 +299,7 @@ def _ensure_localized(self, result):
284299
if getattr(self, 'tz', None) is not None:
285300
if not isinstance(result, ABCIndexClass):
286301
result = self._simple_new(result)
287-
result = result.tz_localize(self.tz)
302+
result = result.tz_localize(self.tz, ambiguous=ambiguous)
288303
return result
289304

290305
def _box_values_as_index(self):

pandas/tests/scalar/timestamp/test_unary_ops.py

+22
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,28 @@ def test_floor(self):
132132
expected = Timestamp('20130101')
133133
assert result == expected
134134

135+
@pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
136+
def test_round_dst_border(self, method):
137+
# GH 18946 round near DST
138+
ts = Timestamp('2017-10-29 00:00:00', tz='UTC').tz_convert(
139+
'Europe/Madrid'
140+
)
141+
#
142+
result = getattr(ts, method)('H', ambiguous=True)
143+
assert result == ts
144+
145+
result = getattr(ts, method)('H', ambiguous=False)
146+
expected = Timestamp('2017-10-29 01:00:00', tz='UTC').tz_convert(
147+
'Europe/Madrid'
148+
)
149+
assert result == expected
150+
151+
result = getattr(ts, method)('H', ambiguous='NaT')
152+
assert result is NaT
153+
154+
with pytest.raises(pytz.AmbiguousTimeError):
155+
getattr(ts, method)('H', ambiguous='raise')
156+
135157
# --------------------------------------------------------------
136158
# Timestamp.replace
137159

pandas/tests/series/test_datetime_values.py

+59-36
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import calendar
66
import unicodedata
77
import pytest
8+
import pytz
89

910
from datetime import datetime, time, date
1011

@@ -95,42 +96,6 @@ def compare(s, name):
9596
expected = Series(exp_values, index=s.index, name='xxx')
9697
tm.assert_series_equal(result, expected)
9798

98-
# round
99-
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
100-
'2012-01-01 12:01:00',
101-
'2012-01-01 08:00:00']), name='xxx')
102-
result = s.dt.round('D')
103-
expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
104-
'2012-01-01']), name='xxx')
105-
tm.assert_series_equal(result, expected)
106-
107-
# round with tz
108-
result = (s.dt.tz_localize('UTC')
109-
.dt.tz_convert('US/Eastern')
110-
.dt.round('D'))
111-
exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
112-
'2012-01-01']).tz_localize('US/Eastern')
113-
expected = Series(exp_values, name='xxx')
114-
tm.assert_series_equal(result, expected)
115-
116-
# floor
117-
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
118-
'2012-01-01 12:01:00',
119-
'2012-01-01 08:00:00']), name='xxx')
120-
result = s.dt.floor('D')
121-
expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01',
122-
'2012-01-01']), name='xxx')
123-
tm.assert_series_equal(result, expected)
124-
125-
# ceil
126-
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
127-
'2012-01-01 12:01:00',
128-
'2012-01-01 08:00:00']), name='xxx')
129-
result = s.dt.ceil('D')
130-
expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
131-
'2012-01-02']), name='xxx')
132-
tm.assert_series_equal(result, expected)
133-
13499
# datetimeindex with tz
135100
s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
136101
name='xxx')
@@ -261,6 +226,64 @@ def get_dir(s):
261226
with pytest.raises(com.SettingWithCopyError):
262227
s.dt.hour[0] = 5
263228

229+
@pytest.mark.parametrize('method, dates', [
230+
['round', ['2012-01-02', '2012-01-02', '2012-01-01']],
231+
['floor', ['2012-01-01', '2012-01-01', '2012-01-01']],
232+
['ceil', ['2012-01-02', '2012-01-02', '2012-01-02']]
233+
])
234+
def test_dt_round(self, method, dates):
235+
# round
236+
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
237+
'2012-01-01 12:01:00',
238+
'2012-01-01 08:00:00']), name='xxx')
239+
result = getattr(s.dt, method)('D')
240+
expected = Series(pd.to_datetime(dates), name='xxx')
241+
tm.assert_series_equal(result, expected)
242+
243+
def test_dt_round_tz(self):
244+
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
245+
'2012-01-01 12:01:00',
246+
'2012-01-01 08:00:00']), name='xxx')
247+
result = (s.dt.tz_localize('UTC')
248+
.dt.tz_convert('US/Eastern')
249+
.dt.round('D'))
250+
251+
exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
252+
'2012-01-01']).tz_localize('US/Eastern')
253+
expected = Series(exp_values, name='xxx')
254+
tm.assert_series_equal(result, expected)
255+
256+
@pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
257+
def test_dt_round_tz_ambiguous(self, method):
258+
# GH 18946 round near DST
259+
df1 = pd.DataFrame([
260+
pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True),
261+
pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True),
262+
pd.to_datetime('2017-10-29 03:00:00+01:00', utc=True)
263+
],
264+
columns=['date'])
265+
df1['date'] = df1['date'].dt.tz_convert('Europe/Madrid')
266+
# infer
267+
result = getattr(df1.date.dt, method)('H', ambiguous='infer')
268+
expected = df1['date']
269+
tm.assert_series_equal(result, expected)
270+
271+
# bool-array
272+
result = getattr(df1.date.dt, method)(
273+
'H', ambiguous=[True, False, False]
274+
)
275+
tm.assert_series_equal(result, expected)
276+
277+
# NaT
278+
result = getattr(df1.date.dt, method)('H', ambiguous='NaT')
279+
expected = df1['date'].copy()
280+
expected.iloc[0:2] = pd.NaT
281+
tm.assert_series_equal(result, expected)
282+
283+
# raise
284+
with pytest.raises(pytz.AmbiguousTimeError):
285+
getattr(df1.date.dt, method)('H', ambiguous='raise')
286+
264287
def test_dt_namespace_accessor_categorical(self):
265288
# GH 19468
266289
dti = DatetimeIndex(['20171111', '20181212']).repeat(2)

0 commit comments

Comments
 (0)