Skip to content

Commit cd17115

Browse files
mroeschke authored and Pingviinituutti committed
BUG/ENH: Handle NonexistentTimeError in date rounding (pandas-dev#23406)
1 parent 0c6811c commit cd17115

File tree

7 files changed

+158
-27
lines changed

7 files changed

+158
-27
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ Other Enhancements
227227
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
228228
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
229229
- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`)
230+
- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`22647`)
230231
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
231232
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
232233
- :meth:`pandas.core.dtypes.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``,

pandas/_libs/tslibs/conversion.pyx

+10-6
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,10 @@ from np_datetime import OutOfBoundsDatetime
2727

2828
from util cimport (is_string_object,
2929
is_datetime64_object,
30-
is_integer_object, is_float_object, is_array)
30+
is_integer_object, is_float_object)
3131

3232
from timedeltas cimport cast_from_unit
3333
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
34-
treat_tz_as_dateutil, treat_tz_as_pytz,
3534
get_utcoffset, get_dst_info,
3635
get_timezone, maybe_get_tz, tz_compare)
3736
from parsing import parse_datetime_string
@@ -857,8 +856,9 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
857856
int64_t[:] deltas, idx_shifted
858857
ndarray ambiguous_array
859858
Py_ssize_t i, idx, pos, ntrans, n = len(vals)
859+
Py_ssize_t delta_idx_offset, delta_idx
860860
int64_t *tdata
861-
int64_t v, left, right, val, v_left, v_right
861+
int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
862862
ndarray[int64_t] result, result_a, result_b, dst_hours
863863
npy_datetimestruct dts
864864
bint infer_dst = False, is_dst = False, fill = False
@@ -1012,9 +1012,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
10121012
if shift:
10131013
# Shift the nonexistent time forward to the closest existing
10141014
# time
1015-
remaining_minutes = val % HOURS_NS
1016-
new_local = val + (HOURS_NS - remaining_minutes)
1017-
delta_idx = trans.searchsorted(new_local, side='right') - 1
1015+
remaining_mins = val % HOURS_NS
1016+
new_local = val + (HOURS_NS - remaining_mins)
1017+
delta_idx = trans.searchsorted(new_local, side='right')
1018+
# Need to subtract 1 from the delta_idx if the UTC offset of
1019+
# the target tz is greater than 0
1020+
delta_idx_offset = int(deltas[0] > 0)
1021+
delta_idx = delta_idx - delta_idx_offset
10181022
result[i] = new_local - deltas[delta_idx]
10191023
elif fill_nonexist:
10201024
result[i] = NPY_NAT

pandas/_libs/tslibs/nattype.pyx

+33
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,17 @@ class NaTType(_NaT):
485485
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
486486
487487
.. versionadded:: 0.24.0
488+
nonexistent : 'shift', 'NaT', default 'raise'
489+
A nonexistent time does not exist in a particular timezone
490+
where clocks moved forward due to DST.
491+
492+
- 'shift' will shift the nonexistent time forward to the closest
493+
existing time
494+
- 'NaT' will return NaT where there are nonexistent times
495+
- 'raise' will raise a NonExistentTimeError if there are
496+
nonexistent times
497+
498+
.. versionadded:: 0.24.0
488499
489500
Raises
490501
------
@@ -504,6 +515,17 @@ class NaTType(_NaT):
504515
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
505516
506517
.. versionadded:: 0.24.0
518+
nonexistent : 'shift', 'NaT', default 'raise'
519+
A nonexistent time does not exist in a particular timezone
520+
where clocks moved forward due to DST.
521+
522+
- 'shift' will shift the nonexistent time forward to the closest
523+
existing time
524+
- 'NaT' will return NaT where there are nonexistent times
525+
- 'raise' will raise a NonExistentTimeError if there are
526+
nonexistent times
527+
528+
.. versionadded:: 0.24.0
507529
508530
Raises
509531
------
@@ -523,6 +545,17 @@ class NaTType(_NaT):
523545
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
524546
525547
.. versionadded:: 0.24.0
548+
nonexistent : 'shift', 'NaT', default 'raise'
549+
A nonexistent time does not exist in a particular timezone
550+
where clocks moved forward due to DST.
551+
552+
- 'shift' will shift the nonexistent time forward to the closest
553+
existing time
554+
- 'NaT' will return NaT where there are nonexistent times
555+
- 'raise' will raise a NonExistentTimeError if there are
556+
nonexistent times
557+
558+
.. versionadded:: 0.24.0
526559
527560
Raises
528561
------

pandas/_libs/tslibs/timestamps.pyx

+45-8
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ class Timestamp(_Timestamp):
736736

737737
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
738738

739-
def _round(self, freq, mode, ambiguous='raise'):
739+
def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'):
740740
if self.tz is not None:
741741
value = self.tz_localize(None).value
742742
else:
@@ -748,10 +748,12 @@ class Timestamp(_Timestamp):
748748
r = round_nsint64(value, mode, freq)[0]
749749
result = Timestamp(r, unit='ns')
750750
if self.tz is not None:
751-
result = result.tz_localize(self.tz, ambiguous=ambiguous)
751+
result = result.tz_localize(
752+
self.tz, ambiguous=ambiguous, nonexistent=nonexistent
753+
)
752754
return result
753755

754-
def round(self, freq, ambiguous='raise'):
756+
def round(self, freq, ambiguous='raise', nonexistent='raise'):
755757
"""
756758
Round the Timestamp to the specified resolution
757759
@@ -769,14 +771,27 @@ class Timestamp(_Timestamp):
769771
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
770772
771773
.. versionadded:: 0.24.0
774+
nonexistent : 'shift', 'NaT', default 'raise'
775+
A nonexistent time does not exist in a particular timezone
776+
where clocks moved forward due to DST.
777+
778+
- 'shift' will shift the nonexistent time forward to the closest
779+
existing time
780+
- 'NaT' will return NaT where there are nonexistent times
781+
- 'raise' will raise a NonExistentTimeError if there are
782+
nonexistent times
783+
784+
.. versionadded:: 0.24.0
772785
773786
Raises
774787
------
775788
ValueError if the freq cannot be converted
776789
"""
777-
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)
790+
return self._round(
791+
freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent
792+
)
778793

779-
def floor(self, freq, ambiguous='raise'):
794+
def floor(self, freq, ambiguous='raise', nonexistent='raise'):
780795
"""
781796
return a new Timestamp floored to this resolution
782797
@@ -790,14 +805,25 @@ class Timestamp(_Timestamp):
790805
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
791806
792807
.. versionadded:: 0.24.0
808+
nonexistent : 'shift', 'NaT', default 'raise'
809+
A nonexistent time does not exist in a particular timezone
810+
where clocks moved forward due to DST.
811+
812+
- 'shift' will shift the nonexistent time forward to the closest
813+
existing time
814+
- 'NaT' will return NaT where there are nonexistent times
815+
- 'raise' will raise a NonExistentTimeError if there are
816+
nonexistent times
817+
818+
.. versionadded:: 0.24.0
793819
794820
Raises
795821
------
796822
ValueError if the freq cannot be converted
797823
"""
798-
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)
824+
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)
799825

800-
def ceil(self, freq, ambiguous='raise'):
826+
def ceil(self, freq, ambiguous='raise', nonexistent='raise'):
801827
"""
802828
return a new Timestamp ceiled to this resolution
803829
@@ -811,12 +837,23 @@ class Timestamp(_Timestamp):
811837
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
812838
813839
.. versionadded:: 0.24.0
840+
nonexistent : 'shift', 'NaT', default 'raise'
841+
A nonexistent time does not exist in a particular timezone
842+
where clocks moved forward due to DST.
843+
844+
- 'shift' will shift the nonexistent time forward to the closest
845+
existing time
846+
- 'NaT' will return NaT where there are nonexistent times
847+
- 'raise' will raise a NonExistentTimeError if there are
848+
nonexistent times
849+
850+
.. versionadded:: 0.24.0
814851
815852
Raises
816853
------
817854
ValueError if the freq cannot be converted
818855
"""
819-
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)
856+
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
820857

821858
@property
822859
def tz(self):

pandas/core/indexes/datetimelike.py

+27-10
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,17 @@ class TimelikeOps(object):
113113
Only relevant for DatetimeIndex
114114
115115
.. versionadded:: 0.24.0
116+
nonexistent : 'shift', 'NaT', default 'raise'
117+
A nonexistent time does not exist in a particular timezone
118+
where clocks moved forward due to DST.
119+
120+
- 'shift' will shift the nonexistent time forward to the closest
121+
existing time
122+
- 'NaT' will return NaT where there are nonexistent times
123+
- 'raise' will raise a NonExistentTimeError if there are
124+
nonexistent times
125+
126+
.. versionadded:: 0.24.0
116127
117128
Returns
118129
-------
@@ -182,7 +193,7 @@ class TimelikeOps(object):
182193
"""
183194
)
184195

185-
def _round(self, freq, mode, ambiguous):
196+
def _round(self, freq, mode, ambiguous, nonexistent):
186197
# round the local times
187198
values = _ensure_datetimelike_to_i8(self)
188199
result = round_nsint64(values, mode, freq)
@@ -193,20 +204,22 @@ def _round(self, freq, mode, ambiguous):
193204
if 'tz' in attribs:
194205
attribs['tz'] = None
195206
return self._ensure_localized(
196-
self._shallow_copy(result, **attribs), ambiguous
207+
self._shallow_copy(result, **attribs), ambiguous, nonexistent
197208
)
198209

199210
@Appender((_round_doc + _round_example).format(op="round"))
200-
def round(self, freq, ambiguous='raise'):
201-
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)
211+
def round(self, freq, ambiguous='raise', nonexistent='raise'):
212+
return self._round(
213+
freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent
214+
)
202215

203216
@Appender((_round_doc + _floor_example).format(op="floor"))
204-
def floor(self, freq, ambiguous='raise'):
205-
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)
217+
def floor(self, freq, ambiguous='raise', nonexistent='raise'):
218+
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)
206219

207220
@Appender((_round_doc + _ceil_example).format(op="ceil"))
208-
def ceil(self, freq, ambiguous='raise'):
209-
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)
221+
def ceil(self, freq, ambiguous='raise', nonexistent='raise'):
222+
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
210223

211224

212225
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
@@ -277,7 +290,8 @@ def _evaluate_compare(self, other, op):
277290
except TypeError:
278291
return result
279292

280-
def _ensure_localized(self, arg, ambiguous='raise', from_utc=False):
293+
def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
294+
from_utc=False):
281295
"""
282296
ensure that we are re-localized
283297
@@ -288,6 +302,7 @@ def _ensure_localized(self, arg, ambiguous='raise', from_utc=False):
288302
----------
289303
arg : DatetimeIndex / i8 ndarray
290304
ambiguous : str, bool, or bool-ndarray, default 'raise'
305+
nonexistent : str, default 'raise'
291306
from_utc : bool, default False
292307
If True, localize the i8 ndarray to UTC first before converting to
293308
the appropriate tz. If False, localize directly to the tz.
@@ -304,7 +319,9 @@ def _ensure_localized(self, arg, ambiguous='raise', from_utc=False):
304319
if from_utc:
305320
arg = arg.tz_localize('UTC').tz_convert(self.tz)
306321
else:
307-
arg = arg.tz_localize(self.tz, ambiguous=ambiguous)
322+
arg = arg.tz_localize(
323+
self.tz, ambiguous=ambiguous, nonexistent=nonexistent
324+
)
308325
return arg
309326

310327
def _box_values_as_index(self):

pandas/tests/scalar/timestamp/test_unary_ops.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ def test_floor(self):
134134
assert result == expected
135135

136136
@pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
137-
def test_round_dst_border(self, method):
138-
# GH 18946 round near DST
137+
def test_round_dst_border_ambiguous(self, method):
138+
# GH 18946 round near "fall back" DST
139139
ts = Timestamp('2017-10-29 00:00:00', tz='UTC').tz_convert(
140140
'Europe/Madrid'
141141
)
@@ -155,6 +155,24 @@ def test_round_dst_border(self, method):
155155
with pytest.raises(pytz.AmbiguousTimeError):
156156
getattr(ts, method)('H', ambiguous='raise')
157157

158+
@pytest.mark.parametrize('method, ts_str, freq', [
159+
['ceil', '2018-03-11 01:59:00-0600', '5min'],
160+
['round', '2018-03-11 01:59:00-0600', '5min'],
161+
['floor', '2018-03-11 03:01:00-0500', '2H']])
162+
def test_round_dst_border_nonexistent(self, method, ts_str, freq):
163+
# GH 23324 round near "spring forward" DST
164+
ts = Timestamp(ts_str, tz='America/Chicago')
165+
result = getattr(ts, method)(freq, nonexistent='shift')
166+
expected = Timestamp('2018-03-11 03:00:00', tz='America/Chicago')
167+
assert result == expected
168+
169+
result = getattr(ts, method)(freq, nonexistent='NaT')
170+
assert result is NaT
171+
172+
with pytest.raises(pytz.NonExistentTimeError,
173+
message='2018-03-11 02:00:00'):
174+
getattr(ts, method)(freq, nonexistent='raise')
175+
158176
@pytest.mark.parametrize('timestamp', [
159177
'2018-01-01 0:0:0.124999360',
160178
'2018-01-01 0:0:0.125000367',

pandas/tests/series/test_datetime_values.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def test_dt_round_tz(self):
253253

254254
@pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
255255
def test_dt_round_tz_ambiguous(self, method):
256-
# GH 18946 round near DST
256+
# GH 18946 round near "fall back" DST
257257
df1 = pd.DataFrame([
258258
pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True),
259259
pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True),
@@ -282,6 +282,27 @@ def test_dt_round_tz_ambiguous(self, method):
282282
with pytest.raises(pytz.AmbiguousTimeError):
283283
getattr(df1.date.dt, method)('H', ambiguous='raise')
284284

285+
@pytest.mark.parametrize('method, ts_str, freq', [
286+
['ceil', '2018-03-11 01:59:00-0600', '5min'],
287+
['round', '2018-03-11 01:59:00-0600', '5min'],
288+
['floor', '2018-03-11 03:01:00-0500', '2H']])
289+
def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
290+
# GH 23324 round near "spring forward" DST
291+
s = Series([pd.Timestamp(ts_str, tz='America/Chicago')])
292+
result = getattr(s.dt, method)(freq, nonexistent='shift')
293+
expected = Series(
294+
[pd.Timestamp('2018-03-11 03:00:00', tz='America/Chicago')]
295+
)
296+
tm.assert_series_equal(result, expected)
297+
298+
result = getattr(s.dt, method)(freq, nonexistent='NaT')
299+
expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz)
300+
tm.assert_series_equal(result, expected)
301+
302+
with pytest.raises(pytz.NonExistentTimeError,
303+
message='2018-03-11 02:00:00'):
304+
getattr(s.dt, method)(freq, nonexistent='raise')
305+
285306
def test_dt_namespace_accessor_categorical(self):
286307
# GH 19468
287308
dti = DatetimeIndex(['20171111', '20181212']).repeat(2)

0 commit comments

Comments
 (0)