Skip to content

Commit 268150f

Browse files
mroeschke authored and jreback committed
ENH: Add additional options to nonexistent in tz_localize (#24493)
1 parent afc2c86 commit 268150f

File tree

15 files changed

+280
-77
lines changed

15 files changed

+280
-77
lines changed

doc/source/timeseries.rst

+9-5
Original file line numberDiff line numberDiff line change
@@ -2351,9 +2351,11 @@ A DST transition may also shift the local time ahead by 1 hour creating nonexist
23512351
local times. The behavior of localizing a timeseries with nonexistent times
23522352
can be controlled by the ``nonexistent`` argument. The following options are available:
23532353

2354-
* ``raise``: Raises a ``pytz.NonExistentTimeError`` (the default behavior)
2355-
* ``NaT``: Replaces nonexistent times with ``NaT``
2356-
* ``shift``: Shifts nonexistent times forward to the closest real time
2354+
* ``'raise'``: Raises a ``pytz.NonExistentTimeError`` (the default behavior)
2355+
* ``'NaT'``: Replaces nonexistent times with ``NaT``
2356+
* ``'shift_forward'``: Shifts nonexistent times forward to the closest real time
2357+
* ``'shift_backward'``: Shifts nonexistent times backward to the closest real time
2358+
* timedelta object: Shifts nonexistent times by the timedelta duration
23572359

23582360
.. ipython:: python
23592361
@@ -2367,12 +2369,14 @@ Localization of nonexistent times will raise an error by default.
23672369
In [2]: dti.tz_localize('Europe/Warsaw')
23682370
NonExistentTimeError: 2015-03-29 02:30:00
23692371
2370-
Transform nonexistent times to ``NaT`` or the closest real time forward in time.
2372+
Transform nonexistent times to ``NaT`` or shift the times.
23712373

23722374
.. ipython:: python
23732375
23742376
dti
2375-
dti.tz_localize('Europe/Warsaw', nonexistent='shift')
2377+
dti.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
2378+
dti.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
2379+
dti.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta(1, unit='H'))
23762380
dti.tz_localize('Europe/Warsaw', nonexistent='NaT')
23772381
23782382

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ Other Enhancements
407407
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
408408
- :func:`read_fwf` now accepts keyword ``infer_nrows`` (:issue:`15138`).
409409
- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`)
410-
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`)
410+
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`, :issue:`24466`)
411411
- :meth:`Index.difference` now has an optional ``sort`` parameter to specify whether the results should be sorted if possible (:issue:`17839`)
412412
- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`)
413413
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.

pandas/_libs/tslibs/conversion.pyx

+47-17
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ from dateutil.tz import tzutc
1313
from datetime import time as datetime_time
1414
from cpython.datetime cimport (datetime, tzinfo,
1515
PyDateTime_Check, PyDate_Check,
16-
PyDateTime_CheckExact, PyDateTime_IMPORT)
16+
PyDateTime_CheckExact, PyDateTime_IMPORT,
17+
PyDelta_Check)
1718
PyDateTime_IMPORT
1819

1920
from pandas._libs.tslibs.ccalendar import DAY_SECONDS, HOUR_SECONDS
@@ -28,7 +29,8 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
2829
from pandas._libs.tslibs.util cimport (
2930
is_string_object, is_datetime64_object, is_integer_object, is_float_object)
3031

31-
from pandas._libs.tslibs.timedeltas cimport cast_from_unit
32+
from pandas._libs.tslibs.timedeltas cimport (cast_from_unit,
33+
delta_to_nanoseconds)
3234
from pandas._libs.tslibs.timezones cimport (
3335
is_utc, is_tzlocal, is_fixed_offset, get_utcoffset, get_dst_info,
3436
get_timezone, maybe_get_tz, tz_compare)
@@ -868,7 +870,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
868870
- bool if True, treat all vals as DST. If False, treat them as non-DST
869871
- 'NaT' will return NaT where there are ambiguous times
870872
871-
nonexistent : {None, "NaT", "shift", "raise"}
873+
nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise",
874+
timedelta-like}
872875
How to handle non-existent times when converting wall times to UTC
873876
874877
.. versionadded:: 0.24.0
@@ -884,12 +887,14 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
884887
Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
885888
int64_t *tdata
886889
int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
887-
int64_t HOURS_NS = HOUR_SECONDS * 1000000000
890+
int64_t first_delta
891+
int64_t HOURS_NS = HOUR_SECONDS * 1000000000, shift_delta = 0
888892
ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta
889893
ndarray trans_idx, grp, a_idx, b_idx, one_diff
890894
npy_datetimestruct dts
891895
bint infer_dst = False, is_dst = False, fill = False
892-
bint shift = False, fill_nonexist = False
896+
bint shift_forward = False, shift_backward = False
897+
bint fill_nonexist = False
893898
list trans_grp
894899
str stamp
895900

@@ -928,11 +933,16 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
928933

929934
if nonexistent == 'NaT':
930935
fill_nonexist = True
931-
elif nonexistent == 'shift':
932-
shift = True
933-
else:
934-
assert nonexistent in ('raise', None), ("nonexistent must be one of"
935-
" {'NaT', 'raise', 'shift'}")
936+
elif nonexistent == 'shift_forward':
937+
shift_forward = True
938+
elif nonexistent == 'shift_backward':
939+
shift_backward = True
940+
elif PyDelta_Check(nonexistent):
941+
shift_delta = delta_to_nanoseconds(nonexistent)
942+
elif nonexistent not in ('raise', None):
943+
msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', "
944+
"shift_backwards} or a timedelta object")
945+
raise ValueError(msg)
936946

937947
trans, deltas, _ = get_dst_info(tz)
938948

@@ -1041,15 +1051,35 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
10411051
result[i] = right
10421052
else:
10431053
# Handle nonexistent times
1044-
if shift:
1045-
# Shift the nonexistent time forward to the closest existing
1046-
# time
1054+
if shift_forward or shift_backward or shift_delta != 0:
1055+
# Shift the nonexistent time to the closest existing time
10471056
remaining_mins = val % HOURS_NS
1048-
new_local = val + (HOURS_NS - remaining_mins)
1057+
if shift_delta != 0:
1058+
# Validate that we don't relocalize on another nonexistent
1059+
# time
1060+
if -1 < shift_delta + remaining_mins < HOURS_NS:
1061+
raise ValueError(
1062+
"The provided timedelta will relocalize on a "
1063+
"nonexistent time: {}".format(nonexistent)
1064+
)
1065+
new_local = val + shift_delta
1066+
elif shift_forward:
1067+
new_local = val + (HOURS_NS - remaining_mins)
1068+
else:
1069+
# Subtract 1 since the beginning hour is _inclusive_ of
1070+
# nonexistent times
1071+
new_local = val - remaining_mins - 1
10491072
delta_idx = trans.searchsorted(new_local, side='right')
1050-
# Need to subtract 1 from the delta_idx if the UTC offset of
1051-
# the target tz is greater than 0
1052-
delta_idx_offset = int(deltas[0] > 0)
1073+
# Shift the delta_idx by 1 if the UTC offset of
1074+
# the target tz is greater than 0 and we're moving forward
1075+
# or vice versa
1076+
first_delta = deltas[0]
1077+
if (shift_forward or shift_delta > 0) and first_delta > 0:
1078+
delta_idx_offset = 1
1079+
elif (shift_backward or shift_delta < 0) and first_delta < 0:
1080+
delta_idx_offset = 1
1081+
else:
1082+
delta_idx_offset = 0
10531083
delta_idx = delta_idx - delta_idx_offset
10541084
result[i] = new_local - deltas[delta_idx]
10551085
elif fill_nonexist:

pandas/_libs/tslibs/nattype.pyx

+28-12
Original file line numberDiff line numberDiff line change
@@ -481,13 +481,17 @@ class NaTType(_NaT):
481481
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
482482
483483
.. versionadded:: 0.24.0
484-
nonexistent : 'shift', 'NaT', default 'raise'
484+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
485+
default 'raise'
485486
A nonexistent time does not exist in a particular timezone
486487
where clocks moved forward due to DST.
487488
488-
- 'shift' will shift the nonexistent time forward to the closest
489-
existing time
489+
- 'shift_forward' will shift the nonexistent time forward to the
490+
closest existing time
491+
- 'shift_backward' will shift the nonexistent time backward to the
492+
closest existing time
490493
- 'NaT' will return NaT where there are nonexistent times
494+
- timedelta objects will shift nonexistent times by the timedelta
491495
- 'raise' will raise a NonExistentTimeError if there are
492496
nonexistent times
493497
@@ -515,13 +519,17 @@ class NaTType(_NaT):
515519
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
516520
517521
.. versionadded:: 0.24.0
518-
nonexistent : 'shift', 'NaT', default 'raise'
522+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
523+
default 'raise'
519524
A nonexistent time does not exist in a particular timezone
520525
where clocks moved forward due to DST.
521526
522-
- 'shift' will shift the nonexistent time forward to the closest
523-
existing time
527+
- 'shift_forward' will shift the nonexistent time forward to the
528+
closest existing time
529+
- 'shift_backward' will shift the nonexistent time backward to the
530+
closest existing time
524531
- 'NaT' will return NaT where there are nonexistent times
532+
- timedelta objects will shift nonexistent times by the timedelta
525533
- 'raise' will raise a NonExistentTimeError if there are
526534
nonexistent times
527535
@@ -545,13 +553,17 @@ class NaTType(_NaT):
545553
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
546554
547555
.. versionadded:: 0.24.0
548-
nonexistent : 'shift', 'NaT', default 'raise'
556+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
557+
default 'raise'
549558
A nonexistent time does not exist in a particular timezone
550559
where clocks moved forward due to DST.
551560
552-
- 'shift' will shift the nonexistent time forward to the closest
553-
existing time
561+
- 'shift_forward' will shift the nonexistent time forward to the
562+
closest existing time
563+
- 'shift_backward' will shift the nonexistent time backward to the
564+
closest existing time
554565
- 'NaT' will return NaT where there are nonexistent times
566+
- timedelta objects will shift nonexistent times by the timedelta
555567
- 'raise' will raise a NonExistentTimeError if there are
556568
nonexistent times
557569
@@ -605,13 +617,17 @@ class NaTType(_NaT):
605617
- 'NaT' will return NaT for an ambiguous time
606618
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
607619
608-
nonexistent : 'shift', 'NaT', default 'raise'
620+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
621+
default 'raise'
609622
A nonexistent time does not exist in a particular timezone
610623
where clocks moved forward due to DST.
611624
612-
- 'shift' will shift the nonexistent time forward to the closest
613-
existing time
625+
- 'shift_forward' will shift the nonexistent time forward to the
626+
closest existing time
627+
- 'shift_backward' will shift the nonexistent time backward to the
628+
closest existing time
614629
- 'NaT' will return NaT where there are nonexistent times
630+
- timedelta objects will shift nonexistent times by the timedelta
615631
- 'raise' will raise a NonExistentTimeError if there are
616632
nonexistent times
617633

pandas/_libs/tslibs/timestamps.pyx

+35-15
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ cimport numpy as cnp
99
from numpy cimport int64_t, int32_t, int8_t
1010
cnp.import_array()
1111

12-
from datetime import time as datetime_time
12+
from datetime import time as datetime_time, timedelta
1313
from cpython.datetime cimport (datetime,
1414
PyDateTime_Check, PyDelta_Check, PyTZInfo_Check,
1515
PyDateTime_IMPORT)
@@ -789,13 +789,17 @@ class Timestamp(_Timestamp):
789789
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
790790
791791
.. versionadded:: 0.24.0
792-
nonexistent : 'shift', 'NaT', default 'raise'
792+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
793+
default 'raise'
793794
A nonexistent time does not exist in a particular timezone
794795
where clocks moved forward due to DST.
795796
796-
- 'shift' will shift the nonexistent time forward to the closest
797-
existing time
797+
- 'shift_forward' will shift the nonexistent time forward to the
798+
closest existing time
799+
- 'shift_backward' will shift the nonexistent time backward to the
800+
closest existing time
798801
- 'NaT' will return NaT where there are nonexistent times
802+
- timedelta objects will shift nonexistent times by the timedelta
799803
- 'raise' will raise a NonExistentTimeError if there are
800804
nonexistent times
801805
@@ -827,13 +831,17 @@ class Timestamp(_Timestamp):
827831
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
828832
829833
.. versionadded:: 0.24.0
830-
nonexistent : 'shift', 'NaT', default 'raise'
834+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
835+
default 'raise'
831836
A nonexistent time does not exist in a particular timezone
832837
where clocks moved forward due to DST.
833838
834-
- 'shift' will shift the nonexistent time forward to the closest
835-
existing time
839+
- 'shift_forward' will shift the nonexistent time forward to the
840+
closest existing time
841+
- 'shift_backward' will shift the nonexistent time backward to the
842+
closest existing time
836843
- 'NaT' will return NaT where there are nonexistent times
844+
- timedelta objects will shift nonexistent times by the timedelta
837845
- 'raise' will raise a NonExistentTimeError if there are
838846
nonexistent times
839847
@@ -859,13 +867,17 @@ class Timestamp(_Timestamp):
859867
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
860868
861869
.. versionadded:: 0.24.0
862-
nonexistent : 'shift', 'NaT', default 'raise'
870+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
871+
default 'raise'
863872
A nonexistent time does not exist in a particular timezone
864873
where clocks moved forward due to DST.
865874
866-
- 'shift' will shift the nonexistent time forward to the closest
867-
existing time
875+
- 'shift_forward' will shift the nonexistent time forward to the
876+
closest existing time
877+
- 'shift_backward' will shift the nonexistent time backward to the
878+
closest existing time
868879
- 'NaT' will return NaT where there are nonexistent times
880+
- timedelta objects will shift nonexistent times by the timedelta
869881
- 'raise' will raise a NonExistentTimeError if there are
870882
nonexistent times
871883
@@ -1060,13 +1072,17 @@ class Timestamp(_Timestamp):
10601072
- 'NaT' will return NaT for an ambiguous time
10611073
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
10621074
1063-
nonexistent : 'shift', 'NaT', default 'raise'
1075+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
1076+
default 'raise'
10641077
A nonexistent time does not exist in a particular timezone
10651078
where clocks moved forward due to DST.
10661079
1067-
- 'shift' will shift the nonexistent time forward to the closest
1068-
existing time
1080+
- 'shift_forward' will shift the nonexistent time forward to the
1081+
closest existing time
1082+
- 'shift_backward' will shift the nonexistent time backward to the
1083+
closest existing time
10691084
- 'NaT' will return NaT where there are nonexistent times
1085+
- timedelta objects will shift nonexistent times by the timedelta
10701086
- 'raise' will raise a NonExistentTimeError if there are
10711087
nonexistent times
10721088
@@ -1106,9 +1122,13 @@ class Timestamp(_Timestamp):
11061122
raise ValueError("The errors argument must be either 'coerce' "
11071123
"or 'raise'.")
11081124

1109-
if nonexistent not in ('raise', 'NaT', 'shift'):
1125+
nonexistent_options = ('raise', 'NaT', 'shift_forward',
1126+
'shift_backward')
1127+
if nonexistent not in nonexistent_options and not isinstance(
1128+
nonexistent, timedelta):
11101129
raise ValueError("The nonexistent argument must be one of 'raise',"
1111-
" 'NaT' or 'shift'")
1130+
" 'NaT', 'shift_forward', 'shift_backward' or"
1131+
" a timedelta object")
11121132

11131133
if self.tzinfo is None:
11141134
# tz naive, localize

pandas/core/arrays/datetimelike.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -206,13 +206,17 @@ class TimelikeOps(object):
206206
207207
.. versionadded:: 0.24.0
208208
209-
nonexistent : 'shift', 'NaT', default 'raise'
209+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta,
210+
default 'raise'
210211
A nonexistent time does not exist in a particular timezone
211212
where clocks moved forward due to DST.
212213
213-
- 'shift' will shift the nonexistent time forward to the closest
214-
existing time
214+
- 'shift_forward' will shift the nonexistent time forward to the
215+
closest existing time
216+
- 'shift_backward' will shift the nonexistent time backward to the
217+
closest existing time
215218
- 'NaT' will return NaT where there are nonexistent times
219+
- timedelta objects will shift nonexistent times by the timedelta
216220
- 'raise' will raise a NonExistentTimeError if there are
217221
nonexistent times
218222

0 commit comments

Comments
 (0)