Skip to content

Commit d15ceaf

Browse files
mroeschke authored and Pingviinituutti committed
API/ENH: tz_localize handling of nonexistent times: rename keyword + add shift option (pandas-dev#22644)
1 parent e0fa388 commit d15ceaf

File tree

10 files changed

+330
-65
lines changed

10 files changed

+330
-65
lines changed

doc/source/timeseries.rst

+32
Original file line numberDiff line numberDiff line change
@@ -2357,6 +2357,38 @@ constructor as well as ``tz_localize``.
23572357
# tz_convert(None) is identical with tz_convert('UTC').tz_localize(None)
23582358
didx.tz_convert('UCT').tz_localize(None)
23592359
2360+
.. _timeseries.timezone_nonexistent:
2361+
2362+
Nonexistent Times when Localizing
2363+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2364+
2365+
A DST transition may also shift the local time ahead by 1 hour, creating nonexistent
2366+
local times. The behavior of localizing a timeseries with nonexistent times
2367+
can be controlled by the ``nonexistent`` argument. The following options are available:
2368+
2369+
* ``raise``: Raises a ``pytz.NonExistentTimeError`` (the default behavior)
2370+
* ``NaT``: Replaces nonexistent times with ``NaT``
2371+
* ``shift``: Shifts nonexistent times forward to the closest real time
2372+
2373+
.. ipython:: python
2374+
dti = date_range(start='2015-03-29 01:30:00', periods=3, freq='H')
2375+
# 2:30 is a nonexistent time
2376+
2377+
Localization of nonexistent times will raise an error by default.
2378+
2379+
.. code-block:: ipython
2380+
2381+
In [2]: dti.tz_localize('Europe/Warsaw')
2382+
NonExistentTimeError: 2015-03-29 02:30:00
2383+
2384+
Transform nonexistent times to ``NaT`` or the closest real time forward in time.
2385+
2386+
.. ipython:: python
2387+
dti
2388+
dti.tz_localize('Europe/Warsaw', nonexistent='shift')
2389+
dti.tz_localize('Europe/Warsaw', nonexistent='NaT')
2390+
2391+
23602392
.. _timeseries.timezone_series:
23612393

23622394
TZ Aware Dtypes

doc/source/whatsnew/v0.24.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ Other Enhancements
205205
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
206206
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
207207
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
208+
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`)
208209

209210
.. _whatsnew_0240.api_breaking:
210211

@@ -912,6 +913,7 @@ Deprecations
912913
- :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`)
913914
- :func:`DatetimeIndex.shift` and :func:`PeriodIndex.shift` now accept ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`, :issue:`22912`)
914915
- The ``fastpath`` keyword of the different Index constructors is deprecated (:issue:`23110`).
916+
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``nonexistent`` argument (:issue:`8917`)
915917

916918
.. _whatsnew_0240.prior_deprecations:
917919

pandas/_libs/tslibs/conversion.pyx

+48-34
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# -*- coding: utf-8 -*-
2-
32
import cython
43
from cython import Py_ssize_t
54

@@ -44,6 +43,7 @@ from nattype cimport NPY_NAT, checknull_with_nat
4443
# Constants
4544

4645
cdef int64_t DAY_NS = 86400000000000LL
46+
cdef int64_t HOURS_NS = 3600000000000
4747
NS_DTYPE = np.dtype('M8[ns]')
4848
TD_DTYPE = np.dtype('m8[ns]')
4949

@@ -458,8 +458,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
458458
if tz is not None:
459459
# shift for localize_tso
460460
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
461-
ambiguous='raise',
462-
errors='raise')[0]
461+
ambiguous='raise')[0]
463462

464463
except OutOfBoundsDatetime:
465464
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
@@ -826,7 +825,7 @@ def tz_convert(int64_t[:] vals, object tz1, object tz2):
826825
@cython.boundscheck(False)
827826
@cython.wraparound(False)
828827
def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
829-
object errors='raise'):
828+
object nonexistent=None):
830829
"""
831830
Localize tzinfo-naive i8 to given time zone (using pytz). If
832831
there are ambiguities in the values, raise AmbiguousTimeError.
@@ -837,7 +836,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
837836
tz : tzinfo or None
838837
ambiguous : str, bool, or arraylike
839838
If arraylike, must have the same length as vals
840-
errors : {"raise", "coerce"}, default "raise"
839+
nonexistent : str
840+
One of {'raise', 'NaT', 'shift'} or None (None is treated like 'raise'); controls how nonexistent times are handled
841+
842+
.. versionadded:: 0.24.0
841843
842844
Returns
843845
-------
@@ -849,16 +851,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
849851
ndarray ambiguous_array
850852
Py_ssize_t i, idx, pos, ntrans, n = len(vals)
851853
int64_t *tdata
852-
int64_t v, left, right
854+
int64_t v, left, right, val, v_left, v_right
853855
ndarray[int64_t] result, result_a, result_b, dst_hours
854856
npy_datetimestruct dts
855857
bint infer_dst = False, is_dst = False, fill = False
856-
bint is_coerce = errors == 'coerce', is_raise = errors == 'raise'
858+
bint shift = False, fill_nonexist = False
857859

858860
# Vectorized version of DstTzInfo.localize
859-
860-
assert is_coerce or is_raise
861-
862861
if tz == UTC or tz is None:
863862
return vals
864863

@@ -888,39 +887,45 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
888887
"the same size as vals")
889888
ambiguous_array = np.asarray(ambiguous)
890889

890+
if nonexistent == 'NaT':
891+
fill_nonexist = True
892+
elif nonexistent == 'shift':
893+
shift = True
894+
else:
895+
assert nonexistent in ('raise', None), ("nonexistent must be one of"
896+
" {'NaT', 'raise', 'shift'}")
897+
891898
trans, deltas, typ = get_dst_info(tz)
892899

893900
tdata = <int64_t*> cnp.PyArray_DATA(trans)
894901
ntrans = len(trans)
895902

903+
# Determine whether each date lies left of the DST transition (store in
904+
# result_a) or right of the DST transition (store in result_b)
896905
result_a = np.empty(n, dtype=np.int64)
897906
result_b = np.empty(n, dtype=np.int64)
898907
result_a.fill(NPY_NAT)
899908
result_b.fill(NPY_NAT)
900909

901-
# left side
902-
idx_shifted = (np.maximum(0, trans.searchsorted(
910+
idx_shifted_left = (np.maximum(0, trans.searchsorted(
903911
vals - DAY_NS, side='right') - 1)).astype(np.int64)
904912

905-
for i in range(n):
906-
v = vals[i] - deltas[idx_shifted[i]]
907-
pos = bisect_right_i8(tdata, v, ntrans) - 1
908-
909-
# timestamp falls to the left side of the DST transition
910-
if v + deltas[pos] == vals[i]:
911-
result_a[i] = v
912-
913-
# right side
914-
idx_shifted = (np.maximum(0, trans.searchsorted(
913+
idx_shifted_right = (np.maximum(0, trans.searchsorted(
915914
vals + DAY_NS, side='right') - 1)).astype(np.int64)
916915

917916
for i in range(n):
918-
v = vals[i] - deltas[idx_shifted[i]]
919-
pos = bisect_right_i8(tdata, v, ntrans) - 1
917+
val = vals[i]
918+
v_left = val - deltas[idx_shifted_left[i]]
919+
pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
920+
# timestamp falls to the left side of the DST transition
921+
if v_left + deltas[pos_left] == val:
922+
result_a[i] = v_left
920923

924+
v_right = val - deltas[idx_shifted_right[i]]
925+
pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
921926
# timestamp falls to the right side of the DST transition
922-
if v + deltas[pos] == vals[i]:
923-
result_b[i] = v
927+
if v_right + deltas[pos_right] == val:
928+
result_b[i] = v_right
924929

925930
if infer_dst:
926931
dst_hours = np.empty(n, dtype=np.int64)
@@ -935,7 +940,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
935940
stamp = _render_tstamp(vals[trans_idx])
936941
raise pytz.AmbiguousTimeError(
937942
"Cannot infer dst time from %s as there "
938-
"are no repeated times" % stamp)
943+
"are no repeated times" % stamp)
939944
# Split the array into contiguous chunks (where the difference between
940945
# indices is 1). These are effectively dst transitions in different
941946
# years which is useful for checking that there is not an ambiguous
@@ -960,18 +965,19 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
960965
if switch_idx.size > 1:
961966
raise pytz.AmbiguousTimeError(
962967
"There are %i dst switches when "
963-
"there should only be 1." % switch_idx.size)
968+
"there should only be 1." % switch_idx.size)
964969
switch_idx = switch_idx[0] + 1
965970
# Pull the only index and adjust
966971
a_idx = grp[:switch_idx]
967972
b_idx = grp[switch_idx:]
968973
dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))
969974

970975
for i in range(n):
976+
val = vals[i]
971977
left = result_a[i]
972978
right = result_b[i]
973-
if vals[i] == NPY_NAT:
974-
result[i] = vals[i]
979+
if val == NPY_NAT:
980+
result[i] = val
975981
elif left != NPY_NAT and right != NPY_NAT:
976982
if left == right:
977983
result[i] = left
@@ -986,19 +992,27 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
986992
elif fill:
987993
result[i] = NPY_NAT
988994
else:
989-
stamp = _render_tstamp(vals[i])
995+
stamp = _render_tstamp(val)
990996
raise pytz.AmbiguousTimeError(
991997
"Cannot infer dst time from %r, try using the "
992-
"'ambiguous' argument" % stamp)
998+
"'ambiguous' argument" % stamp)
993999
elif left != NPY_NAT:
9941000
result[i] = left
9951001
elif right != NPY_NAT:
9961002
result[i] = right
9971003
else:
998-
if is_coerce:
1004+
# Handle nonexistent times
1005+
if shift:
1006+
# Shift the nonexistent time forward to the closest existing
1007+
# time
1008+
remaining_minutes = val % HOURS_NS
1009+
new_local = val + (HOURS_NS - remaining_minutes)
1010+
delta_idx = trans.searchsorted(new_local, side='right') - 1
1011+
result[i] = new_local - deltas[delta_idx]
1012+
elif fill_nonexist:
9991013
result[i] = NPY_NAT
10001014
else:
1001-
stamp = _render_tstamp(vals[i])
1015+
stamp = _render_tstamp(val)
10021016
raise pytz.NonExistentTimeError(stamp)
10031017

10041018
return result

pandas/_libs/tslibs/nattype.pyx

+16-4
Original file line numberDiff line numberDiff line change
@@ -564,14 +564,26 @@ class NaTType(_NaT):
564564
- 'NaT' will return NaT for an ambiguous time
565565
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
566566
567-
errors : 'raise', 'coerce', default 'raise'
567+
nonexistent : 'shift', 'NaT', default 'raise'
568+
A nonexistent time does not exist in a particular timezone
569+
where clocks moved forward due to DST.
570+
571+
- 'shift' will shift the nonexistent time forward to the closest
572+
existing time
573+
- 'NaT' will return NaT where there are nonexistent times
574+
- 'raise' will raise a NonExistentTimeError if there are
575+
nonexistent times
576+
577+
.. versionadded:: 0.24.0
578+
579+
errors : 'raise', 'coerce', default None
568580
- 'raise' will raise a NonExistentTimeError if a timestamp is not
569581
valid in the specified timezone (e.g. due to a transition from
570-
or to DST time)
582+
or to DST time). Use ``nonexistent='raise'`` instead.
571583
- 'coerce' will return NaT if the timestamp can not be converted
572-
into the specified timezone
584+
into the specified timezone. Use ``nonexistent='NaT'`` instead.
573585
574-
.. versionadded:: 0.19.0
586+
.. deprecated:: 0.24.0
575587
576588
Returns
577589
-------

pandas/_libs/tslibs/timestamps.pyx

+37-6
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,8 @@ class Timestamp(_Timestamp):
961961
def is_leap_year(self):
962962
return bool(ccalendar.is_leapyear(self.year))
963963

964-
def tz_localize(self, tz, ambiguous='raise', errors='raise'):
964+
def tz_localize(self, tz, ambiguous='raise', nonexistent='raise',
965+
errors=None):
965966
"""
966967
Convert naive Timestamp to local time zone, or remove
967968
timezone from tz-aware Timestamp.
@@ -978,14 +979,26 @@ class Timestamp(_Timestamp):
978979
- 'NaT' will return NaT for an ambiguous time
979980
- 'raise' will raise an AmbiguousTimeError for an ambiguous time
980981
981-
errors : 'raise', 'coerce', default 'raise'
982+
nonexistent : 'shift', 'NaT', default 'raise'
983+
A nonexistent time does not exist in a particular timezone
984+
where clocks moved forward due to DST.
985+
986+
- 'shift' will shift the nonexistent time forward to the closest
987+
existing time
988+
- 'NaT' will return NaT where there are nonexistent times
989+
- 'raise' will raise a NonExistentTimeError if there are
990+
nonexistent times
991+
992+
.. versionadded:: 0.24.0
993+
994+
errors : 'raise', 'coerce', default None
982995
- 'raise' will raise a NonExistentTimeError if a timestamp is not
983996
valid in the specified timezone (e.g. due to a transition from
984-
or to DST time)
997+
or to DST time). Use ``nonexistent='raise'`` instead.
985998
- 'coerce' will return NaT if the timestamp can not be converted
986-
into the specified timezone
999+
into the specified timezone. Use ``nonexistent='NaT'`` instead.
9871000
988-
.. versionadded:: 0.19.0
1001+
.. deprecated:: 0.24.0
9891002
9901003
Returns
9911004
-------
@@ -999,13 +1012,31 @@ class Timestamp(_Timestamp):
9991012
if ambiguous == 'infer':
10001013
raise ValueError('Cannot infer offset with only one time.')
10011014

1015+
if errors is not None:
1016+
warnings.warn("The errors argument is deprecated and will be "
1017+
"removed in a future release. Use "
1018+
"nonexistent='NaT' or nonexistent='raise' "
1019+
"instead.", FutureWarning)
1020+
if errors == 'coerce':
1021+
nonexistent = 'NaT'
1022+
elif errors == 'raise':
1023+
nonexistent = 'raise'
1024+
else:
1025+
raise ValueError("The errors argument must be either 'coerce' "
1026+
"or 'raise'.")
1027+
1028+
if nonexistent not in ('raise', 'NaT', 'shift'):
1029+
raise ValueError("The nonexistent argument must be one of 'raise',"
1030+
" 'NaT' or 'shift'")
1031+
10021032
if self.tzinfo is None:
10031033
# tz naive, localize
10041034
tz = maybe_get_tz(tz)
10051035
if not is_string_object(ambiguous):
10061036
ambiguous = [ambiguous]
10071037
value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz,
1008-
ambiguous=ambiguous, errors=errors)[0]
1038+
ambiguous=ambiguous,
1039+
nonexistent=nonexistent)[0]
10091040
return Timestamp(value, tz=tz)
10101041
else:
10111042
if tz is None:

0 commit comments

Comments
 (0)