-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API/ENH: tz_localize handling of nonexistent times: rename keyword + add shift option #22644
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 43 commits
bf5e7bf
36d13c7
a5ea445
8753d00
e1a6c6a
a7c86c8
1884c7b
a6a05df
c4dc8aa
1bc81db
c81d58c
b2c8429
a65987d
710014c
93159e5
a0ffcdd
219256f
d435481
7c849b6
56ac4fe
b7b09bd
94a72a5
39b769e
18664d8
8852d43
38b95e9
c88b0d8
1bae682
d30f891
f337692
6a12a7e
a7b8357
7ad87ec
abad726
6be1c25
f8be4b6
c192c9f
8909f38
49f203f
01678c7
707fdde
ae27a50
85ed25e
9041ebe
a4cdac2
0a9c1db
efb382e
61c73ca
20cc925
394a0db
a5253ee
5185683
ba1bfed
8b06c96
42ae923
fe575fe
3482f92
f0e43e2
b98d4cf
e6c5b2d
83423ad
1ca0ab2
5bcc977
8cf16e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -194,6 +194,7 @@ Other Enhancements | |
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). | ||
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). | ||
- Compatibility with Matplotlib 3.0 (:issue:`22790`). | ||
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) | ||
|
||
.. _whatsnew_0240.api_breaking: | ||
|
||
|
@@ -574,6 +575,7 @@ Deprecations | |
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`) | ||
- :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) | ||
- :func:`DatetimeIndex.shift` now accepts ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`) | ||
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``ambiguous`` and ``nonexistent`` arguments (:issue:`8917`) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How is the ambiguous keyword related? |
||
|
||
.. _whatsnew_0240.prior_deprecations: | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import cython | ||
from cython import Py_ssize_t | ||
|
||
|
@@ -44,6 +43,7 @@ from nattype cimport NPY_NAT, checknull_with_nat | |
# Constants | ||
|
||
cdef int64_t DAY_NS = 86400000000000LL | ||
cdef int64_t HOURS_NS = 3600000000000 | ||
NS_DTYPE = np.dtype('M8[ns]') | ||
TD_DTYPE = np.dtype('m8[ns]') | ||
|
||
|
@@ -458,8 +458,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, | |
if tz is not None: | ||
# shift for localize_tso | ||
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, | ||
ambiguous='raise', | ||
errors='raise')[0] | ||
ambiguous='raise')[0] | ||
|
||
except OutOfBoundsDatetime: | ||
# GH#19382 for just-barely-OutOfBounds falling back to dateutil | ||
|
@@ -826,7 +825,7 @@ def tz_convert(int64_t[:] vals, object tz1, object tz2): | |
@cython.boundscheck(False) | ||
@cython.wraparound(False) | ||
def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | ||
object errors='raise'): | ||
object nonexistent=None): | ||
""" | ||
Localize tzinfo-naive i8 to given time zone (using pytz). If | ||
there are ambiguities in the values, raise AmbiguousTimeError. | ||
|
@@ -837,7 +836,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
tz : tzinfo or None | ||
ambiguous : str, bool, or arraylike | ||
If arraylike, must have the same length as vals | ||
errors : {"raise", "coerce"}, default "raise" | ||
nonexistent : str, bool, or arraylike | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. versionadded |
||
If arraylike, must have the same length as vals | ||
|
||
.. versionadded:: 0.24.0 | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Returns | ||
------- | ||
|
@@ -849,16 +851,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
ndarray ambiguous_array | ||
Py_ssize_t i, idx, pos, ntrans, n = len(vals) | ||
int64_t *tdata | ||
int64_t v, left, right | ||
int64_t v, left, right, val, v_left, v_right | ||
ndarray[int64_t] result, result_a, result_b, dst_hours | ||
npy_datetimestruct dts | ||
bint infer_dst = False, is_dst = False, fill = False | ||
bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' | ||
bint shift = False, fill_nonexist = False | ||
|
||
# Vectorized version of DstTzInfo.localize | ||
|
||
assert is_coerce or is_raise | ||
|
||
if tz == UTC or tz is None: | ||
return vals | ||
|
||
|
@@ -888,39 +887,43 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
"the same size as vals") | ||
ambiguous_array = np.asarray(ambiguous) | ||
|
||
if is_string_object(nonexistent): | ||
if nonexistent == 'NaT': | ||
fill_nonexist = True | ||
elif nonexistent == 'shift': | ||
shift = True | ||
|
||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
trans, deltas, typ = get_dst_info(tz) | ||
|
||
tdata = <int64_t*> cnp.PyArray_DATA(trans) | ||
ntrans = len(trans) | ||
|
||
# Determine whether each date lies left of the DST transition (store in | ||
# result_a) or right of the DST transition (store in result_b) | ||
result_a = np.empty(n, dtype=np.int64) | ||
result_b = np.empty(n, dtype=np.int64) | ||
result_a.fill(NPY_NAT) | ||
result_b.fill(NPY_NAT) | ||
|
||
# left side | ||
idx_shifted = (np.maximum(0, trans.searchsorted( | ||
idx_shifted_left = (np.maximum(0, trans.searchsorted( | ||
vals - DAY_NS, side='right') - 1)).astype(np.int64) | ||
|
||
for i in range(n): | ||
v = vals[i] - deltas[idx_shifted[i]] | ||
pos = bisect_right_i8(tdata, v, ntrans) - 1 | ||
|
||
# timestamp falls to the left side of the DST transition | ||
if v + deltas[pos] == vals[i]: | ||
result_a[i] = v | ||
|
||
# right side | ||
idx_shifted = (np.maximum(0, trans.searchsorted( | ||
idx_shifted_right = (np.maximum(0, trans.searchsorted( | ||
vals + DAY_NS, side='right') - 1)).astype(np.int64) | ||
|
||
for i in range(n): | ||
v = vals[i] - deltas[idx_shifted[i]] | ||
pos = bisect_right_i8(tdata, v, ntrans) - 1 | ||
val = vals[i] | ||
v_left = val - deltas[idx_shifted_left[i]] | ||
pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 | ||
# timestamp falls to the left side of the DST transition | ||
if v_left + deltas[pos_left] == val: | ||
result_a[i] = v_left | ||
|
||
v_right = val - deltas[idx_shifted_right[i]] | ||
pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 | ||
# timestamp falls to the right side of the DST transition | ||
if v + deltas[pos] == vals[i]: | ||
result_b[i] = v | ||
if v_right + deltas[pos_right] == val: | ||
result_b[i] = v_right | ||
|
||
if infer_dst: | ||
dst_hours = np.empty(n, dtype=np.int64) | ||
|
@@ -935,7 +938,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
stamp = _render_tstamp(vals[trans_idx]) | ||
raise pytz.AmbiguousTimeError( | ||
"Cannot infer dst time from %s as there " | ||
"are no repeated times" % stamp) | ||
"are no repeated times".format(stamp)) | ||
# Split the array into contiguous chunks (where the difference between | ||
# indices is 1). These are effectively dst transitions in different | ||
# years which is useful for checking that there is not an ambiguous | ||
|
@@ -960,18 +963,19 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
if switch_idx.size > 1: | ||
raise pytz.AmbiguousTimeError( | ||
"There are %i dst switches when " | ||
"there should only be 1." % switch_idx.size) | ||
"there should only be 1.".format(switch_idx.size)) | ||
switch_idx = switch_idx[0] + 1 | ||
# Pull the only index and adjust | ||
a_idx = grp[:switch_idx] | ||
b_idx = grp[switch_idx:] | ||
dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) | ||
|
||
for i in range(n): | ||
val = vals[i] | ||
left = result_a[i] | ||
right = result_b[i] | ||
if vals[i] == NPY_NAT: | ||
result[i] = vals[i] | ||
if val == NPY_NAT: | ||
result[i] = val | ||
elif left != NPY_NAT and right != NPY_NAT: | ||
if left == right: | ||
result[i] = left | ||
|
@@ -986,19 +990,27 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
elif fill: | ||
result[i] = NPY_NAT | ||
else: | ||
stamp = _render_tstamp(vals[i]) | ||
stamp = _render_tstamp(val) | ||
raise pytz.AmbiguousTimeError( | ||
"Cannot infer dst time from %r, try using the " | ||
"'ambiguous' argument" % stamp) | ||
"'ambiguous' argument".format(stamp)) | ||
elif left != NPY_NAT: | ||
result[i] = left | ||
elif right != NPY_NAT: | ||
result[i] = right | ||
else: | ||
if is_coerce: | ||
# Handle nonexistent times | ||
if shift: | ||
# Shift the nonexistent time forward to the closest existing | ||
# time | ||
remaining_minutes = val % HOURS_NS | ||
new_local = val + (HOURS_NS - remaining_minutes) | ||
delta_idx = trans.searchsorted(new_local, side='right') - 1 | ||
result[i] = new_local - deltas[delta_idx] | ||
elif fill_nonexist: | ||
result[i] = NPY_NAT | ||
else: | ||
stamp = _render_tstamp(vals[i]) | ||
stamp = _render_tstamp(val) | ||
raise pytz.NonExistentTimeError(stamp) | ||
|
||
return result | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -564,14 +564,23 @@ class NaTType(_NaT): | |
- 'NaT' will return NaT for an ambiguous time | ||
- 'raise' will raise an AmbiguousTimeError for an ambiguous time | ||
|
||
errors : 'raise', 'coerce', default 'raise' | ||
nonexistent : 'shift', 'NaT', default 'raise' | ||
- 'shift' will shift the nonexistent time forward to the closest | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, rst formatting nitpick: there needs to be a blank line between the first sentences, and the start of this list ... (getting rst right can be annoying ..) |
||
existing time | ||
- 'NaT' will return NaT where there are nonexistent times | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
- 'raise' will raise a NonExistentTimeError if there are | ||
nonexistent times | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
errors : 'raise', 'coerce', default None | ||
- 'raise' will raise a NonExistentTimeError if a timestamp is not | ||
valid in the specified timezone (e.g. due to a transition from | ||
or to DST time) | ||
- 'coerce' will return NaT if the timestamp can not be converted | ||
into the specified timezone | ||
|
||
.. versionadded:: 0.19.0 | ||
.. deprecated:: 0.24.0 | ||
|
||
Returns | ||
------- | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -961,7 +961,8 @@ class Timestamp(_Timestamp): | |
def is_leap_year(self): | ||
return bool(ccalendar.is_leapyear(self.year)) | ||
|
||
def tz_localize(self, tz, ambiguous='raise', errors='raise'): | ||
def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', | ||
errors=None): | ||
""" | ||
Convert naive Timestamp to local time zone, or remove | ||
timezone from tz-aware Timestamp. | ||
|
@@ -978,14 +979,23 @@ class Timestamp(_Timestamp): | |
- 'NaT' will return NaT for an ambiguous time | ||
- 'raise' will raise an AmbiguousTimeError for an ambiguous time | ||
|
||
errors : 'raise', 'coerce', default 'raise' | ||
nonexistent : 'shift', 'NaT', default 'raise' | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
- 'shift' will shift the nonexistent time forward to the closest | ||
existing time | ||
- 'NaT' will return NaT where there are nonexistent times | ||
- 'raise' will raise a NonExistentTimeError if there are | ||
nonexistent times | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
errors : 'raise', 'coerce', default None | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
- 'raise' will raise a NonExistentTimeError if a timestamp is not | ||
valid in the specified timezone (e.g. due to a transition from | ||
or to DST time) | ||
- 'coerce' will return NaT if the timestamp can not be converted | ||
into the specified timezone | ||
|
||
.. versionadded:: 0.19.0 | ||
.. deprecated:: 0.24.0 | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Returns | ||
------- | ||
|
@@ -999,13 +1009,26 @@ class Timestamp(_Timestamp): | |
if ambiguous == 'infer': | ||
raise ValueError('Cannot infer offset with only one time.') | ||
|
||
if errors is not None: | ||
warnings.warn("The errors argument is deprecated and will be " | ||
"removed in a future release. Use the ambiguous or " | ||
"nonexistent argument instead.", FutureWarning, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can be more explicit here about how to change your code (basically the replacement that you do the lines below in code) |
||
stacklevel=2) | ||
if errors == 'coerce': | ||
nonexistent = 'NaT' | ||
elif errors == 'raise': | ||
nonexistent = 'raise' | ||
else: | ||
raise ValueError("The errors argument must be either coerce " | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"or raise.") | ||
if self.tzinfo is None: | ||
# tz naive, localize | ||
tz = maybe_get_tz(tz) | ||
if not is_string_object(ambiguous): | ||
ambiguous = [ambiguous] | ||
value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, | ||
ambiguous=ambiguous, errors=errors)[0] | ||
ambiguous=ambiguous, | ||
nonexistent=nonexistent)[0] | ||
return Timestamp(value, tz=tz) | ||
else: | ||
if tz is None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you put this comment one line below?