From bf5e7bfa264cb1b0e4a9ddb8f1b839b699bb098c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 3 Sep 2018 23:12:54 -0700 Subject: [PATCH 01/37] ENH: Add handling of nonexistent times --- pandas/_libs/tslibs/conversion.pyx | 18 +++++++++++++++--- pandas/_libs/tslibs/timestamps.pyx | 14 ++++++++++++-- pandas/core/arrays/datetimes.py | 17 +++++++++++++---- pandas/core/generic.py | 20 +++++++++++++++----- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe664cf03b0b9..aa516c68fdd2c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -31,6 +31,7 @@ from util cimport (is_string_object, is_integer_object, is_float_object, is_array) from timedeltas cimport cast_from_unit +from timestamps import Timestamp from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, get_utcoffset, get_dst_info, @@ -826,7 +827,7 @@ def tz_convert(int64_t[:] vals, object tz1, object tz2): @cython.boundscheck(False) @cython.wraparound(False) def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, - object errors='raise'): + object nonexistent=None, object errors='raise'): """ Localize tzinfo-naive i8 to given time zone (using pytz). If there are ambiguities in the values, raise AmbiguousTimeError. 
@@ -837,6 +838,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, tz : tzinfo or None ambiguous : str, bool, or arraylike If arraylike, must have the same length as vals + nonexistent : str errors : {"raise", "coerce"}, default "raise" Returns @@ -853,7 +855,9 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, ndarray[int64_t] result, result_a, result_b, dst_hours npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False - bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' + bint infer_nonexisit = nonexistent == 'infer' + bint is_coerce = errors == 'coerce' or nonexistent == 'NaT' + bint is_raise = errors == 'raise' or nonexistent == 'raise' # Vectorized version of DstTzInfo.localize @@ -995,7 +999,15 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, elif right != NPY_NAT: result[i] = right else: - if is_coerce: + if infer_nonexisit: + # Infer the timestamp; based on pytz's DstTzInfo.normalize + val = vals[i] + utc_offset = get_utcoffset(tz, Timestamp(val)) + utc_offset = int(utc_offset.total_seconds()) * 1000000000 + utc_val = vals[i] - utc_offset + local_val = tz_convert_single(utc_val, 'UTC', tz) + result[i] = local_val + elif is_coerce: result[i] = NPY_NAT else: stamp = _render_tstamp(vals[i]) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ab1396c0fe38..d6cead9c577e2 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -861,7 +861,8 @@ class Timestamp(_Timestamp): def is_leap_year(self): return bool(ccalendar.is_leapyear(self.year)) - def tz_localize(self, tz, ambiguous='raise', errors='raise'): + def tz_localize(self, tz, ambiguous='raise', nonexisitent='raise', + errors='raise'): """ Convert naive Timestamp to local time zone, or remove timezone from tz-aware Timestamp. 
@@ -878,6 +879,13 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + nonexisitent : str {'NaT', 'raise'} + + - 'infer' will shift the non-existent time to a real local time + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an NonExistentTimeError if there are ambiguous + times + errors : 'raise', 'coerce', default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from @@ -905,7 +913,9 @@ class Timestamp(_Timestamp): if not is_string_object(ambiguous): ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, - ambiguous=ambiguous, errors=errors)[0] + ambiguous=ambiguous, + nonexisitent=nonexisitent, + errors=errors)[0] return Timestamp(value, tz=tz) else: if tz is None: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 484eb430c82b1..83673827d0ecf 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -639,7 +639,8 @@ def tz_convert(self, tz): # No conversion since timestamps are all UTC to begin with return self._shallow_copy(tz=tz) - def tz_localize(self, tz, ambiguous='raise', errors='raise'): + def tz_localize(self, tz, ambiguous='raise', nonexisitent='raise', + errors='raise'): """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. 
@@ -667,6 +668,13 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): - 'raise' will raise an AmbiguousTimeError if there are ambiguous times + nonexisitent : str {'NaT', 'raise'} + + - 'infer' will shift the non-existent time to a real local time + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an NonExistentTimeError if there are ambiguous + times + errors : {'raise', 'coerce'}, default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not @@ -726,9 +734,10 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): tz = timezones.maybe_get_tz(tz) # Convert to UTC - new_dates = conversion.tz_localize_to_utc(self.asi8, tz, - ambiguous=ambiguous, - errors=errors) + new_dates = conversion.tz_localize_to_utc( + self.asi8, tz, ambiguous=ambiguous, nonexisitent=nonexisitent, + errors=errors + ) new_dates = new_dates.view(_NS_DTYPE) return self._shallow_copy(new_dates, tz=tz) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85bd6065314f4..05fdde6a1ea21 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8475,7 +8475,7 @@ def _tz_convert(ax, tz): return result.__finalize__(self) def tz_localize(self, tz, axis=0, level=None, copy=True, - ambiguous='raise'): + ambiguous='raise', nonexisitent='raise'): """ Localize tz-naive TimeSeries to target time zone. 
@@ -8497,6 +8497,12 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times + nonexisitent : str {'NaT', 'raise'} + + - 'infer' will shift the non-existent time to a real local time + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an NonExistentTimeError if there are ambiguous + times Returns ------- @@ -8509,7 +8515,7 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, axis = self._get_axis_number(axis) ax = self._get_axis(axis) - def _tz_localize(ax, tz, ambiguous): + def _tz_localize(ax, tz, ambiguous, nonexisitent): if not hasattr(ax, 'tz_localize'): if len(ax) > 0: ax_name = self._get_axis_name(axis) @@ -8518,19 +8524,23 @@ def _tz_localize(ax, tz, ambiguous): else: ax = DatetimeIndex([], tz=tz) else: - ax = ax.tz_localize(tz, ambiguous=ambiguous) + ax = ax.tz_localize( + tz, ambiguous=ambiguous, nonexisitent=nonexisitent + ) return ax # if a level is given it must be a MultiIndex level or # equivalent to the axis name if isinstance(ax, MultiIndex): level = ax._get_level_number(level) - new_level = _tz_localize(ax.levels[level], tz, ambiguous) + new_level = _tz_localize( + ax.levels[level], tz, ambiguous, nonexisitent + ) ax = ax.set_levels(new_level, level=level) else: if level not in (None, 0, ax.name): raise ValueError("The level {0} is not valid".format(level)) - ax = _tz_localize(ax, tz, ambiguous) + ax = _tz_localize(ax, tz, ambiguous, nonexisitent) result = self._constructor(self._data, copy=copy) result.set_axis(ax, axis=axis, inplace=True) From 8753d00f8c1117d14cb927ca03beeff5eb6f7fcd Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 6 Sep 2018 23:47:27 -0700 Subject: [PATCH 02/37] correct misspelling --- pandas/_libs/tslibs/conversion.pyx | 6 ++++-- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- pandas/core/arrays/datetimes.py | 6 +++--- pandas/core/generic.py | 12 
++++++------ 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index aa516c68fdd2c..c0ba7aee7c7de 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -31,7 +31,6 @@ from util cimport (is_string_object, is_integer_object, is_float_object, is_array) from timedeltas cimport cast_from_unit -from timestamps import Timestamp from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, get_utcoffset, get_dst_info, @@ -1002,7 +1001,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if infer_nonexisit: # Infer the timestamp; based on pytz's DstTzInfo.normalize val = vals[i] - utc_offset = get_utcoffset(tz, Timestamp(val)) + dt64_to_dtstruct(val, &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + utc_offset = get_utcoffset(tz, dt) utc_offset = int(utc_offset.total_seconds()) * 1000000000 utc_val = vals[i] - utc_offset local_val = tz_convert_single(utc_val, 'UTC', tz) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d6cead9c577e2..2d9f083436b32 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -861,7 +861,7 @@ class Timestamp(_Timestamp): def is_leap_year(self): return bool(ccalendar.is_leapyear(self.year)) - def tz_localize(self, tz, ambiguous='raise', nonexisitent='raise', + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', errors='raise'): """ Convert naive Timestamp to local time zone, or remove @@ -879,7 +879,7 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - nonexisitent : str {'NaT', 'raise'} + nonexistent : str {'NaT', 'raise'} - 'infer' will shift the non-existent time to a real local time - 'NaT' will return NaT where there are 
ambiguous times @@ -914,7 +914,7 @@ class Timestamp(_Timestamp): ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, ambiguous=ambiguous, - nonexisitent=nonexisitent, + nonexistent=nonexistent, errors=errors)[0] return Timestamp(value, tz=tz) else: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 83673827d0ecf..f3ac0d0406cf7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -639,7 +639,7 @@ def tz_convert(self, tz): # No conversion since timestamps are all UTC to begin with return self._shallow_copy(tz=tz) - def tz_localize(self, tz, ambiguous='raise', nonexisitent='raise', + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', errors='raise'): """ Localize tz-naive Datetime Array/Index to tz-aware @@ -668,7 +668,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexisitent='raise', - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - nonexisitent : str {'NaT', 'raise'} + nonexistent : str {'NaT', 'raise'} - 'infer' will shift the non-existent time to a real local time - 'NaT' will return NaT where there are ambiguous times @@ -735,7 +735,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexisitent='raise', # Convert to UTC new_dates = conversion.tz_localize_to_utc( - self.asi8, tz, ambiguous=ambiguous, nonexisitent=nonexisitent, + self.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent, errors=errors ) new_dates = new_dates.view(_NS_DTYPE) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fe24e170698f7..0f5b21ad41400 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8552,7 +8552,7 @@ def _tz_convert(ax, tz): return result.__finalize__(self) def tz_localize(self, tz, axis=0, level=None, copy=True, - ambiguous='raise', nonexisitent='raise'): + ambiguous='raise', nonexistent='raise'): """ Localize tz-naive TimeSeries to target time zone. 
@@ -8574,7 +8574,7 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - nonexisitent : str {'NaT', 'raise'} + nonexistent : str {'NaT', 'raise'} - 'infer' will shift the non-existent time to a real local time - 'NaT' will return NaT where there are ambiguous times @@ -8592,7 +8592,7 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, axis = self._get_axis_number(axis) ax = self._get_axis(axis) - def _tz_localize(ax, tz, ambiguous, nonexisitent): + def _tz_localize(ax, tz, ambiguous, nonexistent): if not hasattr(ax, 'tz_localize'): if len(ax) > 0: ax_name = self._get_axis_name(axis) @@ -8602,7 +8602,7 @@ def _tz_localize(ax, tz, ambiguous, nonexisitent): ax = DatetimeIndex([], tz=tz) else: ax = ax.tz_localize( - tz, ambiguous=ambiguous, nonexisitent=nonexisitent + tz, ambiguous=ambiguous, nonexistent=nonexistent ) return ax @@ -8611,13 +8611,13 @@ def _tz_localize(ax, tz, ambiguous, nonexisitent): if isinstance(ax, MultiIndex): level = ax._get_level_number(level) new_level = _tz_localize( - ax.levels[level], tz, ambiguous, nonexisitent + ax.levels[level], tz, ambiguous, nonexistent ) ax = ax.set_levels(new_level, level=level) else: if level not in (None, 0, ax.name): raise ValueError("The level {0} is not valid".format(level)) - ax = _tz_localize(ax, tz, ambiguous, nonexisitent) + ax = _tz_localize(ax, tz, ambiguous, nonexistent) result = self._constructor(self._data, copy=copy) result.set_axis(ax, axis=axis, inplace=True) From a6a05df384c72787135a3308af83f2021e90ee57 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 15:52:05 -0700 Subject: [PATCH 03/37] change method of handling nonexistent times --- pandas/_libs/tslibs/conversion.pyx | 80 +++++++++++++++--------------- pandas/_libs/tslibs/timestamps.pyx | 6 +-- pandas/core/arrays/datetimes.py | 15 +++--- pandas/core/generic.py | 12 ++--- 4 files changed, 
57 insertions(+), 56 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c0ba7aee7c7de..f7f88e5315777 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -44,6 +44,7 @@ from nattype cimport NPY_NAT, checknull_with_nat # Constants cdef int64_t DAY_NS = 86400000000000LL +cdef int64_t HOURS_NS = 3600000000000 NS_DTYPE = np.dtype('M8[ns]') TD_DTYPE = np.dtype('m8[ns]') @@ -837,7 +838,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, tz : tzinfo or None ambiguous : str, bool, or arraylike If arraylike, must have the same length as vals - nonexistent : str + nonexistent : str, bool, or arraylike + If arraylike, must have the same length as vals errors : {"raise", "coerce"}, default "raise" Returns @@ -854,9 +856,9 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, ndarray[int64_t] result, result_a, result_b, dst_hours npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False - bint infer_nonexisit = nonexistent == 'infer' - bint is_coerce = errors == 'coerce' or nonexistent == 'NaT' - bint is_raise = errors == 'raise' or nonexistent == 'raise' + bint shift = False, fill_nonexist = False + bint is_coerce = errors == 'coerce' + bint is_raise = errors == 'raise' # Vectorized version of DstTzInfo.localize @@ -891,39 +893,43 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, "the same size as vals") ambiguous_array = np.asarray(ambiguous) + if is_string_object(nonexistent): + if nonexistent == 'NaT': + fill_nonexist = True + elif nonexistent == 'shift': + shift = True + trans, deltas, typ = get_dst_info(tz) tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) result_a = np.empty(n, dtype=np.int64) result_b = np.empty(n, dtype=np.int64) 
result_a.fill(NPY_NAT) result_b.fill(NPY_NAT) - # left side - idx_shifted = (np.maximum(0, trans.searchsorted( + idx_shifted_left = (np.maximum(0, trans.searchsorted( vals - DAY_NS, side='right') - 1)).astype(np.int64) - for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 - - # timestamp falls to the left side of the DST transition - if v + deltas[pos] == vals[i]: - result_a[i] = v - - # right side - idx_shifted = (np.maximum(0, trans.searchsorted( + idx_shifted_right = (np.maximum(0, trans.searchsorted( vals + DAY_NS, side='right') - 1)).astype(np.int64) for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 + val = vals[i] + v_left = val - deltas[idx_shifted_left[i]] + pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 + # timestamp falls to the left side of the DST transition + if v_left + deltas[pos_left] == val: + result_a[i] = v_left + v_right = val - deltas[idx_shifted_right[i]] + pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 # timestamp falls to the right side of the DST transition - if v + deltas[pos] == vals[i]: - result_b[i] = v + if v_right + deltas[pos_right] == val: + result_b[i] = v_right if infer_dst: dst_hours = np.empty(n, dtype=np.int64) @@ -938,7 +944,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, stamp = _render_tstamp(vals[trans_idx]) raise pytz.AmbiguousTimeError( "Cannot infer dst time from %s as there " - "are no repeated times" % stamp) + "are no repeated times".format(stamp)) # Split the array into contiguous chunks (where the difference between # indices is 1). 
These are effectively dst transitions in different # years which is useful for checking that there is not an ambiguous @@ -963,7 +969,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if switch_idx.size > 1: raise pytz.AmbiguousTimeError( "There are %i dst switches when " - "there should only be 1." % switch_idx.size) + "there should only be 1.".format(switch_idx.size)) switch_idx = switch_idx[0] + 1 # Pull the only index and adjust a_idx = grp[:switch_idx] @@ -971,10 +977,11 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) for i in range(n): + val = vals[i] left = result_a[i] right = result_b[i] - if vals[i] == NPY_NAT: - result[i] = vals[i] + if val == NPY_NAT: + result[i] = val elif left != NPY_NAT and right != NPY_NAT: if left == right: result[i] = left @@ -989,30 +996,25 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, elif fill: result[i] = NPY_NAT else: - stamp = _render_tstamp(vals[i]) + stamp = _render_tstamp(val) raise pytz.AmbiguousTimeError( "Cannot infer dst time from %r, try using the " - "'ambiguous' argument" % stamp) + "'ambiguous' argument".format(stamp)) elif left != NPY_NAT: result[i] = left elif right != NPY_NAT: result[i] = right else: - if infer_nonexisit: - # Infer the timestamp; based on pytz's DstTzInfo.normalize - val = vals[i] - dt64_to_dtstruct(val, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - utc_offset = get_utcoffset(tz, dt) - utc_offset = int(utc_offset.total_seconds()) * 1000000000 - utc_val = vals[i] - utc_offset - local_val = tz_convert_single(utc_val, 'UTC', tz) - result[i] = local_val - elif is_coerce: + # Handle nonexistent times + if shift: + remaining_minutes = val % HOURS_NS + new_local = val + (HOURS_NS - remaining_minutes) + delta_idx = trans.searchsorted(new_local, side='right') - 1 + result[i] = new_local - 
deltas[delta_idx] + elif fill_nonexist: result[i] = NPY_NAT else: - stamp = _render_tstamp(vals[i]) + stamp = _render_tstamp(val) raise pytz.NonExistentTimeError(stamp) return result diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f69a36a6db5be..8e793576031b9 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -885,9 +885,9 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - nonexistent : str {'NaT', 'raise'} - - - 'infer' will shift the non-existent time to a real local time + nonexistent : shift, 'NaT', default 'raise' + - 'shift' will shift the nonexistent times forward to the closest + existing time - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an NonExistentTimeError if there are ambiguous times diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a32b2f15317af..1bac80585891e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -633,8 +633,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', tz : string, pytz.timezone, dateutil.tz.tzfile or None Time zone to convert timestamps to. Passing ``None`` will remove the time zone information preserving local time. 
- ambiguous : str {'infer', 'NaT', 'raise'} or bool array, - default 'raise' + ambiguous : 'infer', 'NaT', bool array, default 'raise' - 'infer' will attempt to infer fall dst-transition hours based on order @@ -645,12 +644,12 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - nonexistent : str {'NaT', 'raise'} - - - 'infer' will shift the non-existent time to a real local time - - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an NonExistentTimeError if there are ambiguous - times + nonexistent : 'shift', 'NaT' default 'raise' + - 'shift' will shift the nonexistent times forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times errors : {'raise', 'coerce'}, default 'raise' diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 773d02e4a761b..175ea9d2f1019 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8614,12 +8614,12 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - nonexistent : str {'NaT', 'raise'} - - - 'infer' will shift the non-existent time to a real local time - - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an NonExistentTimeError if there are ambiguous - times + nonexistent : 'shift', 'NaT', default 'raise' + - 'shift' will shift the nonexistent times forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times Returns ------- From c4dc8aa22fb19215cbfc233253ae0e69e5bd10c9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 15:58:01 -0700 Subject: [PATCH 04/37] Add another comment --- 
pandas/_libs/tslibs/conversion.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f7f88e5315777..b53e4bf53b0ae 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -857,8 +857,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift = False, fill_nonexist = False - bint is_coerce = errors == 'coerce' - bint is_raise = errors == 'raise' + bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' # Vectorized version of DstTzInfo.localize @@ -1007,6 +1006,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, else: # Handle nonexistent times if shift: + # Shift the nonexistent time forward to the closest existing + # time remaining_minutes = val % HOURS_NS new_local = val + (HOURS_NS - remaining_minutes) delta_idx = trans.searchsorted(new_local, side='right') - 1 From 1bc81dbf9a46cc7f55a75cea9a14cff4f8927afc Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 17:45:26 -0700 Subject: [PATCH 05/37] Add tests for timestamps --- pandas/_libs/tslibs/conversion.pyx | 4 ++-- .../tests/scalar/timestamp/test_timezones.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index b53e4bf53b0ae..ebcddbe95b300 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -852,7 +852,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, ndarray ambiguous_array Py_ssize_t i, idx, pos, ntrans, n = len(vals) int64_t *tdata - int64_t v, left, right + int64_t v, left, right, val, v_left, v_right ndarray[int64_t] result, result_a, result_b, dst_hours npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill 
= False @@ -1012,7 +1012,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, new_local = val + (HOURS_NS - remaining_minutes) delta_idx = trans.searchsorted(new_local, side='right') - 1 result[i] = new_local - deltas[delta_idx] - elif fill_nonexist: + elif fill_nonexist or is_coerce: result[i] = NPY_NAT else: stamp = _render_tstamp(val) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 8cebfafeae82a..2924f96e77362 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -158,6 +158,25 @@ def test_timestamp_tz_localize(self, tz): assert result.hour == expected.hour assert result == expected + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + def test_timestamp_tz_localize_nonexistent_shift(self, tz): + ts = Timestamp('2015-03-29 02:20:00') + result = ts.tz_localize(tz, nonexistent='shift') + expected = Timestamp('2015-03-29 03:00:00').tz_localize(tz) + assert result == expected + + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + def test_timestamp_tz_localize_nonexistent_NaT(self, tz): + ts = Timestamp('2015-03-29 02:20:00') + result = ts.tz_localize(tz, nonexistent='NaT') + assert result is NaT + + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + def test_timestamp_tz_localize_nonexistent_raise(self, tz): + ts = Timestamp('2015-03-29 02:20:00') + with pytest.raises(pytz.NonExistentTimeError): + ts.tz_localize(tz, nonexistent='raise') + # ------------------------------------------------------------------ # Timestamp.tz_convert From c81d58cab7eb3efd6e3a9c00343bc2d5a63011b1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 17:58:36 -0700 Subject: [PATCH 06/37] Add tests for datetimeindex --- .../tests/indexes/datetimes/test_timezones.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git 
a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index dc01f7ccbd496..54f4c5e037ab0 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -574,6 +574,23 @@ def test_dti_tz_localize_bdate_range(self): localized = dr.tz_localize(pytz.utc) tm.assert_index_equal(dr_utc, localized) + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + @pytest.mark.parametrize('method, exp', [ + ['shift', '2015-03-29 03:00:00'], + ['NaT', pd.NaT], + ['raise', None] + ]) + def test_dti_tz_localize_nonexsistent(self, tz, method, exp): + n = 60 + dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') + if method == 'raise': + with pytest.raises(pytz.NonExistentTimeError): + dti.tz_localize(tz, nonexistent=method) + else: + result = dti.tz_localize(tz, nonexistent=method) + expected = DatetimeIndex([exp] * n, tz=tz) + tm.assert_index_equal(result, expected) + # ------------------------------------------------------------- # DatetimeIndex.normalize From b2c84293443be8fc27ba2f771f8c63223fbc779b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 18:21:36 -0700 Subject: [PATCH 07/37] Add series test and entry in timeseries.rst --- doc/source/timeseries.rst | 25 +++++++++++++++++++ .../tests/indexes/datetimes/test_timezones.py | 3 ++- .../tests/scalar/timestamp/test_timezones.py | 3 +++ pandas/tests/series/test_timezones.py | 21 +++++++++++++++- 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 5dfac98d069e7..c9e8b6ad1b714 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -2305,6 +2305,31 @@ constructor as well as ``tz_localize``. # tz_convert(None) is identical with tz_convert('UTC').tz_localize(None) didx.tz_convert('UCT').tz_localize(None) +.. 
_timeseries.timezone_nonexsistent: + +Nonexistent Times when Localizing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A DST transition may also shift the local time ahead by 1 hour creating nonexistent +local times. The behavior of localizing a timeseries with nonexistent times +can be controlled by the ``nonexistent`` argument. The following options are available: + +* ``shift``: Shifts nonexistent times forward to the closest real time +* ``NaT``: Replaces nonexistent times with ``NaT`` +* ``raise``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) + +.. ipython:: python + # 2:30 is a nonexistent time + dti = date_range(start='2015-03-29 01:30:00', periods=3, freq='H') + dti + dti.tz_localize('Europe/Warsaw', nonexistent='shift') + dti.tz_localize('Europe/Warsaw', nonexistent='NaT') + +.. code-block:: ipython + + In [2]: dti.tz_localize('Europe/Warsaw') + NonExistentTimeError: 2015-03-29 02:30:00 + .. _timeseries.timezone_series: TZ Aware Dtypes diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 54f4c5e037ab0..26aa8cbe05053 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -580,7 +580,8 @@ def test_dti_tz_localize_bdate_range(self): ['NaT', pd.NaT], ['raise', None] ]) - def test_dti_tz_localize_nonexsistent(self, tz, method, exp): + def test_dti_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 n = 60 dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') if method == 'raise': diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 2924f96e77362..1eb3dd07975ca 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -160,6 +160,7 @@ def test_timestamp_tz_localize(self, tz): @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) def 
test_timestamp_tz_localize_nonexistent_shift(self, tz): + # GH 8917 ts = Timestamp('2015-03-29 02:20:00') result = ts.tz_localize(tz, nonexistent='shift') expected = Timestamp('2015-03-29 03:00:00').tz_localize(tz) @@ -167,12 +168,14 @@ def test_timestamp_tz_localize_nonexistent_shift(self, tz): @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) def test_timestamp_tz_localize_nonexistent_NaT(self, tz): + # GH 8917 ts = Timestamp('2015-03-29 02:20:00') result = ts.tz_localize(tz, nonexistent='NaT') assert result is NaT @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) def test_timestamp_tz_localize_nonexistent_raise(self, tz): + # GH 8917 ts = Timestamp('2015-03-29 02:20:00') with pytest.raises(pytz.NonExistentTimeError): ts.tz_localize(tz, nonexistent='raise') diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 472b2c5644fa5..3ee110df3839e 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -13,7 +13,7 @@ from pandas._libs.tslibs import timezones, conversion from pandas.compat import lrange from pandas.core.indexes.datetimes import date_range -from pandas import Series, Timestamp, DatetimeIndex, Index +from pandas import Series, Timestamp, DatetimeIndex, Index, NaT class TestSeriesTimezones(object): @@ -60,6 +60,25 @@ def test_series_tz_localize_ambiguous_bool(self): result = ser.dt.tz_localize('US/Central', ambiguous=[False]) tm.assert_series_equal(result, expected1) + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + @pytest.mark.parametrize('method, exp', [ + ['shift', '2015-03-29 03:00:00'], + ['NaT', NaT], + ['raise', None] + ]) + def test_series_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') + s = Series(1, dti) + if method == 'raise': + with pytest.raises(pytz.NonExistentTimeError): + 
s.tz_localize(tz, nonexistent=method) + else: + result = s.tz_localize(tz, nonexistent=method) + expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) def test_series_tz_localize_empty(self, tzstr): # GH#2248 From a65987d424abf6be2a74636b9588aaefe28e8816 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 18:24:47 -0700 Subject: [PATCH 08/37] Add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fb7af00f61534..818419a6b077d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,6 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times (:issue:`8917`) .. 
_whatsnew_0240.api_breaking: From 710014cefece3c5e67604def64ab14cbf69205f0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 18:29:45 -0700 Subject: [PATCH 09/37] Clean up docstring --- pandas/_libs/tslibs/timestamps.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8e793576031b9..e3393c8c7caff 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -885,12 +885,12 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - nonexistent : shift, 'NaT', default 'raise' - - 'shift' will shift the nonexistent times forward to the closest + nonexistent : 'shift', 'NaT', default 'raise' + - 'shift' will shift the nonexistent time forward to the closest existing time - - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an NonExistentTimeError if there are ambiguous - times + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times errors : 'raise', 'coerce', default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not From 93159e52f8dcd5a7502e0f448879a5f2d65b7290 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 8 Sep 2018 21:53:07 -0700 Subject: [PATCH 10/37] Fix nat doc --- pandas/_libs/tslibs/nattype.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 08d9128ff660c..f138036f7a3d3 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -536,6 +536,13 @@ class NaTType(_NaT): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + nonexistent : 'shift', 'NaT', default 'raise' + - 'shift' will shift the nonexistent time forward to 
the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + errors : 'raise', 'coerce', default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from From 94a72a501f9342a5b1854f602fa903c6b820fd9a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 19 Sep 2018 10:12:32 -0700 Subject: [PATCH 11/37] add versionadded --- pandas/_libs/tslibs/nattype.pyx | 2 ++ pandas/_libs/tslibs/timestamps.pyx | 2 ++ pandas/core/arrays/datetimes.py | 2 ++ pandas/core/generic.py | 2 ++ 4 files changed, 8 insertions(+) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index e25409925832e..b589d2717c89d 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -541,6 +541,8 @@ class NaTType(_NaT): - 'NaT' will return NaT where there are nonexistent times - 'raise' will raise an NonExistentTimeError if there are nonexistent times + + .. versionadded:: 0.24.0 errors : 'raise', 'coerce', default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e3393c8c7caff..43c86335c3345 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -892,6 +892,8 @@ class Timestamp(_Timestamp): - 'raise' will raise an NonExistentTimeError if there are nonexistent times + .. versionadded:: 0.24.0 + errors : 'raise', 'coerce', default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. 
due to a transition from diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1bac80585891e..042b0cf9ff194 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -651,6 +651,8 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - 'raise' will raise an NonExistentTimeError if there are nonexistent times + .. versionadded:: 0.24.0 + errors : {'raise', 'coerce'}, default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6147ba610c8b3..c2507989379fe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8641,6 +8641,8 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'raise' will raise an NonExistentTimeError if there are nonexistent times + .. versionadded:: 0.24.0 + Returns ------- From 39b769e37db5a62860e3c9d2c0aa5019405ab691 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 19 Sep 2018 11:05:39 -0700 Subject: [PATCH 12/37] Remove whitespace --- pandas/_libs/tslibs/nattype.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index b589d2717c89d..7857c3371d828 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -541,7 +541,7 @@ class NaTType(_NaT): - 'NaT' will return NaT where there are nonexistent times - 'raise' will raise an NonExistentTimeError if there are nonexistent times - + .. 
versionadded:: 0.24.0 errors : 'raise', 'coerce', default 'raise' From 8852d430fbd4119e56d8ef46b480743a26621e1f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 23 Sep 2018 23:29:16 -0700 Subject: [PATCH 13/37] Deprecate errors and see what needs warning captures --- pandas/_libs/tslibs/conversion.pyx | 7 ++++++- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/core/arrays/datetimes.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 292c2c65bf40e..bfdd98bccf058 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import warnings import cython from cython import Py_ssize_t @@ -860,9 +861,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' # Vectorized version of DstTzInfo.localize - assert is_coerce or is_raise + if is_coerce: + warnings.warn("the errors argument is deprecated, will be removed " + "in a future release. Use the ambiguous or nonexistent " + "argument instead.", DeprecationWarning) + if tz == UTC or tz is None: return vals diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ee72aad0da4c9..f4565ebe94998 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -930,7 +930,7 @@ class Timestamp(_Timestamp): - 'coerce' will return NaT if the timestamp can not be converted into the specified timezone - .. versionadded:: 0.19.0 + ..
depreciated:: 0.24.0 Returns ------- diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 042b0cf9ff194..2f4925d6c0a3b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -661,7 +661,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - 'coerce' will return NaT if the timestamp can not be converted to the specified time zone - .. versionadded:: 0.19.0 + .. depreciated:: 0.24.0 Returns ------- From 38b95e98c9b9b325fe24191ac75c268fcd06b2c8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 24 Sep 2018 09:55:49 -0700 Subject: [PATCH 14/37] Correct NaT docstring --- pandas/_libs/tslibs/nattype.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 07938f3460b81..7c980b5faf038 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -580,7 +580,7 @@ class NaTType(_NaT): - 'coerce' will return NaT if the timestamp can not be converted into the specified timezone - .. versionadded:: 0.19.0 + .. depreciated:: 0.24.0 Returns ------- From c88b0d84c030dd6f45307529c460793d24109bed Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 24 Sep 2018 15:43:14 -0700 Subject: [PATCH 15/37] edit whatsnew and check for raised DeprecationWarning --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/tests/scalar/timestamp/test_timezones.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 8761c98d19fb7..91438459386db 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -570,6 +570,7 @@ Deprecations many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. 
(:issue:`21950`) - :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) - :func:`DatetimeIndex.shift` now accepts ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``ambiguous`` and ``nonexistent`` arguments (:issue:`8917`) .. _whatsnew_0240.prior_deprecations: diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 1eb3dd07975ca..b672f307b801c 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -86,7 +86,8 @@ def test_tz_localize_nonexistent(self, stamp, tz): ts.tz_localize(tz) with pytest.raises(NonExistentTimeError): ts.tz_localize(tz, errors='raise') - assert ts.tz_localize(tz, errors='coerce') is NaT + with tm.assert_produces_warning(DeprecationWarning): + assert ts.tz_localize(tz, errors='coerce') is NaT def test_tz_localize_errors_ambiguous(self): # GH#13057 From 1bae682d42e38c0295ed6299ba0f9ac439427fe6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 25 Sep 2018 17:28:34 -0700 Subject: [PATCH 16/37] Address review --- doc/source/timeseries.rst | 2 +- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/_libs/tslibs/conversion.pyx | 12 +++++------- pandas/_libs/tslibs/timestamps.pyx | 6 ++++++ pandas/core/arrays/datetimes.py | 5 +++++ pandas/tests/indexes/datetimes/test_timezones.py | 8 ++++++++ pandas/tests/scalar/timestamp/test_timezones.py | 4 +++- pandas/tests/series/test_timezones.py | 8 ++++++++ 8 files changed, 37 insertions(+), 10 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 38a0ea1fecef3..d9de21a293aaf 100644 --- a/doc/source/timeseries.rst +++
b/doc/source/timeseries.rst @@ -2353,7 +2353,7 @@ constructor as well as ``tz_localize``. .. _timeseries.timezone_nonexsistent: Nonexistent Times when Localizing -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A DST transition may also shift the local time ahead by 1 hour creating nonexistent local times. The behavior of localizing a timeseries with nonexistent times diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 91438459386db..69ed3efd09ea7 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -191,7 +191,7 @@ Other Enhancements - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). -- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times (:issue:`8917`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`) .. _whatsnew_0240.api_breaking: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index bfdd98bccf058..6e45ac2f8551b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -import warnings - import cython from cython import Py_ssize_t @@ -841,8 +839,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, If arraylike, must have the same length as vals nonexistent : str, bool, or arraylike If arraylike, must have the same length as vals + + .. 
versionadded:: 0.24.0 + errors : {"raise", "coerce"}, default "raise" + .. depreciated:: 0.24.0 + Returns ------- localized : ndarray[int64_t] @@ -863,11 +866,6 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, # Vectorized version of DstTzInfo.localize assert is_coerce or is_raise - if is_coerce: - warnings.warn("the errors argument is deprecated, will be removed " - "in a future release. Use the ambiguous or nonexistent " - "argument instead.", DeprecationWarning) - if tz == UTC or tz is None: return vals diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f4565ebe94998..2883a7435019f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -944,6 +944,12 @@ class Timestamp(_Timestamp): if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') + if errors != 'raise': + warnings.warn("The errors argument is deprecated and will be " + "removed in a future release. Use the ambiguous or " + "nonexistent argument instead.", FutureWarning, + stacklevel=2) + if self.tzinfo is None: # tz naive, localize tz = maybe_get_tz(tz) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 2f4925d6c0a3b..dc10c92d3a575 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -703,6 +703,11 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', '2018-03-03 09:00:00'], dtype='datetime64[ns]', freq='D') """ + if errors != 'raise': + warnings.warn("The errors argument is deprecated and will be " + "removed in a future release. 
Use the ambiguous or " + "nonexistent argument instead.", FutureWarning, + stacklevel=2) if self.tz is not None: if tz is None: new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 26aa8cbe05053..55d3f080d4cfd 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -592,6 +592,14 @@ def test_dti_tz_localize_nonexistent(self, tz, method, exp): expected = DatetimeIndex([exp] * n, tz=tz) tm.assert_index_equal(result, expected) + @pytest.mark.filterwarnings('ignore::FutureWarning') + def test_dti_tz_localize_errors_deprecation(self): + # GH 22644 + n = 60 + dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + dti.tz_localize('UTC', errors='coerce') + # ------------------------------------------------------------- # DatetimeIndex.normalize diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index b672f307b801c..40a6946a5a162 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -79,6 +79,7 @@ def test_tz_localize_ambiguous(self): ('2015-03-08 02:30', 'US/Pacific'), ('2015-03-29 02:00', 'Europe/Paris'), ('2015-03-29 02:30', 'Europe/Belgrade')]) + @pytest.mark.filterwarnings('ignore::FutureWarning') def test_tz_localize_nonexistent(self, stamp, tz): # GH#13057 ts = Timestamp(stamp) @@ -86,7 +87,8 @@ def test_tz_localize_nonexistent(self, stamp, tz): ts.tz_localize(tz) with pytest.raises(NonExistentTimeError): ts.tz_localize(tz, errors='raise') - with tm.assert_produces_warning(DeprecationWarning): + # GH 22644 + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert ts.tz_localize(tz, errors='coerce') is NaT def 
test_tz_localize_errors_ambiguous(self): diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 3ee110df3839e..e96218e1bedbb 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -33,6 +33,14 @@ def test_series_tz_localize(self): tm.assert_raises_regex(TypeError, 'Already tz-aware', ts.tz_localize, 'US/Eastern') + @pytest.mark.filterwarnings('ignore::FutureWarning') + def test_tz_localize_errors_deprecation(self): + rng = date_range('1/1/2011', periods=100, freq='H') + ts = Series(rng) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # GH 22644 + ts.dt.tz_localize('UTC', errors='coerce') + def test_series_tz_localize_ambiguous_bool(self): # make sure that we are correctly accepting bool values as ambiguous From d30f8916665e22f0f2f0b49f98cff376ef4a0bf5 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 26 Sep 2018 10:21:24 -0700 Subject: [PATCH 17/37] change default errors argument to None --- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/nattype.pyx | 4 ++-- pandas/_libs/tslibs/timestamps.pyx | 8 ++++---- pandas/core/arrays/datetimes.py | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6e45ac2f8551b..79b425035e1f0 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -844,7 +844,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, errors : {"raise", "coerce"}, default "raise" - .. depreciated:: 0.24.0 + .. deprecated:: 0.24.0 Returns ------- diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 7c980b5faf038..804fa74c1fd01 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -573,14 +573,14 @@ class NaTType(_NaT): .. 
versionadded:: 0.24.0 - errors : 'raise', 'coerce', default 'raise' + errors : 'raise', 'coerce', default None - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from or to DST time) - 'coerce' will return NaT if the timestamp can not be converted into the specified timezone - .. depreciated:: 0.24.0 + .. deprecated:: 0.24.0 Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2883a7435019f..bbe44e50caad8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -897,7 +897,7 @@ class Timestamp(_Timestamp): return bool(ccalendar.is_leapyear(self.year)) def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - errors='raise'): + errors=None): """ Convert naive Timestamp to local time zone, or remove timezone from tz-aware Timestamp. @@ -923,14 +923,14 @@ class Timestamp(_Timestamp): .. versionadded:: 0.24.0 - errors : 'raise', 'coerce', default 'raise' + errors : 'raise', 'coerce', default None - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from or to DST time) - 'coerce' will return NaT if the timestamp can not be converted into the specified timezone - .. depreciated:: 0.24.0 + .. deprecated:: 0.24.0 Returns ------- @@ -944,7 +944,7 @@ class Timestamp(_Timestamp): if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') - if errors != 'raise': + if errors is not None: warnings.warn("The errors argument is deprecated and will be " "removed in a future release. 
Use the ambiguous or " "nonexistent argument instead.", FutureWarning, diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index dc10c92d3a575..e9f521fbf64d0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -617,7 +617,7 @@ def tz_convert(self, tz): return self._shallow_copy(tz=tz) def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - errors='raise'): + errors=None): """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -653,7 +653,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', .. versionadded:: 0.24.0 - errors : {'raise', 'coerce'}, default 'raise' + errors : {'raise', 'coerce'}, default None - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified time zone (e.g. due to a transition from @@ -703,7 +703,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', '2018-03-03 09:00:00'], dtype='datetime64[ns]', freq='D') """ - if errors != 'raise': + if errors is not None: warnings.warn("The errors argument is deprecated and will be " "removed in a future release. 
Use the ambiguous or " "nonexistent argument instead.", FutureWarning, From f3376926639408e1d8f81054307210433c429696 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 26 Sep 2018 11:22:35 -0700 Subject: [PATCH 18/37] Map depreciation correctly and test --- pandas/_libs/tslibs/conversion.pyx | 15 ++++++--------- pandas/_libs/tslibs/timestamps.pyx | 8 +++++++- pandas/core/arrays/datetimes.py | 9 ++++++++- pandas/tests/indexes/datetimes/test_timezones.py | 8 +++++++- pandas/tests/scalar/timestamp/test_timezones.py | 13 +++++++++++++ pandas/tests/series/test_timezones.py | 13 ++++++++++--- 6 files changed, 51 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 79b425035e1f0..6b03f742943f0 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -458,8 +458,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, if tz is not None: # shift for localize_tso ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, - ambiguous='raise', - errors='raise')[0] + ambiguous='raise')[0] except OutOfBoundsDatetime: # GH#19382 for just-barely-OutOfBounds falling back to dateutil @@ -826,7 +825,7 @@ def tz_convert(int64_t[:] vals, object tz1, object tz2): @cython.boundscheck(False) @cython.wraparound(False) def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, - object nonexistent=None, object errors='raise'): + object nonexistent=None, object errors=None): """ Localize tzinfo-naive i8 to given time zone (using pytz). If there are ambiguities in the values, raise AmbiguousTimeError. @@ -842,7 +841,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, .. versionadded:: 0.24.0 - errors : {"raise", "coerce"}, default "raise" + errors : {"raise", "coerce"}, default None .. 
deprecated:: 0.24.0 @@ -861,11 +860,9 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift = False, fill_nonexist = False - bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' + bint is_coerce = errors == 'coerce' # Vectorized version of DstTzInfo.localize - assert is_coerce or is_raise - if tz == UTC or tz is None: return vals @@ -896,7 +893,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, ambiguous_array = np.asarray(ambiguous) if is_string_object(nonexistent): - if nonexistent == 'NaT': + if nonexistent == 'NaT' or is_coerce: fill_nonexist = True elif nonexistent == 'shift': shift = True @@ -1015,7 +1012,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, new_local = val + (HOURS_NS - remaining_minutes) delta_idx = trans.searchsorted(new_local, side='right') - 1 result[i] = new_local - deltas[delta_idx] - elif fill_nonexist or is_coerce: + elif fill_nonexist: result[i] = NPY_NAT else: stamp = _render_tstamp(val) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index bbe44e50caad8..4804e5e98a9b9 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -949,7 +949,13 @@ class Timestamp(_Timestamp): "removed in a future release. 
Use the ambiguous or " "nonexistent argument instead.", FutureWarning, stacklevel=2) - + if errors == 'coerce': + nonexistent = 'NaT' + elif errors == 'raise': + nonexistent = 'raise' + else: + raise ValueError("The errors argument must be either coerce " + "or raise.") if self.tzinfo is None: # tz naive, localize tz = maybe_get_tz(tz) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e9f521fbf64d0..56d3e1afdffff 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -661,7 +661,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - 'coerce' will return NaT if the timestamp can not be converted to the specified time zone - .. depreciated:: 0.24.0 + .. deprecated:: 0.24.0 Returns ------- @@ -708,6 +708,13 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', "removed in a future release. Use the ambiguous or " "nonexistent argument instead.", FutureWarning, stacklevel=2) + if errors == 'coerce': + nonexistent = 'NaT' + elif errors == 'raise': + nonexistent = 'raise' + else: + raise ValueError("The errors argument must be either coerce " + "or raise.") if self.tz is not None: if tz is None: new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 55d3f080d4cfd..b39dd02e47a1d 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -595,10 +595,16 @@ def test_dti_tz_localize_nonexistent(self, tz, method, exp): @pytest.mark.filterwarnings('ignore::FutureWarning') def test_dti_tz_localize_errors_deprecation(self): # GH 22644 + tz = 'Europe/Warsaw' n = 60 dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - dti.tz_localize('UTC', errors='coerce') + with pytest.raises(ValueError): + dti.tz_localize(tz, 
errors='foo') + # make sure errors='coerce' gets mapped correctly to nonexistent + result = dti.tz_localize(tz, errors='coerce') + expected = dti.tz_localize(tz, nonexistent='NaT') + tm.assert_index_equal(result, expected) # ------------------------------------------------------------- # DatetimeIndex.normalize diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 40a6946a5a162..4de2abdf9a7bb 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -97,6 +97,19 @@ def test_tz_localize_errors_ambiguous(self): with pytest.raises(AmbiguousTimeError): ts.tz_localize('US/Pacific', errors='coerce') + @pytest.mark.filterwarnings('ignore::FutureWarning') + def test_tz_localize_errors_depreciation(self): + # GH 22644 + tz = 'Europe/Warsaw' + ts = Timestamp('2015-03-29 02:00:00') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(ValueError): + ts.tz_localize(tz, errors='foo') + # make sure errors='coerce' gets mapped correctly to nonexistent + result = ts.tz_localize(tz, errors='coerce') + expected = ts.tz_localize(tz, nonexistent='NaT') + assert result is expected + @pytest.mark.parametrize('stamp', ['2014-02-01 09:00', '2014-07-08 09:00', '2014-11-01 17:00', '2014-11-05 00:00']) def test_tz_localize_roundtrip(self, stamp, tz_aware_fixture): diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index e96218e1bedbb..5560bb53e332f 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -35,11 +35,18 @@ def test_series_tz_localize(self): @pytest.mark.filterwarnings('ignore::FutureWarning') def test_tz_localize_errors_deprecation(self): - rng = date_range('1/1/2011', periods=100, freq='H') + # GH 22644 + tz = 'Europe/Warsaw' + n = 60 + rng = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') ts = Series(rng) with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - # GH 22644 - ts.dt.tz_localize('UTC', errors='coerce') + with pytest.raises(ValueError): + ts.dt.tz_localize(tz, errors='foo') + # make sure errors='coerce' gets mapped correctly to nonexistent + result = ts.dt.tz_localize(tz, errors='coerce') + expected = ts.dt.tz_localize(tz, nonexistent='NaT') + tm.assert_series_equal(result, expected) def test_series_tz_localize_ambiguous_bool(self): # make sure that we are correctly accepting bool values as ambiguous From a7b83579639b52ec8ad8d9105c136df1064daa21 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 26 Sep 2018 16:56:50 -0700 Subject: [PATCH 19/37] Try to correctly test for FutureWarning --- pandas/tests/scalar/timestamp/test_timezones.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 4de2abdf9a7bb..6e36b71554185 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -105,10 +105,11 @@ def test_tz_localize_errors_depreciation(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): with pytest.raises(ValueError): ts.tz_localize(tz, errors='foo') - # make sure errors='coerce' gets mapped correctly to nonexistent + # make sure errors='coerce' gets mapped correctly to nonexistent + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = ts.tz_localize(tz, errors='coerce') - expected = ts.tz_localize(tz, nonexistent='NaT') - assert result is expected + expected = ts.tz_localize(tz, nonexistent='NaT') + assert result is expected @pytest.mark.parametrize('stamp', ['2014-02-01 09:00', '2014-07-08 09:00', '2014-11-01 17:00', '2014-11-05 00:00']) From 7ad87ec3b343aa87debb8ee0fd8dafc9675a2f70 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 26 Sep 2018 23:18:56 -0700 Subject: [PATCH 20/37] Try adjusting 
catching FutureWarning --- pandas/tests/scalar/timestamp/test_timezones.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 6e36b71554185..17162f7561854 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -85,9 +85,11 @@ def test_tz_localize_nonexistent(self, stamp, tz): ts = Timestamp(stamp) with pytest.raises(NonExistentTimeError): ts.tz_localize(tz) - with pytest.raises(NonExistentTimeError): - ts.tz_localize(tz, errors='raise') # GH 22644 + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + pytest.raises( + NonExistentTimeError, ts.tz_localize(tz, errors='raise') + ) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert ts.tz_localize(tz, errors='coerce') is NaT @@ -103,8 +105,7 @@ def test_tz_localize_errors_depreciation(self): tz = 'Europe/Warsaw' ts = Timestamp('2015-03-29 02:00:00') with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with pytest.raises(ValueError): - ts.tz_localize(tz, errors='foo') + pytest.raises(ValueError, ts.tz_localize(tz, errors='foo')) # make sure errors='coerce' gets mapped correctly to nonexistent with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = ts.tz_localize(tz, errors='coerce') From 6be1c253c19e1641b1f588e784fe86e8b6c69587 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 27 Sep 2018 15:43:40 -0700 Subject: [PATCH 21/37] Reorder context managers --- pandas/tests/scalar/timestamp/test_timezones.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 17162f7561854..29c8e9554c710 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -86,10 
+86,10 @@ def test_tz_localize_nonexistent(self, stamp, tz): with pytest.raises(NonExistentTimeError): ts.tz_localize(tz) # GH 22644 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - pytest.raises( - NonExistentTimeError, ts.tz_localize(tz, errors='raise') - ) + with pytest.raises(NonExistentTimeError): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts.tz_localize(tz, errors='raise') with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert ts.tz_localize(tz, errors='coerce') is NaT @@ -104,8 +104,10 @@ def test_tz_localize_errors_depreciation(self): # GH 22644 tz = 'Europe/Warsaw' ts = Timestamp('2015-03-29 02:00:00') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - pytest.raises(ValueError, ts.tz_localize(tz, errors='foo')) + with pytest.raises(ValueError): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts.tz_localize(tz, errors='foo') # make sure errors='coerce' gets mapped correctly to nonexistent with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = ts.tz_localize(tz, errors='coerce') From f8be4b65e8fe24c2d2c1f076505cecc8b1c7b82a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 27 Sep 2018 19:09:29 -0700 Subject: [PATCH 22/37] clear previously seen FutureWarning --- pandas/tests/scalar/timestamp/test_timezones.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 29c8e9554c710..fb9f99218bae5 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -109,7 +109,8 @@ def test_tz_localize_errors_depreciation(self): check_stacklevel=False): ts.tz_localize(tz, errors='foo') # make sure errors='coerce' gets mapped correctly to nonexistent - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False, + clear=FutureWarning): result = ts.tz_localize(tz, errors='coerce') expected = ts.tz_localize(tz, nonexistent='NaT') assert result is expected From c192c9fd6d0ae8737728c1269f07d97837d8c0db Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 27 Sep 2018 23:29:12 -0700 Subject: [PATCH 23/37] separate test --- pandas/tests/scalar/timestamp/test_timezones.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index fb9f99218bae5..6236b2db18338 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -100,7 +100,7 @@ def test_tz_localize_errors_ambiguous(self): ts.tz_localize('US/Pacific', errors='coerce') @pytest.mark.filterwarnings('ignore::FutureWarning') - def test_tz_localize_errors_depreciation(self): + def test_tz_localize_errors_invalid_arg(self): # GH 22644 tz = 'Europe/Warsaw' ts = Timestamp('2015-03-29 02:00:00') @@ -108,9 +108,14 @@ def test_tz_localize_errors_depreciation(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): ts.tz_localize(tz, errors='foo') + + @pytest.mark.filterwarnings('ignore::FutureWarning') + def test_tz_localize_errors_deprecation(self): + # GH 22644 # make sure errors='coerce' gets mapped correctly to nonexistent - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False, - clear=FutureWarning): + tz = 'Europe/Warsaw' + ts = Timestamp('2015-03-29 02:00:00') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = ts.tz_localize(tz, errors='coerce') expected = ts.tz_localize(tz, nonexistent='NaT') assert result is expected From 01678c7758e2b6af47dafd62eac9ca28a46549d3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 29 Sep 2018 21:47:14 -0700 Subject: [PATCH 24/37] adjust test --- 
pandas/tests/scalar/timestamp/test_timezones.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 6236b2db18338..ee358b053f886 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -109,14 +109,12 @@ def test_tz_localize_errors_invalid_arg(self): check_stacklevel=False): ts.tz_localize(tz, errors='foo') - @pytest.mark.filterwarnings('ignore::FutureWarning') - def test_tz_localize_errors_deprecation(self): + def test_tz_localize_errors_coerce(self): # GH 22644 # make sure errors='coerce' gets mapped correctly to nonexistent tz = 'Europe/Warsaw' ts = Timestamp('2015-03-29 02:00:00') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ts.tz_localize(tz, errors='coerce') + result = ts.tz_localize(tz, errors='coerce') expected = ts.tz_localize(tz, nonexistent='NaT') assert result is expected From ae27a5094ca5c469a5f38267e687098da28a934c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 30 Sep 2018 14:19:56 -0700 Subject: [PATCH 25/37] Remove errors argument to tz_localize_to_utc --- pandas/_libs/tslibs/conversion.pyx | 9 ++------- pandas/_libs/tslibs/timestamps.pyx | 3 +-- pandas/core/arrays/datetimes.py | 1 - 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6b03f742943f0..e81c510c195f0 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -825,7 +825,7 @@ def tz_convert(int64_t[:] vals, object tz1, object tz2): @cython.boundscheck(False) @cython.wraparound(False) def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, - object nonexistent=None, object errors=None): + object nonexistent=None): """ Localize tzinfo-naive i8 to given time zone (using pytz). 
If there are ambiguities in the values, raise AmbiguousTimeError. @@ -841,10 +841,6 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, .. versionadded:: 0.24.0 - errors : {"raise", "coerce"}, default None - - .. deprecated:: 0.24.0 - Returns ------- localized : ndarray[int64_t] @@ -860,7 +856,6 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift = False, fill_nonexist = False - bint is_coerce = errors == 'coerce' # Vectorized version of DstTzInfo.localize if tz == UTC or tz is None: @@ -893,7 +888,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, ambiguous_array = np.asarray(ambiguous) if is_string_object(nonexistent): - if nonexistent == 'NaT' or is_coerce: + if nonexistent == 'NaT': fill_nonexist = True elif nonexistent == 'shift': shift = True diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4804e5e98a9b9..6e3330f3d42ce 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -963,8 +963,7 @@ class Timestamp(_Timestamp): ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, ambiguous=ambiguous, - nonexistent=nonexistent, - errors=errors)[0] + nonexistent=nonexistent)[0] return Timestamp(value, tz=tz) else: if tz is None: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 56d3e1afdffff..23e5c672df7f2 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -726,7 +726,6 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', new_dates = conversion.tz_localize_to_utc( self.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent, - errors=errors ) new_dates = new_dates.view(_NS_DTYPE) return self._shallow_copy(new_dates, tz=tz) From 9041ebe8a0aa199d8a354883708a0120c758fb3e Mon Sep 17 00:00:00 2001 
From: Matt Roeschke Date: Thu, 4 Oct 2018 15:27:39 -0700 Subject: [PATCH 26/37] Add nonexistent assert --- pandas/_libs/tslibs/conversion.pyx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index e81c510c195f0..5da813b511cb9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -836,7 +836,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, tz : tzinfo or None ambiguous : str, bool, or arraylike If arraylike, must have the same length as vals - nonexistent : str, bool, or arraylike + nonexistent : str If arraylike, must have the same length as vals .. versionadded:: 0.24.0 @@ -887,11 +887,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, "the same size as vals") ambiguous_array = np.asarray(ambiguous) - if is_string_object(nonexistent): - if nonexistent == 'NaT': - fill_nonexist = True - elif nonexistent == 'shift': - shift = True + assert nonexistent in ('NaT', 'raise', 'shift'), ("nonexistent must be " + "one of {'NaT', 'raise'," + " 'shift'}") + if nonexistent == 'NaT': + fill_nonexist = True + elif nonexistent == 'shift': + shift = True trans, deltas, typ = get_dst_info(tz) From a4cdac2d71d6939f962c52effabc6bf9c6f27748 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 4 Oct 2018 17:01:38 -0700 Subject: [PATCH 27/37] Handle default None arg --- pandas/_libs/tslibs/conversion.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5da813b511cb9..f919a545f6d25 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -887,13 +887,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, "the same size as vals") ambiguous_array = np.asarray(ambiguous) - assert nonexistent in ('NaT', 'raise', 
'shift'), ("nonexistent must be " - "one of {'NaT', 'raise'," - " 'shift'}") if nonexistent == 'NaT': fill_nonexist = True elif nonexistent == 'shift': shift = True + else: + assert nonexistent in ('raise', None), ("nonexistent must be one of" + "{'NaT', 'raise', 'shift'}") trans, deltas, typ = get_dst_info(tz) From efb382e147d878d897b59e91bd017ce0b3d4b5a9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 5 Oct 2018 22:46:12 -0700 Subject: [PATCH 28/37] Address review --- doc/source/timeseries.rst | 21 ++++++++++++------- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/_libs/tslibs/timestamps.pyx | 5 +++-- pandas/core/arrays/datetimes.py | 5 +++-- .../tests/indexes/datetimes/test_timezones.py | 4 +++- .../tests/scalar/timestamp/test_timezones.py | 7 +++++-- 6 files changed, 29 insertions(+), 15 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 835dedbbb5b8c..a52c80106f100 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -2357,7 +2357,7 @@ constructor as well as ``tz_localize``. # tz_convert(None) is identical with tz_convert('UTC').tz_localize(None) didx.tz_convert('UCT').tz_localize(None) -.. _timeseries.timezone_nonexsistent: +.. _timeseries.timezone_nonexistent: Nonexistent Times when Localizing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2366,22 +2366,29 @@ A DST transition may also shift the local time ahead by 1 hour creating nonexist local times. The behavior of localizing a timeseries with nonexistent times can be controlled by the ``nonexistent`` argument. The following options are available: -* ``shift``: Shifts nonexistent times forward to the closest real time -* ``NaT``: Replaces nonexistent times with ``NaT`` * ``raise``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) +* ``NaT``: Replaces nonexistent times with ``NaT`` +* ``shift``: Shifts nonexistent times forward to the closest real time .. 
ipython:: python - # 2:30 is a nonexistent time dti = date_range(start='2015-03-29 01:30:00', periods=3, freq='H') - dti - dti.tz_localize('Europe/Warsaw', nonexistent='shift') - dti.tz_localize('Europe/Warsaw', nonexistent='NaT') + # 2:30 is a nonexistent time + +Localization of nonexistent times will raise an error by default. .. code-block:: ipython In [2]: dti.tz_localize('Europe/Warsaw') NonExistentTimeError: 2015-03-29 02:30:00 +Transform nonexistent times to ``NaT`` or the closest real time forward in time. + +.. ipython:: python + dti + dti.tz_localize('Europe/Warsaw', nonexistent='shift') + dti.tz_localize('Europe/Warsaw', nonexistent='NaT') + + .. _timeseries.timezone_series: TZ Aware Dtypes diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 97a4111a6f146..30311e13a64ea 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -606,7 +606,7 @@ Deprecations many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`) - :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) - :func:`DatetimeIndex.shift` now accepts ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`) -- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors` argument in favor of the ``ambiguous`` and ``nonexistent`` arguments (:issue:`8917`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``nonexistent`` argument (:issue:`8917`) .. 
_whatsnew_0240.prior_deprecations: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index fadcf60cbb0c0..be193aebb8bd4 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1011,8 +1011,9 @@ class Timestamp(_Timestamp): if errors is not None: warnings.warn("The errors argument is deprecated and will be " - "removed in a future release. Use the ambiguous or " - "nonexistent argument instead.", FutureWarning, + "removed in a future release. Use " + "nonexistent='NaT' or nonexistent='raise' " + "instead.", FutureWarning, stacklevel=2) if errors == 'coerce': nonexistent = 'NaT' diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 23e5c672df7f2..6a0d845679bdc 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -705,8 +705,9 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', """ if errors is not None: warnings.warn("The errors argument is deprecated and will be " - "removed in a future release. Use the ambiguous or " - "nonexistent argument instead.", FutureWarning, + "removed in a future release. 
Use " + "nonexistent='NaT' or nonexistent='raise' " + "instead.", FutureWarning, stacklevel=2) if errors == 'coerce': nonexistent = 'NaT' diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index b39dd02e47a1d..7651058562d9d 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -312,7 +312,9 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): index.tz_localize(tz=tz) with pytest.raises(pytz.NonExistentTimeError): - index.tz_localize(tz=tz, errors='raise') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + index.tz_localize(tz=tz, errors='raise') result = index.tz_localize(tz=tz, errors='coerce') test_times = ['2015-03-08 01:00-05:00', 'NaT', diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index ee358b053f886..b7c47e3a3c3ce 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -97,7 +97,9 @@ def test_tz_localize_errors_ambiguous(self): # GH#13057 ts = Timestamp('2015-11-1 01:00') with pytest.raises(AmbiguousTimeError): - ts.tz_localize('US/Pacific', errors='coerce') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts.tz_localize('US/Pacific', errors='coerce') @pytest.mark.filterwarnings('ignore::FutureWarning') def test_tz_localize_errors_invalid_arg(self): @@ -114,7 +116,8 @@ def test_tz_localize_errors_coerce(self): # make sure errors='coerce' gets mapped correctly to nonexistent tz = 'Europe/Warsaw' ts = Timestamp('2015-03-29 02:00:00') - result = ts.tz_localize(tz, errors='coerce') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ts.tz_localize(tz, errors='coerce') expected = ts.tz_localize(tz, nonexistent='NaT') assert result is expected From 61c73cac35ce37d8f01e1ddef3ea7836e6d233d9 Mon Sep 17 00:00:00 2001 From: 
Matt Roeschke Date: Fri, 5 Oct 2018 23:22:51 -0700 Subject: [PATCH 29/37] Catch another warning --- pandas/tests/indexes/datetimes/test_timezones.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 7651058562d9d..cebd4140ad4ec 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -316,7 +316,9 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): check_stacklevel=False): index.tz_localize(tz=tz, errors='raise') - result = index.tz_localize(tz=tz, errors='coerce') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False, + clear=FutureWarning): + result = index.tz_localize(tz=tz, errors='coerce') test_times = ['2015-03-08 01:00-05:00', 'NaT', '2015-03-08 03:00-04:00'] dti = to_datetime(test_times, utc=True) From 394a0dbd770180bd1028a55307822ffb0d45f48c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 6 Oct 2018 23:30:58 -0700 Subject: [PATCH 30/37] Add extra docstring --- pandas/_libs/tslibs/nattype.pyx | 4 ++-- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- pandas/core/arrays/datetimes.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 804fa74c1fd01..4cf3f6b33a067 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -576,9 +576,9 @@ class NaTType(_NaT): errors : 'raise', 'coerce', default None - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from - or to DST time) + or to DST time). Use ``nonexistent='raise'`` instead. - 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone + into the specified timezone. Use ``nonexistent='NaT'`` instead. .. 
deprecated:: 0.24.0 diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index be193aebb8bd4..912cf93b80655 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -991,9 +991,9 @@ class Timestamp(_Timestamp): errors : 'raise', 'coerce', default None - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from - or to DST time) + or to DST time). Use ``nonexistent='raise'`` instead. - 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone + into the specified timezone. Use ``nonexistent='NaT'`` instead. .. deprecated:: 0.24.0 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 6a0d845679bdc..e448e1685095d 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -657,9 +657,9 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified time zone (e.g. due to a transition from - or to DST time) + or to DST time). Use ``nonexistent='raise'`` instead. - 'coerce' will return NaT if the timestamp can not be converted - to the specified time zone + to the specified time zone. Use ``nonexistent='NaT'`` instead. .. deprecated:: 0.24.0 From 51856838a0fa42b1cdad63f50275090d63d6baa9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 8 Oct 2018 16:09:42 -0700 Subject: [PATCH 31/37] Edit whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e818dbcb56788..7b4b13673a7fe 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -647,7 +647,6 @@ Deprecations - :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. 
A list-like container may still contain many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`) - :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) -- :func:`DatetimeIndex.shift` now accepts ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`) - :func:`DatetimeIndex.shift` and :func:`PeriodIndex.shift` now accept ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`, :issue:`22912`) - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``nonexistent`` argument (:issue:`8917`) From 8b06c96de782ec83df0e29f532663d93a92b3e05 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 11 Oct 2018 15:26:41 -0700 Subject: [PATCH 32/37] Address comments --- pandas/_libs/tslibs/nattype.pyx | 2 ++ pandas/_libs/tslibs/timestamps.pyx | 2 ++ pandas/core/arrays/datetimes.py | 2 ++ pandas/core/generic.py | 2 ++ pandas/tests/indexes/datetimes/test_timezones.py | 8 +++----- pandas/tests/scalar/timestamp/test_timezones.py | 13 +++++-------- pandas/tests/series/test_timezones.py | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 4cf3f6b33a067..844d8a548a4a0 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -565,6 +565,8 @@ class NaTType(_NaT): - 'raise' will raise an AmbiguousTimeError for an ambiguous time nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time doesn't not exist in a particular timezone + where clocks moved forward due to DST. 
- 'shift' will shift the nonexistent time forward to the closest existing time - 'NaT' will return NaT where there are nonexistent times diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 912cf93b80655..9739e12442237 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -980,6 +980,8 @@ class Timestamp(_Timestamp): - 'raise' will raise an AmbiguousTimeError for an ambiguous time nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time doesn't not exist in a particular timezone + where clocks moved forward due to DST. - 'shift' will shift the nonexistent time forward to the closest existing time - 'NaT' will return NaT where there are nonexistent times diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 066f8d109d6ad..f75f8a9998b6e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -649,6 +649,8 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', times nonexistent : 'shift', 'NaT' default 'raise' + A nonexistent time doesn't not exist in a particular timezone + where clocks moved forward due to DST. - 'shift' will shift the nonexistent times forward to the closest existing time - 'NaT' will return NaT where there are nonexistent times diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0bbbd40a9d156..abf4318969cb5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8656,6 +8656,8 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'raise' will raise an AmbiguousTimeError if there are ambiguous times nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time doesn't not exist in a particular timezone + where clocks moved forward due to DST. 
- 'shift' will shift the nonexistent times forward to the closest existing time - 'NaT' will return NaT where there are nonexistent times diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index cebd4140ad4ec..2786546a364df 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -312,12 +312,10 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): index.tz_localize(tz=tz) with pytest.raises(pytz.NonExistentTimeError): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): index.tz_localize(tz=tz, errors='raise') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False, - clear=FutureWarning): + with tm.assert_produces_warning(FutureWarning, clear=FutureWarning): result = index.tz_localize(tz=tz, errors='coerce') test_times = ['2015-03-08 01:00-05:00', 'NaT', '2015-03-08 03:00-04:00'] @@ -602,7 +600,7 @@ def test_dti_tz_localize_errors_deprecation(self): tz = 'Europe/Warsaw' n = 60 dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): with pytest.raises(ValueError): dti.tz_localize(tz, errors='foo') # make sure errors='coerce' gets mapped correctly to nonexistent diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index b7c47e3a3c3ce..654eebc55735a 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -87,18 +87,16 @@ def test_tz_localize_nonexistent(self, stamp, tz): ts.tz_localize(tz) # GH 22644 with pytest.raises(NonExistentTimeError): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): ts.tz_localize(tz, 
errors='raise') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): assert ts.tz_localize(tz, errors='coerce') is NaT def test_tz_localize_errors_ambiguous(self): # GH#13057 ts = Timestamp('2015-11-1 01:00') with pytest.raises(AmbiguousTimeError): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): ts.tz_localize('US/Pacific', errors='coerce') @pytest.mark.filterwarnings('ignore::FutureWarning') @@ -107,8 +105,7 @@ def test_tz_localize_errors_invalid_arg(self): tz = 'Europe/Warsaw' ts = Timestamp('2015-03-29 02:00:00') with pytest.raises(ValueError): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): ts.tz_localize(tz, errors='foo') def test_tz_localize_errors_coerce(self): @@ -116,7 +113,7 @@ def test_tz_localize_errors_coerce(self): # make sure errors='coerce' gets mapped correctly to nonexistent tz = 'Europe/Warsaw' ts = Timestamp('2015-03-29 02:00:00') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = ts.tz_localize(tz, errors='coerce') expected = ts.tz_localize(tz, nonexistent='NaT') assert result is expected diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 5560bb53e332f..5cca0bd88dd43 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -40,7 +40,7 @@ def test_tz_localize_errors_deprecation(self): n = 60 rng = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') ts = Series(rng) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): with pytest.raises(ValueError): ts.dt.tz_localize(tz, errors='foo') # make sure errors='coerce' gets mapped correctly to nonexistent From 
42ae9230baebf8bbc4b3b4214e441d8822bff54a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 11 Oct 2018 19:27:13 -0700 Subject: [PATCH 33/37] Remove stacklevel --- pandas/_libs/tslibs/timestamps.pyx | 3 +-- pandas/core/arrays/datetimes.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9739e12442237..3eb837838f680 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1015,8 +1015,7 @@ class Timestamp(_Timestamp): warnings.warn("The errors argument is deprecated and will be " "removed in a future release. Use " "nonexistent='NaT' or nonexistent='raise' " - "instead.", FutureWarning, - stacklevel=2) + "instead.", FutureWarning) if errors == 'coerce': nonexistent = 'NaT' elif errors == 'raise': diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f75f8a9998b6e..ec4c6715efb20 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -713,8 +713,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', warnings.warn("The errors argument is deprecated and will be " "removed in a future release. 
Use " "nonexistent='NaT' or nonexistent='raise' " - "instead.", FutureWarning, - stacklevel=2) + "instead.", FutureWarning) if errors == 'coerce': nonexistent = 'NaT' elif errors == 'raise': From fe575fe33045e09097404cd88da692c26a914d55 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 11 Oct 2018 20:30:32 -0700 Subject: [PATCH 34/37] Add back check_stacklevel --- pandas/tests/indexes/datetimes/test_timezones.py | 6 ++++-- pandas/tests/series/test_timezones.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 2786546a364df..03c3f8e498b63 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -315,7 +315,9 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): with tm.assert_produces_warning(FutureWarning): index.tz_localize(tz=tz, errors='raise') - with tm.assert_produces_warning(FutureWarning, clear=FutureWarning): + with tm.assert_produces_warning(FutureWarning, + clear=FutureWarning, + check_stacklevel=False): result = index.tz_localize(tz=tz, errors='coerce') test_times = ['2015-03-08 01:00-05:00', 'NaT', '2015-03-08 03:00-04:00'] @@ -600,7 +602,7 @@ def test_dti_tz_localize_errors_deprecation(self): tz = 'Europe/Warsaw' n = 60 dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): with pytest.raises(ValueError): dti.tz_localize(tz, errors='foo') # make sure errors='coerce' gets mapped correctly to nonexistent diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 5cca0bd88dd43..5560bb53e332f 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -40,7 +40,7 @@ def test_tz_localize_errors_deprecation(self): n = 60 rng = 
date_range(start='2015-03-29 02:00:00', periods=n, freq='min') ts = Series(rng) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): with pytest.raises(ValueError): ts.dt.tz_localize(tz, errors='foo') # make sure errors='coerce' gets mapped correctly to nonexistent From 3482f9263db84ba1515c801ef0e58d2cef04e3b0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 17 Oct 2018 09:58:10 -0700 Subject: [PATCH 35/37] Add blank line for rendering --- pandas/_libs/tslibs/nattype.pyx | 1 + pandas/_libs/tslibs/timestamps.pyx | 1 + pandas/core/arrays/datetimes.py | 1 + pandas/core/generic.py | 1 + 4 files changed, 4 insertions(+) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 844d8a548a4a0..a4f808db9bd9b 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -567,6 +567,7 @@ class NaTType(_NaT): nonexistent : 'shift', 'NaT', default 'raise' A nonexistent time doesn't not exist in a particular timezone where clocks moved forward due to DST. + - 'shift' will shift the nonexistent time forward to the closest existing time - 'NaT' will return NaT where there are nonexistent times diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3eb837838f680..a3cac41648d80 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -982,6 +982,7 @@ class Timestamp(_Timestamp): nonexistent : 'shift', 'NaT', default 'raise' A nonexistent time doesn't not exist in a particular timezone where clocks moved forward due to DST. 
+ - 'shift' will shift the nonexistent time forward to the closest existing time - 'NaT' will return NaT where there are nonexistent times diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ec4c6715efb20..1489299bb3768 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -651,6 +651,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', nonexistent : 'shift', 'NaT' default 'raise' A nonexistent time doesn't not exist in a particular timezone where clocks moved forward due to DST. + - 'shift' will shift the nonexistent times forward to the closest existing time - 'NaT' will return NaT where there are nonexistent times diff --git a/pandas/core/generic.py b/pandas/core/generic.py index abf4318969cb5..f9f08ca96dcb6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8658,6 +8658,7 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, nonexistent : 'shift', 'NaT', default 'raise' A nonexistent time doesn't not exist in a particular timezone where clocks moved forward due to DST. 
+ - 'shift' will shift the nonexistent times forward to the closest existing time - 'NaT' will return NaT where there are nonexistent times From e6c5b2d69f209fc10405777dc00b3f4da5915449 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 18 Oct 2018 15:25:11 -0700 Subject: [PATCH 36/37] Validate nonexistent argument --- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 9 +++++++-- pandas/core/arrays/datetimes.py | 9 +++++++-- pandas/core/generic.py | 4 ++++ pandas/tests/indexes/datetimes/test_timezones.py | 6 +++++- pandas/tests/scalar/timestamp/test_timezones.py | 2 ++ pandas/tests/series/test_timezones.py | 6 +++++- 7 files changed, 31 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f919a545f6d25..f9c604cd76472 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -893,7 +893,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, shift = True else: assert nonexistent in ('raise', None), ("nonexistent must be one of" - "{'NaT', 'raise', 'shift'}") + " {'NaT', 'raise', 'shift'}") trans, deltas, typ = get_dst_info(tz) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index a3cac41648d80..e475ddfaff796 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1022,8 +1022,13 @@ class Timestamp(_Timestamp): elif errors == 'raise': nonexistent = 'raise' else: - raise ValueError("The errors argument must be either coerce " - "or raise.") + raise ValueError("The errors argument must be either 'coerce' " + "or 'raise'.") + + if nonexistent not in ('raise', 'NaT', 'shift'): + raise ValueError("The nonexistent argument must be one of 'raise'," + " 'NaT' or 'shift'") + if self.tzinfo is None: # tz naive, localize tz = maybe_get_tz(tz) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 
c937c0de22e68..d6c46d20df121 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -711,8 +711,13 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', elif errors == 'raise': nonexistent = 'raise' else: - raise ValueError("The errors argument must be either coerce " - "or raise.") + raise ValueError("The errors argument must be either 'coerce' " + "or 'raise'.") + + if nonexistent not in ('raise', 'NaT', 'shift'): + raise ValueError("The nonexistent argument must be one of 'raise'," + " 'NaT' or 'shift'") + if self.tz is not None: if tz is None: new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1cdb54cff72f5..9202feab18968 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8679,6 +8679,10 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, TypeError If the TimeSeries is tz-aware and tz is not None. """ + if nonexistent not in ('raise', 'NaT', 'shift'): + raise ValueError("The nonexistent argument must be one of 'raise'," + " 'NaT' or 'shift'") + axis = self._get_axis_number(axis) ax = self._get_axis(axis) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 03c3f8e498b63..1369783657f92 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -582,7 +582,8 @@ def test_dti_tz_localize_bdate_range(self): @pytest.mark.parametrize('method, exp', [ ['shift', '2015-03-29 03:00:00'], ['NaT', pd.NaT], - ['raise', None] + ['raise', None], + ['foo', 'invalid'] ]) def test_dti_tz_localize_nonexistent(self, tz, method, exp): # GH 8917 @@ -591,6 +592,9 @@ def test_dti_tz_localize_nonexistent(self, tz, method, exp): if method == 'raise': with pytest.raises(pytz.NonExistentTimeError): dti.tz_localize(tz, nonexistent=method) + elif exp == 'invalid': + with pytest.raises(ValueError): + dti.tz_localize(tz, 
nonexistent=method) else: result = dti.tz_localize(tz, nonexistent=method) expected = DatetimeIndex([exp] * n, tz=tz) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 654eebc55735a..827ad3581cd49 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -203,6 +203,8 @@ def test_timestamp_tz_localize_nonexistent_raise(self, tz): ts = Timestamp('2015-03-29 02:20:00') with pytest.raises(pytz.NonExistentTimeError): ts.tz_localize(tz, nonexistent='raise') + with pytest.raises(ValueError): + ts.tz_localize(tz, nonexistent='foo') # ------------------------------------------------------------------ # Timestamp.tz_convert diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 5560bb53e332f..8c1ea6bff5f4d 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -79,7 +79,8 @@ def test_series_tz_localize_ambiguous_bool(self): @pytest.mark.parametrize('method, exp', [ ['shift', '2015-03-29 03:00:00'], ['NaT', NaT], - ['raise', None] + ['raise', None], + ['foo', 'invalid'] ]) def test_series_tz_localize_nonexistent(self, tz, method, exp): # GH 8917 @@ -89,6 +90,9 @@ def test_series_tz_localize_nonexistent(self, tz, method, exp): if method == 'raise': with pytest.raises(pytz.NonExistentTimeError): s.tz_localize(tz, nonexistent=method) + elif exp == 'invalid': + with pytest.raises(ValueError): + dti.tz_localize(tz, nonexistent=method) else: result = s.tz_localize(tz, nonexistent=method) expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) From 1ca0ab26d8b6ae064541e5644d5ca831b669017d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 19 Oct 2018 10:08:01 -0700 Subject: [PATCH 37/37] Fix typo --- pandas/_libs/tslibs/nattype.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/generic.py | 2 +- 4 files changed,
4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index a4f808db9bd9b..0eec84ecf8285 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -565,7 +565,7 @@ class NaTType(_NaT): - 'raise' will raise an AmbiguousTimeError for an ambiguous time nonexistent : 'shift', 'NaT', default 'raise' - A nonexistent time doesn't not exist in a particular timezone + A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - 'shift' will shift the nonexistent time forward to the closest diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e475ddfaff796..08b0c5472549e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -980,7 +980,7 @@ class Timestamp(_Timestamp): - 'raise' will raise an AmbiguousTimeError for an ambiguous time nonexistent : 'shift', 'NaT', default 'raise' - A nonexistent time doesn't not exist in a particular timezone + A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - 'shift' will shift the nonexistent time forward to the closest diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d6c46d20df121..1ec906cc1ab61 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -640,7 +640,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', times nonexistent : 'shift', 'NaT' default 'raise' - A nonexistent time doesn't not exist in a particular timezone + A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
- 'shift' will shift the nonexistent times forward to the closest diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 62a34806b0938..c24872d7c89e9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8650,7 +8650,7 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'raise' will raise an AmbiguousTimeError if there are ambiguous times nonexistent : 'shift', 'NaT', default 'raise' - A nonexistent time doesn't not exist in a particular timezone + A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - 'shift' will shift the nonexistent times forward to the closest