diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 511c9f94a47d8..341f2176f5eb4 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -15,8 +15,6 @@ cpdef int32_t get_day_of_year(int year, int month, int day) nogil cpdef int get_lastbday(int year, int month) nogil cpdef int get_firstbday(int year, int month) nogil -cdef int64_t DAY_NANOS -cdef int64_t HOUR_NANOS cdef dict c_MONTH_NUMBERS cdef int32_t* month_offset diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index ff6f1721ca6c9..00ee15b73f551 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -47,11 +47,6 @@ DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', int_to_weekday = {num: name for num, name in enumerate(DAYS)} weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} -DAY_SECONDS = 86400 -HOUR_SECONDS = 3600 - -cdef const int64_t DAY_NANOS = DAY_SECONDS * 1_000_000_000 -cdef const int64_t HOUR_NANOS = HOUR_SECONDS * 1_000_000_000 # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 0c05037097839..e32b0fd2bba3f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -60,7 +60,6 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.ccalendar cimport ( - c_MONTH_NUMBERS, dayofweek, get_day_of_year, get_days_in_month, diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2694991b54d4a..711d10222c133 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -84,13 +84,13 @@ from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, cmp_dtstructs, cmp_scalar, - dt64_to_dtstruct, get_datetime64_unit, get_datetime64_value, get_unit_from_dtype, 
npy_datetimestruct, + npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, - pydatetime_to_dt64, + pydatetime_to_dtstruct, ) from pandas._libs.tslibs.np_datetime import ( @@ -530,7 +530,8 @@ cdef class _Timestamp(ABCTimestamp): npy_datetimestruct dts if own_tz is not None and not is_utc(own_tz): - val = pydatetime_to_dt64(self, &dts) + self.nanosecond + pydatetime_to_dtstruct(self, &dts) + val = npy_datetimestruct_to_datetime(self._reso, &dts) + self.nanosecond else: val = self.value return val @@ -2044,11 +2045,6 @@ default 'raise' >>> pd.NaT.tz_localize() NaT """ - if self._reso != NPY_FR_ns: - if tz is None and self.tz is None: - return self - raise NotImplementedError(self._reso) - if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') @@ -2077,7 +2073,7 @@ default 'raise' "Cannot localize tz-aware Timestamp, use tz_convert for conversions" ) - out = Timestamp(value, tz=tz) + out = type(self)._from_value_and_reso(value, self._reso, tz=tz) if out is not NaT: out._set_freq(self._freq) # avoid warning in constructor return out @@ -2124,7 +2120,6 @@ default 'raise' >>> pd.NaT.tz_convert(tz='Asia/Tokyo') NaT """ - if self.tzinfo is None: # tz naive, use tz_localize raise TypeError( diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 7657633c7215a..dffe02ef15148 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -27,11 +27,10 @@ from numpy cimport ( cnp.import_array() -from pandas._libs.tslibs.ccalendar cimport ( - DAY_NANOS, - HOUR_NANOS, +from pandas._libs.tslibs.dtypes cimport ( + periods_per_day, + periods_per_second, ) -from pandas._libs.tslibs.dtypes cimport periods_per_second from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, @@ -153,6 +152,7 @@ cdef int64_t tz_localize_to_utc_single( return val elif is_utc(tz) or tz is None: + # TODO: test with non-nano return val 
elif is_tzlocal(tz) or is_zoneinfo(tz): @@ -161,6 +161,15 @@ cdef int64_t tz_localize_to_utc_single( elif is_fixed_offset(tz): _, deltas, _ = get_dst_info(tz) delta = deltas[0] + # TODO: de-duplicate with Localizer.__init__ + if reso != NPY_DATETIMEUNIT.NPY_FR_ns: + if reso == NPY_DATETIMEUNIT.NPY_FR_us: + delta = delta // 1000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + delta = delta // 1_000_000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + delta = delta // 1_000_000_000 + return val - delta else: @@ -229,6 +238,7 @@ timedelta-like} bint fill_nonexist = False str stamp Localizer info = Localizer(tz, reso=reso) + int64_t pph = periods_per_day(reso) // 24 # Vectorized version of DstTzInfo.localize if info.use_utc: @@ -242,7 +252,9 @@ timedelta-like} if v == NPY_NAT: result[i] = NPY_NAT else: - result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True, reso=reso) + result[i] = v - _tz_localize_using_tzinfo_api( + v, tz, to_utc=True, reso=reso + ) return result.base # to return underlying ndarray elif info.use_fixed: @@ -283,7 +295,7 @@ timedelta-like} shift_backward = True elif PyDelta_Check(nonexistent): from .timedeltas import delta_to_nanoseconds - shift_delta = delta_to_nanoseconds(nonexistent) + shift_delta = delta_to_nanoseconds(nonexistent, reso=reso) elif nonexistent not in ('raise', None): msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " "shift_backwards} or a timedelta object") @@ -291,12 +303,14 @@ timedelta-like} # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) - result_a, result_b =_get_utc_bounds(vals, info.tdata, info.ntrans, info.deltas) + result_a, result_b =_get_utc_bounds( + vals, info.tdata, info.ntrans, info.deltas, reso=reso + ) # silence false-positive compiler warning dst_hours = np.empty(0, dtype=np.int64) if infer_dst: - dst_hours = _get_dst_hours(vals, result_a, result_b) + dst_hours = _get_dst_hours(vals, result_a, result_b, 
reso=reso) # Pre-compute delta_idx_offset that will be used if we go down non-existent # paths. @@ -316,12 +330,15 @@ timedelta-like} left = result_a[i] right = result_b[i] if val == NPY_NAT: + # TODO: test with non-nano result[i] = val elif left != NPY_NAT and right != NPY_NAT: if left == right: + # TODO: test with non-nano result[i] = left else: if infer_dst and dst_hours[i] != NPY_NAT: + # TODO: test with non-nano result[i] = dst_hours[i] elif is_dst: if ambiguous_array[i]: @@ -329,9 +346,10 @@ timedelta-like} else: result[i] = right elif fill: + # TODO: test with non-nano; parametrize test_dt_round_tz_ambiguous result[i] = NPY_NAT else: - stamp = _render_tstamp(val) + stamp = _render_tstamp(val, reso=reso) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp}, try using the " "'ambiguous' argument" @@ -339,23 +357,24 @@ timedelta-like} elif left != NPY_NAT: result[i] = left elif right != NPY_NAT: + # TODO: test with non-nano result[i] = right else: # Handle nonexistent times if shift_forward or shift_backward or shift_delta != 0: # Shift the nonexistent time to the closest existing time - remaining_mins = val % HOUR_NANOS + remaining_mins = val % pph if shift_delta != 0: # Validate that we don't relocalize on another nonexistent # time - if -1 < shift_delta + remaining_mins < HOUR_NANOS: + if -1 < shift_delta + remaining_mins < pph: raise ValueError( "The provided timedelta will relocalize on a " f"nonexistent time: {nonexistent}" ) new_local = val + shift_delta elif shift_forward: - new_local = val + (HOUR_NANOS - remaining_mins) + new_local = val + (pph - remaining_mins) else: # Subtract 1 since the beginning hour is _inclusive_ of # nonexistent times @@ -368,7 +387,7 @@ timedelta-like} elif fill_nonexist: result[i] = NPY_NAT else: - stamp = _render_tstamp(val) + stamp = _render_tstamp(val, reso=reso) raise pytz.NonExistentTimeError(stamp) return result.base # .base to get underlying ndarray @@ -404,10 +423,11 @@ cdef inline Py_ssize_t 
bisect_right_i8(int64_t *data, return left -cdef inline str _render_tstamp(int64_t val): +cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT reso): """ Helper function to render exception messages""" from pandas._libs.tslibs.timestamps import Timestamp - return str(Timestamp(val)) + ts = Timestamp._from_value_and_reso(val, reso, None) + return str(ts) cdef _get_utc_bounds( @@ -415,6 +435,7 @@ cdef _get_utc_bounds( int64_t* tdata, Py_ssize_t ntrans, const int64_t[::1] deltas, + NPY_DATETIMEUNIT reso, ): # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) @@ -424,6 +445,7 @@ cdef _get_utc_bounds( Py_ssize_t i, n = vals.size int64_t val, v_left, v_right Py_ssize_t isl, isr, pos_left, pos_right + int64_t ppd = periods_per_day(reso) result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) @@ -438,8 +460,8 @@ cdef _get_utc_bounds( if val == NPY_NAT: continue - # TODO: be careful of overflow in val-DAY_NANOS - isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1 + # TODO: be careful of overflow in val-ppd + isl = bisect_right_i8(tdata, val - ppd, ntrans) - 1 if isl < 0: isl = 0 @@ -449,8 +471,8 @@ cdef _get_utc_bounds( if v_left + deltas[pos_left] == val: result_a[i] = v_left - # TODO: be careful of overflow in val+DAY_NANOS - isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1 + # TODO: be careful of overflow in val+ppd + isr = bisect_right_i8(tdata, val + ppd, ntrans) - 1 if isr < 0: isr = 0 @@ -465,10 +487,11 @@ cdef _get_utc_bounds( @cython.boundscheck(False) cdef ndarray[int64_t] _get_dst_hours( - # vals only needed here to potential render an exception message + # vals, reso only needed here to potentially render an exception message const int64_t[:] vals, ndarray[int64_t] result_a, ndarray[int64_t] result_b, + NPY_DATETIMEUNIT reso, ): cdef: Py_ssize_t i, n = vals.shape[0] @@ -497,7
+520,7 @@ cdef ndarray[int64_t] _get_dst_hours( if trans_idx.size == 1: # TODO: not reached in tests 2022-05-02; possible? - stamp = _render_tstamp(vals[trans_idx[0]]) + stamp = _render_tstamp(vals[trans_idx[0]], reso=reso) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp} as there " "are no repeated times" @@ -519,7 +542,7 @@ cdef ndarray[int64_t] _get_dst_hours( delta = np.diff(result_a[grp]) if grp.size == 1 or np.all(delta > 0): # TODO: not reached in tests 2022-05-02; possible? - stamp = _render_tstamp(vals[grp[0]]) + stamp = _render_tstamp(vals[grp[0]], reso=reso) raise pytz.AmbiguousTimeError(stamp) # Find the index for the switch and pull from a for dst and b diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 75efe6d4113cf..6201c94ecc155 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -19,7 +19,6 @@ cnp.import_array() from .dtypes import Resolution -from .ccalendar cimport DAY_NANOS from .dtypes cimport ( c_Resolution, periods_per_day, diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index a7f7393fb3263..874575fa9ad4c 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -20,6 +20,7 @@ ) from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td @@ -57,10 +58,11 @@ def test_tz_localize_pushes_out_of_bounds(self): with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp.max.tz_localize("US/Pacific") - def test_tz_localize_ambiguous_bool(self): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_tz_localize_ambiguous_bool(self, unit): # make sure that we are correctly accepting bool values as ambiguous # GH#14402 - ts = Timestamp("2015-11-01 01:00:03") + ts = Timestamp("2015-11-01 
01:00:03")._as_unit(unit) expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") @@ -70,9 +72,11 @@ def test_tz_localize_ambiguous_bool(self): result = ts.tz_localize("US/Central", ambiguous=True) assert result == expected0 + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = ts.tz_localize("US/Central", ambiguous=False) assert result == expected1 + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_tz_localize_ambiguous(self): ts = Timestamp("2014-11-02 01:00") @@ -245,17 +249,28 @@ def test_timestamp_tz_localize(self, tz): ], ) @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) def test_timestamp_tz_localize_nonexistent_shift( - self, start_ts, tz, end_ts, shift, tz_type + self, start_ts, tz, end_ts, shift, tz_type, unit ): # GH 8917, 24466 tz = tz_type + tz if isinstance(shift, str): shift = "shift_" + shift - ts = Timestamp(start_ts) + ts = Timestamp(start_ts)._as_unit(unit) result = ts.tz_localize(tz, nonexistent=shift) expected = Timestamp(end_ts).tz_localize(tz) - assert result == expected + + if unit == "us": + assert result == expected.replace(nanosecond=0) + elif unit == "ms": + micros = expected.microsecond - expected.microsecond % 1000 + assert result == expected.replace(microsecond=micros, nanosecond=0) + elif unit == "s": + assert result == expected.replace(microsecond=0, nanosecond=0) + else: + assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("offset", [-1, 1]) @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) @@ -268,16 +283,18 @@ def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): ts.tz_localize(tz, nonexistent=timedelta(seconds=offset)) @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) - def 
test_timestamp_tz_localize_nonexistent_NaT(self, tz): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_timestamp_tz_localize_nonexistent_NaT(self, tz, unit): # GH 8917 - ts = Timestamp("2015-03-29 02:20:00") + ts = Timestamp("2015-03-29 02:20:00")._as_unit(unit) result = ts.tz_localize(tz, nonexistent="NaT") assert result is NaT @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) - def test_timestamp_tz_localize_nonexistent_raise(self, tz): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_timestamp_tz_localize_nonexistent_raise(self, tz, unit): # GH 8917 - ts = Timestamp("2015-03-29 02:20:00") + ts = Timestamp("2015-03-29 02:20:00")._as_unit(unit) msg = "2015-03-29 02:20:00" with pytest.raises(pytz.NonExistentTimeError, match=msg): ts.tz_localize(tz, nonexistent="raise") diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 35065a3c9877c..4ac50e3f4e034 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -161,18 +161,30 @@ def test_floor(self): assert result == expected @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) - def test_round_dst_border_ambiguous(self, method): + @pytest.mark.parametrize( + "unit", + [ + "ns", + pytest.param("us", marks=pytest.mark.xfail(reason="round not implemented")), + pytest.param("ms", marks=pytest.mark.xfail(reason="round not implemented")), + pytest.param("s", marks=pytest.mark.xfail(reason="round not implemented")), + ], + ) + def test_round_dst_border_ambiguous(self, method, unit): # GH 18946 round near "fall back" DST ts = Timestamp("2017-10-29 00:00:00", tz="UTC").tz_convert("Europe/Madrid") + ts = ts._as_unit(unit) # result = getattr(ts, method)("H", ambiguous=True) assert result == ts + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous=False) expected = 
Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( "Europe/Madrid" ) assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)("H", ambiguous="NaT") assert result is NaT @@ -189,12 +201,22 @@ def test_round_dst_border_ambiguous(self, method): ["floor", "2018-03-11 03:01:00-0500", "2H"], ], ) - def test_round_dst_border_nonexistent(self, method, ts_str, freq): + @pytest.mark.parametrize( + "unit", + [ + "ns", + pytest.param("us", marks=pytest.mark.xfail(reason="round not implemented")), + pytest.param("ms", marks=pytest.mark.xfail(reason="round not implemented")), + pytest.param("s", marks=pytest.mark.xfail(reason="round not implemented")), + ], + ) + def test_round_dst_border_nonexistent(self, method, ts_str, freq, unit): # GH 23324 round near "spring forward" DST - ts = Timestamp(ts_str, tz="America/Chicago") + ts = Timestamp(ts_str, tz="America/Chicago")._as_unit(unit) result = getattr(ts, method)(freq, nonexistent="shift_forward") expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value result = getattr(ts, method)(freq, nonexistent="NaT") assert result is NaT @@ -466,35 +488,41 @@ def test_replace_across_dst(self, tz, normalize): ts2b = normalize(ts2) assert ts2 == ts2b - def test_replace_dst_border(self): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_replace_dst_border(self, unit): # Gh 7825 - t = Timestamp("2013-11-3", tz="America/Chicago") + t = Timestamp("2013-11-3", tz="America/Chicago")._as_unit(unit) result = t.replace(hour=3) expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) - def test_replace_dst_fold(self, fold, tz): + 
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_replace_dst_fold(self, fold, tz, unit): # GH 25017 d = datetime(2019, 10, 27, 2, 30) - ts = Timestamp(d, tz=tz) + ts = Timestamp(d, tz=tz)._as_unit(unit) result = ts.replace(hour=1, fold=fold) expected = Timestamp(datetime(2019, 10, 27, 1, 30)).tz_localize( tz, ambiguous=not fold ) assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value # -------------------------------------------------------------- # Timestamp.normalize @pytest.mark.parametrize("arg", ["2013-11-30", "2013-11-30 12:00:00"]) - def test_normalize(self, tz_naive_fixture, arg): + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_normalize(self, tz_naive_fixture, arg, unit): tz = tz_naive_fixture - ts = Timestamp(arg, tz=tz) + ts = Timestamp(arg, tz=tz)._as_unit(unit) result = ts.normalize() expected = Timestamp("2013-11-30", tz=tz) assert result == expected + assert result._reso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_normalize_pre_epoch_dates(self): # GH: 36294