diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 9db2e2c2a9732..496db66c78569 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -420,7 +420,7 @@ def time_add_overflow_both_arg_nan(self): hcal = pd.tseries.holiday.USFederalHolidayCalendar() -# These offsets currently raise a NotImplimentedError with .apply_index() +# These offsets currently raise a NotImplementedError with .apply_index() non_apply = [ pd.offsets.Day(), pd.offsets.BYearEnd(), diff --git a/asv_bench/benchmarks/tslibs/normalize.py b/asv_bench/benchmarks/tslibs/normalize.py index 69732018aea9a..b263ae21422b6 100644 --- a/asv_bench/benchmarks/tslibs/normalize.py +++ b/asv_bench/benchmarks/tslibs/normalize.py @@ -36,8 +36,10 @@ def setup(self, size, tz): raise NotImplementedError def time_normalize_i8_timestamps(self, size, tz): - normalize_i8_timestamps(self.i8data, tz) + # 10 i.e. NPY_FR_ns + normalize_i8_timestamps(self.i8data, tz, 10) def time_is_date_array_normalized(self, size, tz): # TODO: cases with different levels of short-circuiting - is_date_array_normalized(self.i8data, tz) + # 10 i.e. NPY_FR_ns + is_date_array_normalized(self.i8data, tz, 10) diff --git a/asv_bench/benchmarks/tslibs/offsets.py b/asv_bench/benchmarks/tslibs/offsets.py index 0aea8332398b1..978a36e470cbb 100644 --- a/asv_bench/benchmarks/tslibs/offsets.py +++ b/asv_bench/benchmarks/tslibs/offsets.py @@ -14,7 +14,7 @@ pass hcal = pandas.tseries.holiday.USFederalHolidayCalendar() -# These offsets currently raise a NotImplimentedError with .apply_index() +# These offsets currently raise a NotImplementedError with .apply_index() non_apply = [ offsets.Day(), offsets.BYearEnd(), diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index d06dc3160995d..05c91379cbdc5 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -48,21 +48,23 @@ from pandas._libs.tslibs.ccalendar import ( ) from pandas._libs.tslibs.ccalendar cimport ( - DAY_NANOS, dayofweek, get_days_in_month, get_firstbday, get_lastbday, ) from pandas._libs.tslibs.conversion cimport localize_pydatetime +from pandas._libs.tslibs.dtypes cimport periods_per_day from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, ) from pandas._libs.tslibs.np_datetime cimport ( - dt64_to_dtstruct, - dtstruct_to_dt64, + NPY_DATETIMEUNIT, + get_unit_from_dtype, npy_datetimestruct, + npy_datetimestruct_to_datetime, + pandas_datetime_to_datetimestruct, pydate_to_dtstruct, ) @@ -111,25 +113,12 @@ def apply_wrapper_core(func, self, other) -> ndarray: if self.normalize: # TODO: Avoid circular/runtime import from .vectorized import normalize_i8_timestamps - result = normalize_i8_timestamps(result.view("i8"), None) + reso = get_unit_from_dtype(other.dtype) + result = normalize_i8_timestamps(result.view("i8"), None, reso=reso) return result -def apply_index_wraps(func): - # Note: normally we would use `@functools.wraps(func)`, but this does - # not play nicely with cython class methods - def wrapper(self, other): - # other is a DatetimeArray - result = apply_wrapper_core(func, self, other) - result = type(other)(result) - warnings.warn("'Offset.apply_index(other)' is deprecated. " - "Use 'offset + other' instead.", FutureWarning) - return result - - return wrapper - - def apply_array_wraps(func): # Note: normally we would use `@functools.wraps(func)`, but this does # not play nicely with cython class methods @@ -593,7 +582,6 @@ cdef class BaseOffset: # ------------------------------------------------------------------ - @apply_index_wraps def apply_index(self, dtindex): """ Vectorized apply of DateOffset to DatetimeIndex, @@ -618,10 +606,11 @@ cdef class BaseOffset: When the specific offset subclass does not have a vectorized implementation. """ - raise NotImplementedError( # pragma: no cover - f"DateOffset subclass {type(self).__name__} " - "does not have a vectorized implementation" - ) + warnings.warn("'Offset.apply_index(other)' is deprecated. " + "Use 'offset + other' instead.", FutureWarning) + + res = self._apply_array(dtindex) + return type(dtindex)(res) @apply_array_wraps def _apply_array(self, dtarr): @@ -1119,25 +1108,9 @@ cdef class RelativeDeltaOffset(BaseOffset): else: return other + timedelta(self.n) - @apply_index_wraps - def apply_index(self, dtindex): - """ - Vectorized apply of DateOffset to DatetimeIndex, - raises NotImplementedError for offsets without a - vectorized implementation. - - Parameters - ---------- - index : DatetimeIndex - - Returns - ------- - ndarray[datetime64[ns]] - """ - return self._apply_array(dtindex) - @apply_array_wraps def _apply_array(self, dtarr): + reso = get_unit_from_dtype(dtarr.dtype) dt64other = np.asarray(dtarr) kwds = self.kwds relativedelta_fast = { @@ -1155,8 +1128,8 @@ cdef class RelativeDeltaOffset(BaseOffset): months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n if months: - shifted = shift_months(dt64other.view("i8"), months) - dt64other = shifted.view("datetime64[ns]") + shifted = shift_months(dt64other.view("i8"), months, reso=reso) + dt64other = shifted.view(dtarr.dtype) weeks = kwds.get("weeks", 0) * self.n if weeks: @@ -1170,9 +1143,11 @@ cdef class RelativeDeltaOffset(BaseOffset): if timedelta_kwds: delta = Timedelta(**timedelta_kwds) dt64other = dt64other + (self.n * delta) + # FIXME: fails to preserve non-nano return dt64other elif not self._use_relativedelta and hasattr(self, "_offset"): # timedelta + # FIXME: fails to preserve non-nano return dt64other + Timedelta(self._offset * self.n) else: # relativedelta with other keywords @@ -1475,16 +1450,13 @@ cdef class BusinessDay(BusinessMixin): "Only know how to combine business day with datetime or timedelta." ) - @apply_index_wraps - def apply_index(self, dtindex): - return self._apply_array(dtindex) - @apply_array_wraps def _apply_array(self, dtarr): i8other = dtarr.view("i8") - res = _shift_bdays(i8other, self.n) + reso = get_unit_from_dtype(dtarr.dtype) + res = _shift_bdays(i8other, self.n, reso=reso) if self.offset: - res = res.view("M8[ns]") + Timedelta(self.offset) + res = res.view(dtarr.dtype) + Timedelta(self.offset) res = res.view("i8") return res @@ -1980,14 +1952,11 @@ cdef class YearOffset(SingleConstructorOffset): months = years * 12 + (self.month - other.month) return shift_month(other, months, self._day_opt) - @apply_index_wraps - def apply_index(self, dtindex): - return self._apply_array(dtindex) - @apply_array_wraps def _apply_array(self, dtarr): + reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( - dtarr.view("i8"), self.n, self.month, self._day_opt, modby=12 + dtarr.view("i8"), self.n, self.month, self._day_opt, modby=12, reso=reso ) return shifted @@ -2137,14 +2106,11 @@ cdef class QuarterOffset(SingleConstructorOffset): months = qtrs * 3 - months_since return shift_month(other, months, self._day_opt) - @apply_index_wraps - def apply_index(self, dtindex): - return self._apply_array(dtindex) - @apply_array_wraps def _apply_array(self, dtarr): + reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( - dtarr.view("i8"), self.n, self.startingMonth, self._day_opt + dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, reso=reso ) return shifted @@ -2256,13 +2222,10 @@ cdef class MonthOffset(SingleConstructorOffset): n = roll_convention(other.day, self.n, compare_day) return shift_month(other, n, self._day_opt) - @apply_index_wraps - def apply_index(self, dtindex): - return self._apply_array(dtindex) - @apply_array_wraps def _apply_array(self, dtarr): - shifted = shift_months(dtarr.view("i8"), self.n, self._day_opt) + reso = get_unit_from_dtype(dtarr.dtype) + shifted = shift_months(dtarr.view("i8"), self.n, self._day_opt, reso=reso) return shifted cpdef __setstate__(self, state): @@ -2395,12 +2358,6 @@ cdef class SemiMonthOffset(SingleConstructorOffset): return shift_month(other, months, to_day) - @apply_index_wraps - @cython.wraparound(False) - @cython.boundscheck(False) - def apply_index(self, dtindex): - return self._apply_array(dtindex) - @apply_array_wraps @cython.wraparound(False) @cython.boundscheck(False) @@ -2414,6 +2371,7 @@ cdef class SemiMonthOffset(SingleConstructorOffset): int months, to_day, nadj, n = self.n int days_in_month, day, anchor_dom = self.day_of_month bint is_start = isinstance(self, SemiMonthBegin) + NPY_DATETIMEUNIT reso = get_unit_from_dtype(dtarr.dtype) with nogil: for i in range(count): @@ -2422,7 +2380,7 @@ cdef class SemiMonthOffset(SingleConstructorOffset): out[i] = NPY_NAT continue - dt64_to_dtstruct(val, &dts) + pandas_datetime_to_datetimestruct(val, reso, &dts) day = dts.day # Adjust so that we are always looking at self.day_of_month, @@ -2455,7 +2413,7 @@ cdef class SemiMonthOffset(SingleConstructorOffset): days_in_month = get_days_in_month(dts.year, dts.month) dts.day = min(to_day, days_in_month) - out[i] = dtstruct_to_dt64(&dts) + out[i] = npy_datetimestruct_to_datetime(reso, &dts) return out.base @@ -2562,10 +2520,6 @@ cdef class Week(SingleConstructorOffset): return other + timedelta(weeks=k) - @apply_index_wraps - def apply_index(self, dtindex): - return self._apply_array(dtindex) - @apply_array_wraps def _apply_array(self, dtarr): if self.weekday is None: @@ -2573,12 +2527,13 @@ cdef class Week(SingleConstructorOffset): td64 = np.timedelta64(td, "ns") return dtarr + td64 else: + reso = get_unit_from_dtype(dtarr.dtype) i8other = dtarr.view("i8") - return self._end_apply_index(i8other) + return self._end_apply_index(i8other, reso=reso) @cython.wraparound(False) @cython.boundscheck(False) - cdef _end_apply_index(self, const int64_t[:] i8other): + cdef _end_apply_index(self, const int64_t[:] i8other, NPY_DATETIMEUNIT reso): """ Add self to the given DatetimeIndex, specialized for case where self.weekday is non-null. @@ -2586,6 +2541,7 @@ cdef class Week(SingleConstructorOffset): Parameters ---------- i8other : const int64_t[:] + reso : NPY_DATETIMEUNIT Returns ------- @@ -2598,6 +2554,7 @@ cdef class Week(SingleConstructorOffset): npy_datetimestruct dts int wday, days, weeks, n = self.n int anchor_weekday = self.weekday + int64_t DAY_PERIODS = periods_per_day(reso) with nogil: for i in range(count): @@ -2606,7 +2563,7 @@ cdef class Week(SingleConstructorOffset): out[i] = NPY_NAT continue - dt64_to_dtstruct(val, &dts) + pandas_datetime_to_datetimestruct(val, reso, &dts) wday = dayofweek(dts.year, dts.month, dts.day) days = 0 @@ -2616,7 +2573,7 @@ cdef class Week(SingleConstructorOffset): if weeks > 0: weeks -= 1 - out[i] = val + (7 * weeks + days) * DAY_NANOS + out[i] = val + (7 * weeks + days) * DAY_PERIODS return out.base @@ -3294,6 +3251,8 @@ cdef class CustomBusinessDay(BusinessDay): ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] ) + _apply_array = BaseOffset._apply_array + def __init__( self, n=1, @@ -3342,12 +3301,6 @@ cdef class CustomBusinessDay(BusinessDay): "datetime, datetime64 or timedelta." ) - def apply_index(self, dtindex): - raise NotImplementedError - - def _apply_array(self, dtarr): - raise NotImplementedError - def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False @@ -3772,6 +3725,7 @@ cdef shift_quarters( int q1start_month, object day_opt, int modby=3, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, ): """ Given an int64 array representing nanosecond timestamps, shift all elements @@ -3784,6 +3738,7 @@ cdef shift_quarters( q1start_month : int month in which Q1 begins by convention day_opt : {'start', 'end', 'business_start', 'business_end'} modby : int (3 for quarters, 12 for years) + reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- @@ -3797,13 +3752,20 @@ cdef shift_quarters( raise ValueError("day must be None, 'start', 'end', " "'business_start', or 'business_end'") - _shift_quarters(dtindex, out, count, quarters, q1start_month, day_opt, modby) + _shift_quarters( + dtindex, out, count, quarters, q1start_month, day_opt, modby, reso=reso + ) return np.asarray(out) @cython.wraparound(False) @cython.boundscheck(False) -def shift_months(const int64_t[:] dtindex, int months, object day_opt=None): +def shift_months( + const int64_t[:] dtindex, + int months, + object day_opt=None, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): """ Given an int64-based datetime index, shift all elements specified number of months using DateOffset semantics @@ -3826,14 +3788,15 @@ def shift_months(const int64_t[:] dtindex, int months, object day_opt=None): out[i] = NPY_NAT continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) dts.year = year_add_months(dts, months) dts.month = month_add_months(dts, months) dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) - out[i] = dtstruct_to_dt64(&dts) + out[i] = npy_datetimestruct_to_datetime(reso, &dts) + elif day_opt in ["start", "end", "business_start", "business_end"]: - _shift_months(dtindex, out, count, months, day_opt) + _shift_months(dtindex, out, count, months, day_opt, reso=reso) else: raise ValueError("day must be None, 'start', 'end', " "'business_start', or 'business_end'") @@ -3847,7 +3810,9 @@ cdef inline void _shift_months(const int64_t[:] dtindex, int64_t[::1] out, Py_ssize_t count, int months, - str day_opt) nogil: + str day_opt, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, + ) nogil: """ See shift_months.__doc__ """ @@ -3861,7 +3826,7 @@ cdef inline void _shift_months(const int64_t[:] dtindex, out[i] = NPY_NAT continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) months_to_roll = months months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) @@ -3870,7 +3835,7 @@ cdef inline void _shift_months(const int64_t[:] dtindex, dts.month = month_add_months(dts, months_to_roll) dts.day = get_day_of_month(&dts, day_opt) - out[i] = dtstruct_to_dt64(&dts) + out[i] = npy_datetimestruct_to_datetime(reso, &dts) @cython.wraparound(False) @@ -3881,7 +3846,9 @@ cdef inline void _shift_quarters(const int64_t[:] dtindex, int quarters, int q1start_month, str day_opt, - int modby) nogil: + int modby, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, + ) nogil: """ See shift_quarters.__doc__ """ @@ -3895,7 +3862,7 @@ cdef inline void _shift_quarters(const int64_t[:] dtindex, out[i] = NPY_NAT continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) n = quarters months_since = (dts.month - q1start_month) % modby @@ -3905,12 +3872,16 @@ cdef inline void _shift_quarters(const int64_t[:] dtindex, dts.month = month_add_months(dts, modby * n - months_since) dts.day = get_day_of_month(&dts, day_opt) - out[i] = dtstruct_to_dt64(&dts) + out[i] = npy_datetimestruct_to_datetime(reso, &dts) @cython.wraparound(False) @cython.boundscheck(False) -cdef ndarray[int64_t] _shift_bdays(const int64_t[:] i8other, int periods): +cdef ndarray[int64_t] _shift_bdays( + const int64_t[:] i8other, + int periods, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): """ Implementation of BusinessDay.apply_offset. @@ -3918,6 +3889,7 @@ cdef ndarray[int64_t] _shift_bdays(const int64_t[:] i8other, int periods): ---------- i8other : const int64_t[:] periods : int + reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- @@ -3929,6 +3901,7 @@ cdef ndarray[int64_t] _shift_bdays(const int64_t[:] i8other, int periods): int64_t val, res int wday, nadj, days npy_datetimestruct dts + int64_t DAY_PERIODS = periods_per_day(reso) for i in range(n): val = i8other[i] @@ -3938,7 +3911,7 @@ cdef ndarray[int64_t] _shift_bdays(const int64_t[:] i8other, int periods): # The rest of this is effectively a copy of BusinessDay.apply nadj = periods weeks = nadj // 5 - dt64_to_dtstruct(val, &dts) + pandas_datetime_to_datetimestruct(val, reso, &dts) wday = dayofweek(dts.year, dts.month, dts.day) if nadj <= 0 and wday > 4: @@ -3961,7 +3934,7 @@ cdef ndarray[int64_t] _shift_bdays(const int64_t[:] i8other, int periods): # shift by nadj days plus 2 to get past the weekend days = nadj + 2 - res = val + (7 * weeks + days) * DAY_NANOS + res = val + (7 * weeks + days) * DAY_PERIODS result[i] = res return result.base @@ -3973,7 +3946,7 @@ def shift_month(stamp: datetime, months: int, day_opt: object = None) -> datetim option `day_opt`, return a new datetimelike that many months later, with day determined by `day_opt` using relativedelta semantics. - Scalar analogue of shift_months + Scalar analogue of shift_months. Parameters ---------- diff --git a/pandas/_libs/tslibs/vectorized.pyi b/pandas/_libs/tslibs/vectorized.pyi index 61148605aadea..919457724606d 100644 --- a/pandas/_libs/tslibs/vectorized.pyi +++ b/pandas/_libs/tslibs/vectorized.pyi @@ -17,11 +17,13 @@ def dt64arr_to_periodarr( ) -> npt.NDArray[np.int64]: ... def is_date_array_normalized( stamps: npt.NDArray[np.int64], - tz: tzinfo | None = ..., + tz: tzinfo | None, + reso: int, # NPY_DATETIMEUNIT ) -> bool: ... def normalize_i8_timestamps( stamps: npt.NDArray[np.int64], tz: tzinfo | None, + reso: int, # NPY_DATETIMEUNIT ) -> npt.NDArray[np.int64]: ... def get_resolution( stamps: npt.NDArray[np.int64], diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 511ce26feeefa..8e0d5ece0e155 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -20,12 +20,17 @@ cnp.import_array() from .dtypes import Resolution from .ccalendar cimport DAY_NANOS -from .dtypes cimport c_Resolution +from .dtypes cimport ( + c_Resolution, + periods_per_day, +) from .nattype cimport ( NPY_NAT, c_NaT as NaT, ) from .np_datetime cimport ( + NPY_DATETIMEUNIT, + NPY_FR_ns, dt64_to_dtstruct, npy_datetimestruct, ) @@ -258,7 +263,7 @@ def get_resolution(ndarray stamps, tzinfo tz=None) -> Resolution: @cython.cdivision(False) @cython.wraparound(False) @cython.boundscheck(False) -cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz): +cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso): # stamps is int64_t, arbitrary ndim """ Normalize each of the (nanosecond) timezone aware timestamps in the given @@ -269,6 +274,7 @@ cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz): ---------- stamps : int64 ndarray tz : tzinfo or None + reso : NPY_DATETIMEUNIT Returns ------- @@ -282,6 +288,7 @@ cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz): ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0) cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps) + int64_t ppd = periods_per_day(reso) for i in range(n): # Analogous to: utc_val = stamps[i] @@ -291,7 +298,7 @@ cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz): res_val = NPY_NAT else: local_val = info.utc_val_to_local_val(utc_val, &pos) - res_val = local_val - (local_val % DAY_NANOS) + res_val = local_val - (local_val % ppd) # Analogous to: result[i] = res_val (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val @@ -303,7 +310,7 @@ cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz): @cython.wraparound(False) @cython.boundscheck(False) -def is_date_array_normalized(ndarray stamps, tzinfo tz=None) -> bool: +def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) -> bool: # stamps is int64_t, arbitrary ndim """ Check if all of the given (nanosecond) timestamps are normalized to @@ -314,6 +321,7 @@ def is_date_array_normalized(ndarray stamps, tzinfo tz=None) -> bool: ---------- stamps : int64 ndarray tz : tzinfo or None + reso : NPY_DATETIMEUNIT Returns ------- @@ -325,6 +333,7 @@ def is_date_array_normalized(ndarray stamps, tzinfo tz=None) -> bool: Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning cnp.flatiter it = cnp.PyArray_IterNew(stamps) + int64_t ppd = periods_per_day(reso) for i in range(n): # Analogous to: utc_val = stamps[i] @@ -332,7 +341,7 @@ def is_date_array_normalized(ndarray stamps, tzinfo tz=None) -> bool: local_val = info.utc_val_to_local_val(utc_val, &pos) - if local_val % DAY_NANOS != 0: + if local_val % ppd != 0: return False cnp.PyArray_ITER_NEXT(it) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 9ced8f225c3a8..4ee5838ab5c17 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -44,6 +44,7 @@ RoundTo, round_nsint64, ) +from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype from pandas._libs.tslibs.timestamps import integer_op_not_supported from pandas._typing import ( ArrayLike, @@ -1119,7 +1120,29 @@ def _add_timedeltalike_scalar(self, other): new_values.fill(iNaT) return type(self)(new_values, dtype=self.dtype) + # FIXME: this may overflow with non-nano inc = delta_to_nanoseconds(other) + + if not is_period_dtype(self.dtype): + # FIXME: don't hardcode 7, 8, 9, 10 here + # TODO: maybe patch delta_to_nanoseconds to take reso? + + # error: "DatetimeLikeArrayMixin" has no attribute "_reso" + reso = self._reso # type: ignore[attr-defined] + if reso == 10: + pass + elif reso == 9: + # microsecond + inc = inc // 1000 + elif reso == 8: + # millisecond + inc = inc // 1_000_000 + elif reso == 7: + # second + inc = inc // 1_000_000_000 + else: + raise NotImplementedError(reso) + new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan) new_values = new_values.view("i8") new_values = self._maybe_mask_results(new_values) @@ -1788,6 +1811,10 @@ class TimelikeOps(DatetimeLikeArrayMixin): Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. """ + @cache_readonly + def _reso(self) -> int: + return py_get_unit_from_dtype(self._ndarray.dtype) + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): if ( ufunc in [np.isnan, np.isinf, np.isfinite] diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ec6da61bde6c6..63a3165b65e10 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -38,13 +38,11 @@ tz_convert_from_utc, tzconversion, ) -from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype from pandas._typing import npt from pandas.errors import ( OutOfBoundsDatetime, PerformanceWarning, ) -from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive @@ -546,10 +544,6 @@ def _check_compatible_with(self, other, setitem: bool = False): # ----------------------------------------------------------------- # Descriptive Properties - @cache_readonly - def _reso(self): - return py_get_unit_from_dtype(self._ndarray.dtype) - def _box_func(self, x: np.datetime64) -> Timestamp | NaTType: # GH#42228 value = x.view("i8") @@ -620,7 +614,7 @@ def is_normalized(self) -> bool: """ Returns True if all of the dates are at midnight ("no time") """ - return is_date_array_normalized(self.asi8, self.tz) + return is_date_array_normalized(self.asi8, self.tz, reso=self._reso) @property # NB: override with cache_readonly in immutable subclasses def _resolution_obj(self) -> Resolution: @@ -772,9 +766,11 @@ def _add_offset(self, offset) -> DatetimeArray: values = self.tz_localize(None) else: values = self - result = offset._apply_array(values).view("M8[ns]") + result = offset._apply_array(values).view(values.dtype) result = DatetimeArray._simple_new(result, dtype=result.dtype) - result = result.tz_localize(self.tz) + if self.tz is not None: + # FIXME: tz_localize with non-nano + result = result.tz_localize(self.tz) except NotImplementedError: warnings.warn( @@ -782,11 +778,12 @@ def _add_offset(self, offset) -> DatetimeArray: PerformanceWarning, ) result = self.astype("O") + offset + result = type(self)._from_sequence(result) if not len(self): # GH#30336 _from_sequence won't be able to infer self.tz - return type(self)._from_sequence(result).tz_localize(self.tz) + return result.tz_localize(self.tz) - return type(self)._from_sequence(result) + return result def _sub_datetimelike_scalar(self, other): # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] @@ -1131,8 +1128,14 @@ def normalize(self) -> DatetimeArray: '2014-08-01 00:00:00+05:30'], dtype='datetime64[ns, Asia/Calcutta]', freq=None) """ - new_values = normalize_i8_timestamps(self.asi8, self.tz) - return type(self)(new_values)._with_freq("infer").tz_localize(self.tz) + new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._reso) + dt64_values = new_values.view(self._ndarray.dtype) + + dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype) + dta = dta._with_freq("infer") + if self.tz is not None: + dta = dta.tz_localize(self.tz) + return dta def to_period(self, freq=None) -> PeriodArray: """ diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 816f07b076ef8..0cb1776d6046d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -29,7 +29,6 @@ precision_from_unit, ) from pandas._libs.tslibs.fields import get_timedelta_field -from pandas._libs.tslibs.np_datetime import py_get_unit_from_dtype from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, ints_to_pytimedelta, @@ -41,7 +40,6 @@ npt, ) from pandas.compat.numpy import function as nv -from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_endpoints from pandas.core.dtypes.astype import astype_td64_unit_conversion @@ -156,10 +154,6 @@ class TimedeltaArray(dtl.TimelikeOps): # Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray) # operates pointwise. - @cache_readonly - def _reso(self): - return py_get_unit_from_dtype(self.dtype) - def _box_func(self, x: np.timedelta64) -> Timedelta | NaTType: y = x.view("i8") if y == NaT.value: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index d6dda373bdf92..d4d61df915acb 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1019,6 +1019,7 @@ def to_datetime( - Inputs can contain both naive and aware, string or datetime, the above rules still apply + >>> from datetime import timezone, timedelta >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 12:00 -0530', ... datetime(2020, 1, 1, 18), ... datetime(2020, 1, 1, 18, diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 897528cf18122..d00eab8c10e1c 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -45,6 +45,21 @@ def test_fields(self, unit, reso, field): expected = getattr(dti._data, field) tm.assert_numpy_array_equal(res, expected) + def test_normalize(self, unit): + dti = pd.date_range("2016-01-01 06:00:00", periods=55, freq="D") + arr = np.asarray(dti).astype(f"M8[{unit}]") + + dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) + + assert not dta.is_normalized + + # TODO: simplify once we can just .astype to other unit + exp = np.asarray(dti.normalize()).astype(f"M8[{unit}]") + expected = DatetimeArray._simple_new(exp, dtype=exp.dtype) + + res = dta.normalize() + tm.assert_extension_array_equal(res, expected) + class TestDatetimeArrayComparisons: # TODO: merge this into tests/arithmetic/test_datetime64 once it is diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 1e84a05e2ae97..e42039a86fc16 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1124,7 +1124,7 @@ def key(self): @pytest.fixture def expected(self, val): if val[0] == 2: - # NB: this condition is based on currently-harcoded "val" cases + # NB: this condition is based on currently-hardcoded "val" cases dtype = np.int64 else: dtype = np.float64 diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index fd764a60d0644..cf5cbe6e2af66 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -12,6 +12,7 @@ List, Tuple, ) +import warnings import numpy as np import pytest @@ -30,7 +31,10 @@ from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG from pandas.errors import PerformanceWarning -from pandas import DatetimeIndex +from pandas import ( + DatetimeIndex, + date_range, +) import pandas._testing as tm from pandas.tests.tseries.offsets.common import ( Base, @@ -547,6 +551,38 @@ def test_offsets_hashable(self, offset_types): off = self._get_offset(offset_types) assert hash(off) is not None + @pytest.mark.parametrize("unit", ["s", "ms", "us"]) + def test_add_dt64_ndarray_non_nano(self, offset_types, unit, request): + # check that the result with non-nano matches nano + off = self._get_offset(offset_types) + + if type(off) is DateOffset: + mark = pytest.mark.xfail(reason="non-nano not implemented") + request.node.add_marker(mark) + + dti = date_range("2016-01-01", periods=35, freq="D") + + arr = dti._data._ndarray.astype(f"M8[{unit}]") + dta = type(dti._data)._simple_new(arr, dtype=arr.dtype) + + with warnings.catch_warnings(record=True) as w: + expected = dti._data + off + result = dta + off + + if len(w): + # PerformanceWarning was issued bc _apply_array raised, so we + # fell back to object dtype, for which the code path does + # not yet cast back to the original resolution + mark = pytest.mark.xfail( + reason="Goes through object dtype in DatetimeArray._add_offset, " + "doesn't restore reso in result" + ) + request.node.add_marker(mark) + + tm.assert_numpy_array_equal( + result._ndarray, expected._ndarray.astype(arr.dtype) + ) + class TestDateOffset(Base): def setup_method(self): diff --git a/setup.py b/setup.py index bca919a3aa6f8..11ab8c5a183be 100755 --- a/setup.py +++ b/setup.py @@ -520,6 +520,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.offsets": { "pyxfile": "_libs/tslibs/offsets", "depends": tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.parsing": { "pyxfile": "_libs/tslibs/parsing",