From 83dc0c3e6819979bd48575146be2832baa195fc1 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 20 Nov 2023 10:35:42 -0800 Subject: [PATCH 1/6] TYP: tighter typing in _apply_array --- pandas/_libs/tslibs/offsets.pyx | 51 ++++++--------------------------- pandas/core/arrays/datetimes.py | 8 ++++-- 2 files changed, 14 insertions(+), 45 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index b79b8b23118a6..9c0a45fb8521e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -110,33 +110,6 @@ cdef bint _is_normalized(datetime dt): return True -def apply_wrapper_core(func, self, other) -> ndarray: - result = func(self, other) - result = np.asarray(result) - - if self.normalize: - # TODO: Avoid circular/runtime import - from .vectorized import normalize_i8_timestamps - reso = get_unit_from_dtype(other.dtype) - result = normalize_i8_timestamps(result.view("i8"), None, reso=reso) - - return result - - -def apply_array_wraps(func): - # Note: normally we would use `@functools.wraps(func)`, but this does - # not play nicely with cython class methods - def wrapper(self, other) -> np.ndarray: - # other is a DatetimeArray - result = apply_wrapper_core(func, self, other) - return result - - # do @functools.wraps(func) manually since it doesn't work on cdef funcs - wrapper.__name__ = func.__name__ - wrapper.__doc__ = func.__doc__ - return wrapper - - def apply_wraps(func): # Note: normally we would use `@functools.wraps(func)`, but this does # not play nicely with cython class methods @@ -644,8 +617,7 @@ cdef class BaseOffset: def _apply(self, other): raise NotImplementedError("implemented by subclasses") - @apply_array_wraps - def _apply_array(self, dtarr): + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: raise NotImplementedError( f"DateOffset subclass {type(self).__name__} " "does not have a vectorized implementation" @@ -1399,8 +1371,7 @@ cdef class RelativeDeltaOffset(BaseOffset): "applied 
vectorized" ) - @apply_array_wraps - def _apply_array(self, dtarr): + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: reso = get_unit_from_dtype(dtarr.dtype) dt64other = np.asarray(dtarr) @@ -1814,8 +1785,7 @@ cdef class BusinessDay(BusinessMixin): days = n + 2 return days - @apply_array_wraps - def _apply_array(self, dtarr): + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: i8other = dtarr.view("i8") reso = get_unit_from_dtype(dtarr.dtype) res = self._shift_bdays(i8other, reso=reso) @@ -2361,8 +2331,7 @@ cdef class YearOffset(SingleConstructorOffset): months = years * 12 + (self.month - other.month) return shift_month(other, months, self._day_opt) - @apply_array_wraps - def _apply_array(self, dtarr): + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( dtarr.view("i8"), self.n, self.month, self._day_opt, modby=12, reso=reso @@ -2613,8 +2582,7 @@ cdef class QuarterOffset(SingleConstructorOffset): months = qtrs * 3 - months_since return shift_month(other, months, self._day_opt) - @apply_array_wraps - def _apply_array(self, dtarr): + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( dtarr.view("i8"), @@ -2798,8 +2766,7 @@ cdef class MonthOffset(SingleConstructorOffset): n = roll_convention(other.day, self.n, compare_day) return shift_month(other, n, self._day_opt) - @apply_array_wraps - def _apply_array(self, dtarr): + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_months(dtarr.view("i8"), self.n, self._day_opt, reso=reso) return shifted @@ -3029,10 +2996,9 @@ cdef class SemiMonthOffset(SingleConstructorOffset): return shift_month(other, months, to_day) - @apply_array_wraps @cython.wraparound(False) @cython.boundscheck(False) - def _apply_array(self, dtarr): + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: cdef: ndarray i8other = 
dtarr.view("i8") Py_ssize_t i, count = dtarr.size @@ -3254,8 +3220,7 @@ cdef class Week(SingleConstructorOffset): return other + timedelta(weeks=k) - @apply_array_wraps - def _apply_array(self, dtarr): + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: if self.weekday is None: td = timedelta(days=7 * self.n) unit = np.datetime_data(dtarr.dtype)[0] diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 34a6e118733ae..f3edc71eabb28 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -787,7 +787,7 @@ def _assert_tzawareness_compat(self, other) -> None: # ----------------------------------------------------------------- # Arithmetic Methods - def _add_offset(self, offset) -> Self: + def _add_offset(self, offset: BaseOffset) -> Self: assert not isinstance(offset, Tick) if self.tz is not None: @@ -796,7 +796,7 @@ def _add_offset(self, offset) -> Self: values = self try: - result = offset._apply_array(values) + result = offset._apply_array(values._ndarray) if result.dtype.kind == "i": result = result.view(values.dtype) except NotImplementedError: @@ -814,6 +814,10 @@ def _add_offset(self, offset) -> Self: else: result = type(self)._simple_new(result, dtype=result.dtype) + if offset.normalize: + result = result.normalize() + result._freq = None + if self.tz is not None: result = result.tz_localize(self.tz) From fb25ce8fef1b24121a719356c0d56a07cf88b0f9 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 20 Nov 2023 10:36:39 -0800 Subject: [PATCH 2/6] comment --- pandas/_libs/tslibs/offsets.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 9c0a45fb8521e..c2a5ba435fa53 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -618,6 +618,8 @@ cdef class BaseOffset: raise NotImplementedError("implemented by subclasses") def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: + # NB: _apply_array does not 
handle respecting `self.normalize`; the + # caller (DatetimeArray._add_offset) normalizes the result afterwards. raise NotImplementedError( f"DateOffset subclass {type(self).__name__} " "does not have a vectorized implementation" From 5090415053dedcfd3c9af1bdae9dd71093dd0dc4 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 09:58:44 -0800 Subject: [PATCH 3/6] mypy fixup --- pandas/_libs/tslibs/offsets.pyi | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index aecc8cf681cf8..bf43b2c8470b9 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -33,6 +33,7 @@ class ApplyTypeError(TypeError): ... class BaseOffset: n: int + normalize: bool def __init__(self, n: int = ..., normalize: bool = ...) -> None: ... def __eq__(self, other) -> bool: ... def __ne__(self, other) -> bool: ... From cf9a3b0cdc839680e71d5449383f90ab7ab74526 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 11:00:40 -0800 Subject: [PATCH 4/6] mypy fixup --- pandas/_libs/tslibs/offsets.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index bf43b2c8470b9..7eb8dc0813868 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -86,7 +86,7 @@ class BaseOffset: @property def freqstr(self) -> str: ... def _apply(self, other): ... - def _apply_array(self, dtarr) -> None: ... + def _apply_array(self, dtarr: np.ndarray) -> np.ndarray: ... def rollback(self, dt: datetime) -> datetime: ... def rollforward(self, dt: datetime) -> datetime: ... def is_on_offset(self, dt: datetime) -> bool: ... 
From 297438b784757abedb18c2e8f6509bfce74fa46c Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Nov 2023 08:04:04 -0800 Subject: [PATCH 5/6] mypy fixup --- pandas/core/arrays/datetimes.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fcb76a8debc35..de5832ba31b70 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -799,24 +799,27 @@ def _add_offset(self, offset: BaseOffset) -> Self: values = self try: - result = offset._apply_array(values._ndarray) - if result.dtype.kind == "i": - result = result.view(values.dtype) + res_values = offset._apply_array(values._ndarray) + if res_values.dtype.kind == "i": + # error: Argument 1 to "view" of "ndarray" has incompatible type + # "dtype[datetime64] | DatetimeTZDtype"; expected + # "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]" + res_values = res_values.view(values.dtype) # type: ignore[arg-type] except NotImplementedError: warnings.warn( "Non-vectorized DateOffset being applied to Series or DatetimeIndex.", PerformanceWarning, stacklevel=find_stack_level(), ) - result = self.astype("O") + offset + res_values = self.astype("O") + offset # TODO(GH#55564): as_unit will be unnecessary - result = type(self)._from_sequence(result).as_unit(self.unit) + result = type(self)._from_sequence(res_values).as_unit(self.unit) if not len(self): # GH#30336 _from_sequence won't be able to infer self.tz return result.tz_localize(self.tz) else: - result = type(self)._simple_new(result, dtype=result.dtype) + result = type(self)._simple_new(res_values, dtype=res_values.dtype) if offset.normalize: result = result.normalize() result._freq = None From 7a8b58e28973368d71d265045ad6086d58f19b62 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Nov 2023 18:36:55 -0800 Subject: [PATCH 6/6] update run_stubtest --- scripts/run_stubtest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/run_stubtest.py 
b/scripts/run_stubtest.py index 35cbbef08124e..6307afa1bc822 100644 --- a/scripts/run_stubtest.py +++ b/scripts/run_stubtest.py @@ -69,7 +69,6 @@ "pandas._libs.sparse.SparseIndex.to_block_index", "pandas._libs.sparse.SparseIndex.to_int_index", # TODO (decorator changes argument names) - "pandas._libs.tslibs.offsets.BaseOffset._apply_array", "pandas._libs.tslibs.offsets.BusinessHour.rollback", "pandas._libs.tslibs.offsets.BusinessHour.rollforward ", # type alias