From 0043ea751bf0ebb1b755d5d0c43bdf60bffaea7d Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 4 Dec 2020 19:44:25 -0800
Subject: [PATCH] TYP: datetimelike

Add parameter/return annotations to the datetime-like array classes.
Replace boolean-mask ``arr[mask] = value`` assignments with
``np.putmask(arr, mask, value)``, allocate with ``np.empty`` instead of
``np.zeros`` where the buffer is immediately filled with iNaT, and make
the arguments of ``std`` in timedeltas.py keyword-only.

``_hasnans`` is annotated ``-> bool`` because it returns
``bool(self._isnan.any())``.
---
 pandas/core/arrays/datetimelike.py | 52 ++++++++++++++++++------------
 pandas/core/arrays/datetimes.py    |  2 +-
 pandas/core/arrays/period.py       |  4 +--
 pandas/core/arrays/timedeltas.py   |  7 ++--
 4 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 66906f8463336..be9864731842d 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -112,8 +112,11 @@ def __init__(self, data, dtype=None, freq=None, copy=False):
 
     @classmethod
     def _simple_new(
-        cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None
-    ):
+        cls: Type[DatetimeLikeArrayT],
+        values: np.ndarray,
+        freq: Optional[BaseOffset] = None,
+        dtype=None,
+    ) -> DatetimeLikeArrayT:
         raise AbstractMethodError(cls)
 
     @property
@@ -217,7 +220,7 @@ def _box_func(self, x):
         """
         raise AbstractMethodError(self)
 
-    def _box_values(self, values):
+    def _box_values(self, values) -> np.ndarray:
         """
         apply box func to passed values
         """
@@ -416,7 +419,9 @@ def _values_for_factorize(self):
         return self._ndarray, iNaT
 
     @classmethod
-    def _from_factorized(cls, values, original):
+    def _from_factorized(
+        cls: Type[DatetimeLikeArrayT], values, original
+    ) -> DatetimeLikeArrayT:
         return cls(values, dtype=original.dtype)
 
     # ------------------------------------------------------------------
@@ -661,7 +666,7 @@ def _unbox(
     # These are not part of the EA API, but we implement them because
     # pandas assumes they're there.
 
-    def value_counts(self, dropna=False):
+    def value_counts(self, dropna: bool = False):
         """
         Return a Series containing counts of unique values.
 
@@ -755,28 +760,30 @@ def isin(self, values) -> np.ndarray:
     # ------------------------------------------------------------------
     # Null Handling
 
-    def isna(self):
+    def isna(self) -> np.ndarray:
         return self._isnan
 
     @property  # NB: override with cache_readonly in immutable subclasses
-    def _isnan(self):
+    def _isnan(self) -> np.ndarray:
         """
         return if each value is nan
         """
         return self.asi8 == iNaT
 
     @property  # NB: override with cache_readonly in immutable subclasses
-    def _hasnans(self):
+    def _hasnans(self) -> bool:
         """
         return if I have any nans; enables various perf speedups
         """
         return bool(self._isnan.any())
 
-    def _maybe_mask_results(self, result, fill_value=iNaT, convert=None):
+    def _maybe_mask_results(
+        self, result: np.ndarray, fill_value=iNaT, convert=None
+    ) -> np.ndarray:
         """
         Parameters
         ----------
-        result : a ndarray
+        result : np.ndarray
         fill_value : object, default iNaT
         convert : str, dtype or None
 
@@ -794,7 +801,7 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None):
                 result = result.astype(convert)
             if fill_value is None:
                 fill_value = np.nan
-            result[self._isnan] = fill_value
+            np.putmask(result, self._isnan, fill_value)
         return result
 
     # ------------------------------------------------------------------
@@ -893,22 +900,24 @@ def _validate_frequency(cls, index, freq, **kwargs):
             ) from e
 
     @classmethod
-    def _generate_range(cls, start, end, periods, freq, *args, **kwargs):
+    def _generate_range(
+        cls: Type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs
+    ) -> DatetimeLikeArrayT:
         raise AbstractMethodError(cls)
 
     # monotonicity/uniqueness properties are called via frequencies.infer_freq,
     #  see GH#23789
 
     @property
-    def _is_monotonic_increasing(self):
+    def _is_monotonic_increasing(self) -> bool:
         return algos.is_monotonic(self.asi8, timelike=True)[0]
 
     @property
-    def _is_monotonic_decreasing(self):
+    def _is_monotonic_decreasing(self) -> bool:
         return algos.is_monotonic(self.asi8, timelike=True)[1]
 
     @property
-    def _is_unique(self):
+    def _is_unique(self) -> bool:
         return len(unique1d(self.asi8)) == len(self)
 
     # ------------------------------------------------------------------
@@ -940,9 +949,10 @@ def _cmp_method(self, other, op):
             result = op(self._ndarray.view("i8"), other_vals.view("i8"))
 
         o_mask = isna(other)
-        if self._hasnans | np.any(o_mask):
+        mask = self._isnan | o_mask
+        if mask.any():
             nat_result = op is operator.ne
-            result[self._isnan | o_mask] = nat_result
+            np.putmask(result, mask, nat_result)
 
         return result
 
@@ -996,7 +1006,7 @@ def _add_timedeltalike_scalar(self, other):
         if isna(other):
             # i.e np.timedelta64("NaT"), not recognized by delta_to_nanoseconds
             new_values = np.empty(self.shape, dtype="i8")
-            new_values[:] = iNaT
+            new_values.fill(iNaT)
             return type(self)(new_values, dtype=self.dtype)
 
         inc = delta_to_nanoseconds(other)
@@ -1038,7 +1048,7 @@ def _add_timedelta_arraylike(self, other):
         )
         if self._hasnans or other._hasnans:
             mask = self._isnan | other._isnan
-            new_values[mask] = iNaT
+            np.putmask(new_values, mask, iNaT)
 
         return type(self)(new_values, dtype=self.dtype)
 
@@ -1053,7 +1063,7 @@ def _add_nat(self):
 
         # GH#19124 pd.NaT is treated like a timedelta for both timedelta
         # and datetime dtypes
-        result = np.zeros(self.shape, dtype=np.int64)
+        result = np.empty(self.shape, dtype=np.int64)
         result.fill(iNaT)
         return type(self)(result, dtype=self.dtype, freq=None)
 
@@ -1067,7 +1077,7 @@ def _sub_nat(self):
         # For datetime64 dtypes by convention we treat NaT as a datetime, so
         # this subtraction returns a timedelta64 dtype.
         # For period dtype, timedelta64 is a close-enough return dtype.
-        result = np.zeros(self.shape, dtype=np.int64)
+        result = np.empty(self.shape, dtype=np.int64)
         result.fill(iNaT)
         return result.view("timedelta64[ns]")
 
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index ce70f929cc79d..ba639eb41bc2b 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -680,7 +680,7 @@ def _sub_datetime_arraylike(self, other):
         arr_mask = self._isnan | other._isnan
         new_values = checked_add_with_arr(self_i8, -other_i8, arr_mask=arr_mask)
         if self._hasnans or other._hasnans:
-            new_values[arr_mask] = iNaT
+            np.putmask(new_values, arr_mask, iNaT)
         return new_values.view("timedelta64[ns]")
 
     def _add_offset(self, offset):
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index 50ed526cf01e9..7b0e4ce5b0748 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -597,7 +597,7 @@ def astype(self, dtype, copy: bool = True):
             return self.asfreq(dtype.freq)
         return super().astype(dtype, copy=copy)
 
-    def searchsorted(self, value, side="left", sorter=None):
+    def searchsorted(self, value, side="left", sorter=None) -> np.ndarray:
         value = self._validate_searchsorted_value(value).view("M8[ns]")
 
         # Cast to M8 to get datetime-like NaT placement
@@ -676,7 +676,7 @@ def _addsub_int_array(
             other = -other
         res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
         res_values = res_values.view("i8")
-        res_values[self._isnan] = iNaT
+        np.putmask(res_values, self._isnan, iNaT)
         return type(self)(res_values, freq=self.freq)
 
     def _add_offset(self, other: BaseOffset):
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 0921c3460c626..c51882afc4871 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -383,6 +383,7 @@ def sum(
 
     def std(
         self,
+        *,
         axis=None,
         dtype=None,
         out=None,
@@ -627,7 +628,7 @@ def __floordiv__(self, other):
             # at this point we should only have numeric scalars; anything
             #  else will raise
             result = self.asi8 // other
-            result[self._isnan] = iNaT
+            np.putmask(result, self._isnan, iNaT)
             freq = None
             if self.freq is not None:
                 # Note: freq gets division, not floor-division
@@ -653,7 +654,7 @@ def __floordiv__(self, other):
             mask = self._isnan | other._isnan
             if mask.any():
                 result = result.astype(np.float64)
-                result[mask] = np.nan
+                np.putmask(result, mask, np.nan)
             return result
 
         elif is_object_dtype(other.dtype):
@@ -707,7 +708,7 @@ def __rfloordiv__(self, other):
             mask = self._isnan | other._isnan
             if mask.any():
                 result = result.astype(np.float64)
-                result[mask] = np.nan
+                np.putmask(result, mask, np.nan)
             return result
 
         elif is_object_dtype(other.dtype):