From 3b0a66b1e96b913b86790a8f4f487bf83cb014d1 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Sep 2020 15:35:15 -0700 Subject: [PATCH] TYP: core.missing; PERF for needs_i8_conversion --- pandas/core/arrays/datetimelike.py | 14 ++------------ pandas/core/dtypes/common.py | 4 ++++ pandas/core/missing.py | 31 +++++++++++++++--------------- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 026aad5ad6eb7..45cabe8f0b498 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1005,19 +1005,9 @@ def fillna(self, value=None, method=None, limit=None): else: func = missing.backfill_1d - values = self._ndarray - if not is_period_dtype(self.dtype): - # For PeriodArray self._ndarray is i8, which gets copied - # by `func`. Otherwise we need to make a copy manually - # to avoid modifying `self` in-place. - values = values.copy() - + values = self.copy() new_values = func(values, limit=limit, mask=mask) - if is_datetime64tz_dtype(self.dtype): - # we need to pass int64 values to the constructor to avoid - # re-localizing incorrectly - new_values = new_values.view("i8") - new_values = type(self)(new_values, dtype=self.dtype) + new_values = self._from_backing_data(new_values) else: # fill with value new_values = self.copy() diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5987fdabf78bb..acbdbfd7707e3 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1215,6 +1215,10 @@ def needs_i8_conversion(arr_or_dtype) -> bool: """ if arr_or_dtype is None: return False + if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): + # fastpath + dtype = arr_or_dtype + return dtype.kind in ["m", "M"] or dtype.type is Period return ( is_datetime_or_timedelta_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index be66b19d10064..9b96c8f01153b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -7,17 +7,15 @@ import numpy as np from pandas._libs import algos, lib +from pandas._typing import DtypeObj from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.common import ( ensure_float64, - is_datetime64_dtype, - is_datetime64tz_dtype, is_integer_dtype, is_numeric_v_string_like, is_scalar, - is_timedelta64_dtype, needs_i8_conversion, ) from pandas.core.dtypes.missing import isna @@ -72,7 +70,7 @@ def mask_missing(arr, values_to_mask): return mask -def clean_fill_method(method, allow_nearest=False): +def clean_fill_method(method, allow_nearest: bool = False): # asfreq is compat for resampling if method in [None, "asfreq"]: return None @@ -543,7 +541,12 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat def interpolate_2d( - values, method="pad", axis=0, limit=None, fill_value=None, dtype=None + values, + method="pad", + axis=0, + limit=None, + fill_value=None, + dtype: Optional[DtypeObj] = None, ): """ Perform an actual interpolation of values, values will be make 2-d if @@ -584,18 +587,14 @@ def interpolate_2d( return values -def _cast_values_for_fillna(values, dtype): +def _cast_values_for_fillna(values, dtype: DtypeObj): """ Cast values to a dtype that algos.pad and algos.backfill can handle. """ # TODO: for int-dtypes we make a copy, but for everything else this # alters the values in-place. Is this intentional? - if ( - is_datetime64_dtype(dtype) - or is_datetime64tz_dtype(dtype) - or is_timedelta64_dtype(dtype) - ): + if needs_i8_conversion(dtype): values = values.view(np.int64) elif is_integer_dtype(values): @@ -605,7 +604,7 @@ def _cast_values_for_fillna(values, dtype): return values -def _fillna_prep(values, mask=None, dtype=None): +def _fillna_prep(values, mask=None, dtype: Optional[DtypeObj] = None): # boilerplate for pad_1d, backfill_1d, pad_2d, backfill_2d if dtype is None: dtype = values.dtype @@ -620,19 +619,19 @@ def _fillna_prep(values, mask=None, dtype=None): return values, mask -def pad_1d(values, limit=None, mask=None, dtype=None): +def pad_1d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): values, mask = _fillna_prep(values, mask, dtype) algos.pad_inplace(values, mask, limit=limit) return values -def backfill_1d(values, limit=None, mask=None, dtype=None): +def backfill_1d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): values, mask = _fillna_prep(values, mask, dtype) algos.backfill_inplace(values, mask, limit=limit) return values -def pad_2d(values, limit=None, mask=None, dtype=None): +def pad_2d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): values, mask = _fillna_prep(values, mask, dtype) if np.all(values.shape): @@ -643,7 +642,7 @@ def pad_2d(values, limit=None, mask=None, dtype=None): return values -def backfill_2d(values, limit=None, mask=None, dtype=None): +def backfill_2d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): values, mask = _fillna_prep(values, mask, dtype) if np.all(values.shape):