diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index 718404f0799e4..ace668d6ef7ee 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -26,15 +26,12 @@
     TD64NS_DTYPE,
     ensure_object,
     is_bool_dtype,
-    is_complex_dtype,
     is_dtype_equal,
     is_extension_array_dtype,
-    is_float_dtype,
     is_integer_dtype,
     is_object_dtype,
     is_scalar,
     is_string_or_object_np_dtype,
-    needs_i8_conversion,
 )
 from pandas.core.dtypes.dtypes import (
     CategoricalDtype,
@@ -291,7 +288,7 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
         result = values.isna()  # type: ignore[assignment]
     elif is_string_or_object_np_dtype(values.dtype):
         result = _isna_string_dtype(values, inf_as_na=inf_as_na)
-    elif needs_i8_conversion(dtype):
+    elif dtype.kind in "mM":
         # this is the NaT pattern
         result = values.view("i8") == iNaT
     else:
@@ -502,7 +499,7 @@ def array_equivalent(
     # fastpath when we require that the dtypes match (Block.equals)
     if left.dtype.kind in "fc":
         return _array_equivalent_float(left, right)
-    elif needs_i8_conversion(left.dtype):
+    elif left.dtype.kind in "mM":
         return _array_equivalent_datetimelike(left, right)
     elif is_string_or_object_np_dtype(left.dtype):
         # TODO: fastpath for pandas' StringDtype
@@ -519,14 +516,14 @@
         return _array_equivalent_object(left, right, strict_nan)

     # NaNs can occur in float and complex arrays.
-    if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
+    if left.dtype.kind in "fc":
         if not (left.size and right.size):
             return True
         return ((left == right) | (isna(left) & isna(right))).all()

-    elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
+    elif left.dtype.kind in "mM" or right.dtype.kind in "mM":
         # datetime64, timedelta64, Period
-        if not is_dtype_equal(left.dtype, right.dtype):
+        if left.dtype != right.dtype:
             return False

         left = left.view("i8")
@@ -541,11 +538,11 @@
     return np.array_equal(left, right)


-def _array_equivalent_float(left, right) -> bool:
+def _array_equivalent_float(left: np.ndarray, right: np.ndarray) -> bool:
     return bool(((left == right) | (np.isnan(left) & np.isnan(right))).all())


-def _array_equivalent_datetimelike(left, right):
+def _array_equivalent_datetimelike(left: np.ndarray, right: np.ndarray):
     return np.array_equal(left.view("i8"), right.view("i8"))


@@ -601,7 +598,7 @@ def infer_fill_value(val):
     if not is_list_like(val):
         val = [val]
     val = np.array(val, copy=False)
-    if needs_i8_conversion(val.dtype):
+    if val.dtype.kind in "mM":
         return np.array("NaT", dtype=val.dtype)
     elif is_object_dtype(val.dtype):
         dtype = lib.infer_dtype(ensure_object(val), skipna=False)
@@ -616,7 +613,7 @@ def maybe_fill(arr: np.ndarray) -> np.ndarray:
     """
     Fill numpy.ndarray with NaN, unless we have a integer or boolean dtype.
""" - if arr.dtype.kind not in ("u", "i", "b"): + if arr.dtype.kind not in "iub": arr.fill(np.nan) return arr @@ -650,15 +647,15 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True): if isinstance(dtype, ExtensionDtype): return dtype.na_value - elif needs_i8_conversion(dtype): + elif dtype.kind in "mM": return dtype.type("NaT", "ns") - elif is_float_dtype(dtype): + elif dtype.kind == "f": return np.nan - elif is_integer_dtype(dtype): + elif dtype.kind in "iu": if compat: return 0 return np.nan - elif is_bool_dtype(dtype): + elif dtype.kind == "b": if compat: return False return np.nan diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 57252447f4286..6b9f99519abd8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -94,7 +94,6 @@ is_dataclass, is_dict_like, is_dtype_equal, - is_extension_array_dtype, is_float, is_float_dtype, is_hashable, @@ -3603,7 +3602,9 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: result._mgr.add_references(self._mgr) # type: ignore[arg-type] elif ( - self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]) + self._is_homogeneous_type + and dtypes + and isinstance(dtypes[0], ExtensionDtype) ): # We have EAs with the same dtype. We can preserve that dtype in transpose. dtype = dtypes[0] @@ -4184,7 +4185,7 @@ def _set_item(self, key, value) -> None: if ( key in self.columns and value.ndim == 1 - and not is_extension_array_dtype(value) + and not isinstance(value.dtype, ExtensionDtype) ): # broadcast across multiple columns if necessary if not self.columns.is_unique or isinstance(self.columns, MultiIndex): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 38bac75bd48b9..496ca665623be 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -123,7 +123,10 @@ is_timedelta64_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, +) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, @@ -4670,7 +4673,7 @@ def _drop_axis( if errors == "raise" and labels_missing: raise KeyError(f"{labels} not found in axis") - if is_extension_array_dtype(mask.dtype): + if isinstance(mask.dtype, ExtensionDtype): # GH#45860 mask = mask.to_numpy(dtype=bool) @@ -5457,7 +5460,7 @@ def _needs_reindex_multi(self, axes, method, level: Level | None) -> bool_t: and not ( self.ndim == 2 and len(self.dtypes) == 1 - and is_extension_array_dtype(self.dtypes.iloc[0]) + and isinstance(self.dtypes.iloc[0], ExtensionDtype) ) ) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 9a6b17853bde8..6348870e7b59a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -47,12 +47,6 @@ ensure_platform_int, ensure_uint64, is_1d_only_ea_dtype, - is_bool_dtype, - is_complex_dtype, - is_float_dtype, - is_integer_dtype, - is_numeric_dtype, - needs_i8_conversion, ) from pandas.core.dtypes.missing import ( isna, @@ -248,7 +242,7 @@ def _get_out_dtype(self, dtype: np.dtype) -> np.dtype: if how == "rank": out_dtype = "float64" else: - if is_numeric_dtype(dtype): + if dtype.kind in "iufcb": out_dtype = f"{dtype.kind}{dtype.itemsize}" else: out_dtype = "object" @@ -274,9 +268,9 @@ def _get_result_dtype(self, dtype: np.dtype) -> np.dtype: if dtype == np.dtype(bool): return np.dtype(np.int64) elif how in ["mean", "median", "var", "std", "sem"]: - if is_float_dtype(dtype) or is_complex_dtype(dtype): + if dtype.kind in "fc": return dtype - elif is_numeric_dtype(dtype): + elif 
dtype.kind in "iub": return np.dtype(np.float64) return dtype @@ -339,14 +333,14 @@ def _call_cython_op( orig_values = values dtype = values.dtype - is_numeric = is_numeric_dtype(dtype) + is_numeric = dtype.kind in "iufcb" - is_datetimelike = needs_i8_conversion(dtype) + is_datetimelike = dtype.kind in "mM" if is_datetimelike: values = values.view("int64") is_numeric = True - elif is_bool_dtype(dtype): + elif dtype.kind == "b": values = values.view("uint8") if values.dtype == "float16": values = values.astype(np.float32) @@ -446,7 +440,7 @@ def _call_cython_op( # i.e. counts is defined. Locations where count np.ndarray: result = result.take(indexer) # fall through for boolean - if not is_extension_array_dtype(result.dtype): + if not isinstance(result.dtype, ExtensionDtype): return result.astype(bool)._values if is_object_dtype(key): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 521ced16e9e97..977fd2c09f897 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -455,7 +455,7 @@ def _interpolate_1d( # sort preserve_nans and convert to list preserve_nans = sorted(preserve_nans) - is_datetimelike = needs_i8_conversion(yvalues.dtype) + is_datetimelike = yvalues.dtype.kind in "mM" if is_datetimelike: yvalues = yvalues.view("i8") diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 455c220853857..0beb788acd01c 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -34,7 +34,6 @@ from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( - is_any_int_dtype, is_complex, is_float, is_float_dtype, @@ -247,7 +246,7 @@ def _maybe_get_mask( # Boolean data cannot contain nulls, so signal via mask being None return None - if skipna or needs_i8_conversion(values.dtype): + if skipna or values.dtype.kind in "mM": mask = isna(values) return mask @@ -300,7 +299,7 @@ def _get_values( dtype = values.dtype datetimelike = False - if needs_i8_conversion(values.dtype): + if values.dtype.kind in "mM": # changing timedelta64/datetime64 to int64 needs to happen after # finding `mask` above values = np.asarray(values.view("i8")) @@ -433,7 +432,7 @@ def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.n For 2-D values, returns a 1-D array where each element is missing. 
""" # we either return np.nan or pd.NaT - if is_numeric_dtype(values.dtype): + if values.dtype.kind in "iufcb": values = values.astype("float64") fill_value = na_value_for_dtype(values.dtype) @@ -521,7 +520,7 @@ def nanany( # expected "bool") return values.any(axis) # type: ignore[return-value] - if needs_i8_conversion(values.dtype) and values.dtype.kind != "m": + if values.dtype.kind == "M": # GH#34479 warnings.warn( "'any' with datetime64 dtypes is deprecated and will raise in a " @@ -582,7 +581,7 @@ def nanall( # expected "bool") return values.all(axis) # type: ignore[return-value] - if needs_i8_conversion(values.dtype) and values.dtype.kind != "m": + if values.dtype.kind == "M": # GH#34479 warnings.warn( "'all' with datetime64 dtypes is deprecated and will raise in a " @@ -976,12 +975,12 @@ def nanvar( """ dtype = values.dtype mask = _maybe_get_mask(values, skipna, mask) - if is_any_int_dtype(dtype): + if dtype.kind in "iu": values = values.astype("f8") if mask is not None: values[mask] = np.nan - if is_float_dtype(values.dtype): + if values.dtype.kind == "f": count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) else: count, d = _get_counts_nanvar(values.shape, mask, axis, ddof) @@ -1007,7 +1006,7 @@ def nanvar( # Return variance as np.float64 (the datatype used in the accumulator), # unless we were dealing with a float array, in which case use the same # precision as the original values array. - if is_float_dtype(dtype): + if dtype.kind == "f": result = result.astype(dtype, copy=False) return result diff --git a/pandas/core/series.py b/pandas/core/series.py index 9c91badc57ce3..8055440fe4c02 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -64,7 +64,6 @@ ) from pandas.core.dtypes.common import ( is_dict_like, - is_extension_array_dtype, is_integer, is_iterator, is_list_like, @@ -73,6 +72,7 @@ pandas_dtype, validate_all_hashable, ) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCDataFrame from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import ( @@ -1872,7 +1872,7 @@ def to_dict(self, into: type[dict] = dict) -> dict: # GH16122 into_c = com.standardize_mapping(into) - if is_object_dtype(self) or is_extension_array_dtype(self): + if is_object_dtype(self.dtype) or isinstance(self.dtype, ExtensionDtype): return into_c((k, maybe_box_native(v)) for k, v in self.items()) else: # Not an object dtype => all types will be the same so let the default @@ -4175,7 +4175,7 @@ def explode(self, ignore_index: bool = False) -> Series: 3 4 dtype: object """ - if not len(self) or not is_object_dtype(self): + if not len(self) or not is_object_dtype(self.dtype): result = self.copy() return result.reset_index(drop=True) if ignore_index else result @@ -5376,7 +5376,7 @@ def _convert_dtypes( input_series = self if infer_objects: input_series = input_series.infer_objects() - if is_object_dtype(input_series): + if is_object_dtype(input_series.dtype): input_series = input_series.copy(deep=None) if convert_string or convert_integer or convert_boolean or convert_floating: diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index db6c51d0f6991..58bb551e036a9 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -14,6 +14,7 @@ from pandas.core.dtypes.common import ( is_float, is_scalar, + pandas_dtype, ) from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -709,7 +710,7 @@ def 
test_array_equivalent_index_with_tuples(): ], ) def test_na_value_for_dtype(dtype, na_value): - result = na_value_for_dtype(dtype) + result = na_value_for_dtype(pandas_dtype(dtype)) # identify check doesn't work for datetime64/timedelta64("NaT") bc they # are not singletons assert result is na_value or (