From 508741aed565d46c1d2a72850185768418e117cb Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Apr 2023 18:36:13 -0700 Subject: [PATCH 1/2] PERF: faster dtype checks --- pandas/core/arrays/datetimes.py | 5 +-- pandas/core/arrays/masked.py | 9 ++--- pandas/core/arrays/period.py | 9 ++--- pandas/core/dtypes/cast.py | 37 ++++++++----------- pandas/core/indexes/datetimes.py | 7 +--- pandas/core/interchange/utils.py | 6 +-- pandas/core/reshape/tile.py | 7 ++-- pandas/core/tools/datetimes.py | 5 +-- pandas/io/pytables.py | 10 ++--- pandas/io/stata.py | 8 ++-- .../dtypes/cast/test_find_common_type.py | 2 + 11 files changed, 45 insertions(+), 60 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 12245a144ec2a..b91e71e07faa3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -51,7 +51,6 @@ INT64_DTYPE, is_bool_dtype, is_datetime64_any_dtype, - is_datetime64_dtype, is_dtype_equal, is_float_dtype, is_object_dtype, @@ -2190,11 +2189,11 @@ def objects_to_datetime64ns( # is in UTC # Return i8 values to denote unix timestamps return result.view("i8"), tz_parsed - elif is_datetime64_dtype(result): + elif result.dtype.kind == "M": # returning M8[ns] denotes wall-times; since tz is None # the distinction is a thin one return result, tz_parsed - elif is_object_dtype(result): + elif result.dtype == object: # GH#23675 when called via `pd.to_datetime`, returning an object-dtype # array is allowed. When called via `pd.DatetimeIndex`, we can # only accept datetime64 dtype, so raise TypeError if object-dtype diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 576a75de9962c..18ce2402d14a9 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -42,7 +42,6 @@ from pandas.core.dtypes.common import ( is_bool, is_bool_dtype, - is_datetime64_dtype, is_dtype_equal, is_float_dtype, is_integer_dtype, @@ -478,18 +477,18 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: na_value: float | np.datetime64 | lib.NoDefault # coerce - if is_float_dtype(dtype): + if dtype.kind == "f": # In astype, we consider dtype=float to also mean na_value=np.nan na_value = np.nan - elif is_datetime64_dtype(dtype): + elif dtype.kind == "M": na_value = np.datetime64("NaT") else: na_value = lib.no_default # to_numpy will also raise, but we get somewhat nicer exception messages here - if is_integer_dtype(dtype) and self._hasna: + if dtype.kind in "iu" and self._hasna: raise ValueError("cannot convert NA to integer") - if is_bool_dtype(dtype) and self._hasna: + if dtype.kind == "b" and self._hasna: # careful: astype_nansafe converts np.nan to True raise ValueError("cannot convert float NaN to bool") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 89d02f7c1d444..6710f092277fd 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -56,10 +56,7 @@ from pandas.core.dtypes.common import ( ensure_object, is_datetime64_any_dtype, - is_datetime64_dtype, is_dtype_equal, - is_float_dtype, - is_integer_dtype, is_period_dtype, pandas_dtype, ) @@ -915,7 +912,7 @@ def period_array( """ data_dtype = getattr(data, "dtype", None) - if is_datetime64_dtype(data_dtype): + if lib.is_np_dtype(data_dtype, "M"): return PeriodArray._from_datetime64(data, freq) if isinstance(data_dtype, PeriodDtype): out = PeriodArray(data) @@ -937,10 +934,10 @@ def period_array( else: dtype = None - if is_float_dtype(arrdata) and len(arrdata) > 0: + if arrdata.dtype.kind == "f" and 
len(arrdata) > 0: raise TypeError("PeriodIndex does not allow floating point in construction") - if is_integer_dtype(arrdata.dtype): + if arrdata.dtype.kind in "iu": arr = arrdata.astype(np.int64, copy=False) # error: Argument 2 to "from_ordinals" has incompatible type "Union[str, # Tick, None]"; expected "Union[timedelta, BaseOffset, str]" diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3f035f7207488..5bfd99a9582ee 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -49,20 +49,15 @@ ensure_object, ensure_str, is_bool, - is_bool_dtype, is_complex, - is_complex_dtype, is_extension_array_dtype, is_float, - is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype, is_object_dtype, is_scalar, - is_signed_integer_dtype, is_string_dtype, - is_unsigned_integer_dtype, pandas_dtype as pandas_dtype_func, ) from pandas.core.dtypes.dtypes import ( @@ -352,7 +347,7 @@ def trans(x): if result.dtype.itemsize <= dtype.itemsize and result.size: return result - if is_bool_dtype(dtype) or is_integer_dtype(dtype): + if dtype.kind in "biu": if not result.size: # if we don't have any elements, just astype it return trans(result).astype(dtype) @@ -384,7 +379,7 @@ def trans(x): elif ( issubclass(dtype.type, np.floating) - and not is_bool_dtype(result.dtype) + and result.dtype.kind != "b" and not is_string_dtype(result.dtype) ): with warnings.catch_warnings(): @@ -426,11 +421,11 @@ def maybe_upcast_numeric_to_64bit(arr: NumpyIndexT) -> NumpyIndexT: ndarray or ExtensionArray """ dtype = arr.dtype - if is_signed_integer_dtype(dtype) and dtype != np.int64: + if dtype.kind == "i" and dtype != np.int64: return arr.astype(np.int64) - elif is_unsigned_integer_dtype(dtype) and dtype != np.uint64: + elif dtype.kind == "u" and dtype != np.uint64: return arr.astype(np.uint64) - elif is_float_dtype(dtype) and dtype != np.float64: + elif dtype.kind == "f" and dtype != np.float64: return arr.astype(np.float64) else: return arr @@ -1032,7 +1027,7 @@ def convert_dtypes( if ( convert_string or convert_integer or convert_boolean or convert_floating ) and isinstance(input_array, np.ndarray): - if is_object_dtype(input_array.dtype): + if input_array.dtype == object: inferred_dtype = lib.infer_dtype(input_array) else: inferred_dtype = input_array.dtype @@ -1061,7 +1056,7 @@ def convert_dtypes( inferred_dtype = input_array.dtype elif ( infer_objects - and is_object_dtype(input_array.dtype) + and input_array.dtype == object and (isinstance(inferred_dtype, str) and inferred_dtype == "integer") ): inferred_dtype = target_int_dtype @@ -1088,7 +1083,7 @@ def convert_dtypes( inferred_dtype = inferred_float_dtype elif ( infer_objects - and is_object_dtype(input_array.dtype) + and input_array.dtype == object and ( isinstance(inferred_dtype, str) and inferred_dtype == "mixed-integer-float" @@ -1097,7 +1092,7 @@ def convert_dtypes( inferred_dtype = pandas_dtype_func("Float64") if convert_boolean: - if is_bool_dtype(input_array.dtype): + if input_array.dtype.kind == "b": inferred_dtype = pandas_dtype_func("boolean") elif isinstance(inferred_dtype, str) and inferred_dtype == "boolean": inferred_dtype = pandas_dtype_func("boolean") @@ -1412,10 +1407,10 @@ def find_common_type(types): # don't mix bool / int or float or complex # this is different from numpy, which casts bool with float/int as int - has_bools = any(is_bool_dtype(t) for t in types) + has_bools = any(t.kind == "b" for t in types) if has_bools: for t in types: - if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t): + 
if t.kind in "iufc": return np.dtype("object") return np.find_common_type(types, []) @@ -1480,7 +1475,7 @@ def construct_1d_arraylike_from_scalar( subarr = cls._from_sequence(seq, dtype=dtype).repeat(length) else: - if length and is_integer_dtype(dtype) and isna(value): + if length and dtype.kind in "iu" and isna(value): # coerce if we have nan for an integer dtype dtype = np.dtype("float64") elif isinstance(dtype, np.dtype) and dtype.kind in "US": @@ -1573,7 +1568,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n ... ValueError: Trying to coerce float values to integers """ - assert is_integer_dtype(dtype) + assert dtype.kind in "iu" try: if not isinstance(arr, np.ndarray): @@ -1620,16 +1615,16 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n return casted raise ValueError(f"string values cannot be losslessly cast to {dtype}") - if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): + if dtype.kind == "u" and (arr < 0).any(): raise OverflowError("Trying to coerce negative values to unsigned integers") - if is_float_dtype(arr.dtype): + if arr.dtype.kind == "f": if not np.isfinite(arr).all(): raise IntCastingNaNError( "Cannot convert non-finite values (NA or inf) to integer" ) raise ValueError("Trying to coerce float values to integers") - if is_object_dtype(arr.dtype): + if arr.dtype == object: raise ValueError("Trying to coerce float values to integers") if casted.dtype < arr.dtype: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7de1972de5e4a..062a9a384f9c5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -31,10 +31,7 @@ ) from pandas.util._exceptions import find_stack_level -from pandas.core.dtypes.common import ( - is_datetime64_dtype, - is_scalar, -) +from pandas.core.dtypes.common import is_scalar from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import is_valid_na_for_dtype @@ -409,7 +406,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: # If we have tz, we can compare to tzaware return isinstance(dtype, DatetimeTZDtype) # if we dont have tz, we can only compare to tznaive - return is_datetime64_dtype(dtype) + return lib.is_np_dtype(dtype, "M") # -------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py index eb24a7a672ebd..89599818d6814 100644 --- a/pandas/core/interchange/utils.py +++ b/pandas/core/interchange/utils.py @@ -9,9 +9,9 @@ import numpy as np -from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas._libs import lib -from pandas.api.types import is_datetime64_dtype +from pandas.core.dtypes.dtypes import CategoricalDtype if typing.TYPE_CHECKING: from pandas._typing import DtypeObj @@ -82,7 +82,7 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str: if format_str is not None: return format_str - if is_datetime64_dtype(dtype): + if lib.is_np_dtype(dtype, "M"): # Selecting the first char of resolution string: # dtype.str -> ' Col: if isinstance(values, Categorical): codes = values.codes atom = cls.get_atom_data(shape, kind=codes.dtype.name) - elif is_datetime64_dtype(dtype) or isinstance(dtype, DatetimeTZDtype): + elif lib.is_np_dtype(dtype, "M") or isinstance(dtype, DatetimeTZDtype): atom = cls.get_atom_datetime64(shape) elif lib.is_np_dtype(dtype, "m"): atom = cls.get_atom_timedelta64(shape) @@ -3081,7 
+3079,7 @@ def write_array( vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) vlarr.append(value) - elif is_datetime64_dtype(value.dtype): + elif lib.is_np_dtype(value.dtype, "M"): self._handle.create_array(self.group, key, value.view("i8")) getattr(self.group, key)._v_attrs.value_type = "datetime64" elif isinstance(value.dtype, DatetimeTZDtype): @@ -4863,7 +4861,7 @@ def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index atom = DataIndexableCol._get_atom(converted) if ( - (isinstance(index.dtype, np.dtype) and is_integer_dtype(index)) + lib.is_np_dtype(index.dtype, "iu") or needs_i8_conversion(index.dtype) or is_bool_dtype(index.dtype) ): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 0eb4a42060416..ca6586899da4c 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -33,6 +33,7 @@ from dateutil.relativedelta import relativedelta import numpy as np +from pandas._libs import lib from pandas._libs.lib import infer_dtype from pandas._libs.writers import max_len_string_array from pandas.errors import ( @@ -49,7 +50,6 @@ from pandas.core.dtypes.common import ( ensure_object, - is_datetime64_dtype, is_numeric_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype @@ -418,7 +418,7 @@ def parse_dates_safe( dates, delta: bool = False, year: bool = False, days: bool = False ): d = {} - if is_datetime64_dtype(dates.dtype): + if lib.is_np_dtype(dates.dtype, "M"): if delta: time_delta = dates - Timestamp(stata_epoch).as_unit("ns") d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds @@ -464,7 +464,7 @@ def g(x: datetime.datetime) -> int: index = dates.index if bad_loc.any(): dates = Series(dates) - if is_datetime64_dtype(dates): + if lib.is_np_dtype(dates.dtype, "M"): dates[bad_loc] = to_datetime(stata_epoch) else: dates[bad_loc] = stata_epoch @@ -2619,7 +2619,7 @@ def _prepare_pandas(self, data: DataFrame) -> None: for col in data: if col in self._convert_dates: continue - if is_datetime64_dtype(data[col]): + if lib.is_np_dtype(data[col].dtype, "M"): self._convert_dates[col] = "tc" self._convert_dates = _maybe_convert_to_int_keys( diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 8484b5525a92a..8ce05337be70b 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -2,6 +2,7 @@ import pytest from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -70,6 +71,7 @@ ], ) def test_numpy_dtypes(source_dtypes, expected_common_dtype): + source_dtypes = [pandas_dtype(x) for x in source_dtypes] assert find_common_type(source_dtypes) == expected_common_dtype From cbcf26f83c1c9dff19432ad3a6a80024aba54244 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Apr 2023 20:02:45 -0700 Subject: [PATCH 2/2] PERF: dtype checks --- pandas/_testing/__init__.py | 5 ++-- pandas/core/algorithms.py | 6 ++-- .../core/array_algos/masked_accumulations.py | 12 ++------ pandas/core/arrays/arrow/array.py | 5 ++-- pandas/core/arrays/categorical.py | 6 ++-- pandas/core/arrays/datetimelike.py | 5 ++-- pandas/core/arrays/interval.py | 6 ++-- pandas/core/arrays/masked.py | 6 ++-- pandas/core/arrays/numeric.py | 13 ++++----- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/construction.py | 5 ++-- pandas/core/dtypes/astype.py | 7 ++--- pandas/core/dtypes/cast.py | 2 +- 
pandas/core/dtypes/missing.py | 3 +- pandas/core/groupby/groupby.py | 4 +-- pandas/core/indexes/base.py | 4 +-- pandas/core/indexing.py | 6 ++-- pandas/core/missing.py | 2 +- pandas/tests/base/test_conversion.py | 8 ++---- pandas/tests/dtypes/cast/test_promote.py | 28 +++++++------------ pandas/tests/groupby/test_size.py | 4 ++- .../tests/groupby/transform/test_transform.py | 11 ++++---- pandas/tests/io/test_sql.py | 4 +-- pandas/tests/series/test_arithmetic.py | 5 ++-- 24 files changed, 64 insertions(+), 95 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 410cf7c6cbe3a..032e43bf9c089 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -29,7 +29,6 @@ from pandas.core.dtypes.common import ( is_float_dtype, - is_integer_dtype, is_sequence, is_signed_integer_dtype, is_unsigned_integer_dtype, @@ -389,11 +388,11 @@ def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index: dtype = pandas_dtype(dtype) assert isinstance(dtype, np.dtype) - if is_integer_dtype(dtype): + if dtype.kind in "iu": values = np.arange(k, dtype=dtype) if is_unsigned_integer_dtype(dtype): values += 2 ** (dtype.itemsize * 8 - 1) - elif is_float_dtype(dtype): + elif dtype.kind == "f": values = np.random.random_sample(k) - np.random.random_sample(1) values.sort() values = values * (10 ** np.random.randint(0, 9)) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 34ba4077936ad..67b7dc0ac709d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -510,7 +510,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]: if ( len(comps_array) > 1_000_000 and len(values) <= 26 - and not is_object_dtype(comps_array) + and comps_array.dtype != object ): # If the values include nan we need to check for nan explicitly # since np.nan it not equal to np.nan @@ -766,7 +766,7 @@ def factorize( else: values = np.asarray(values) # convert DTA/TDA/MultiIndex - if not use_na_sentinel and is_object_dtype(values): + if not use_na_sentinel and values.dtype == object: # factorize can now handle differentiating various types of null values. # These can only occur when the array has object dtype. # However, for backwards compatibility we only use the null for the @@ -1317,7 +1317,7 @@ def searchsorted( if ( isinstance(arr, np.ndarray) - and is_integer_dtype(arr.dtype) + and arr.dtype.kind in "iu" and (is_integer(value) or is_integer_dtype(value)) ): # if `arr` and `value` have different dtypes, `arr` would be diff --git a/pandas/core/array_algos/masked_accumulations.py b/pandas/core/array_algos/masked_accumulations.py index 1798c0b366a46..ad9e96d398a24 100644 --- a/pandas/core/array_algos/masked_accumulations.py +++ b/pandas/core/array_algos/masked_accumulations.py @@ -12,12 +12,6 @@ import numpy as np -from pandas.core.dtypes.common import ( - is_bool_dtype, - is_float_dtype, - is_integer_dtype, -) - if TYPE_CHECKING: from pandas._typing import npt @@ -46,11 +40,11 @@ def _cum_func( Whether to skip NA. """ dtype_info: np.iinfo | np.finfo - if is_float_dtype(values): + if values.dtype.kind == "f": dtype_info = np.finfo(values.dtype.type) - elif is_integer_dtype(values): + elif values.dtype.kind in "iu": dtype_info = np.iinfo(values.dtype.type) - elif is_bool_dtype(values): + elif values.dtype.kind == "b": # Max value of bool is 1, but since we are setting into a boolean # array, 255 is fine as well. Min value has to be 0 when setting # into the boolean array. 
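The hunks above all follow the same substitution: where the object is already known to be a plain NumPy dtype, the general helpers (is_integer_dtype, is_float_dtype, is_bool_dtype, is_datetime64_dtype, is_object_dtype) are swapped for a direct test of np.dtype.kind, a one-character code. The helpers stay in place where the input may still be an ExtensionDtype or an array-like. A minimal sketch of the idiom, separate from the patch itself and using only NumPy:

import numpy as np

# Each np.dtype carries a one-character kind code; membership tests against a
# short string of kind codes stand in for the general-purpose pandas helpers
# when the object is guaranteed to be a bare np.dtype.
for dt in map(np.dtype, ["int32", "uint8", "float64", "bool", "M8[ns]", "m8[ns]", "O"]):
    checks = {
        "integer": dt.kind in "iu",     # what the patch uses instead of is_integer_dtype
        "float": dt.kind == "f",        # instead of is_float_dtype
        "bool": dt.kind == "b",         # instead of is_bool_dtype
        "datetime64": dt.kind == "M",   # instead of is_datetime64_dtype
        "timedelta64": dt.kind == "m",  # instead of is_timedelta64_dtype
    }
    print(f"{str(dt):>15}  kind={dt.kind!r} ", {k for k, v in checks.items() if v})
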
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 8982465d28417..dc10abbbc401e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -43,7 +43,6 @@ is_array_like, is_bool_dtype, is_integer, - is_integer_dtype, is_list_like, is_object_dtype, is_scalar, @@ -363,9 +362,9 @@ def __getitem__(self, item: PositionalIndexer): else: pa_dtype = self._dtype.pyarrow_dtype return type(self)(pa.chunked_array([], type=pa_dtype)) - elif is_integer_dtype(item.dtype): + elif item.dtype.kind in "iu": return self.take(item) - elif is_bool_dtype(item.dtype): + elif item.dtype.kind == "b": return type(self)(self._pa_array.filter(item)) else: raise IndexError( diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index adb083c16a838..a435cb2e4eb33 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -533,7 +533,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: elif isinstance(dtype, ExtensionDtype): return super().astype(dtype, copy=copy) - elif is_integer_dtype(dtype) and self.isna().any(): + elif dtype.kind in "iu" and self.isna().any(): raise ValueError("Cannot convert float NaN to integer") elif len(self.codes) == 0 or len(self.categories) == 0: @@ -624,7 +624,7 @@ def _from_inferred_categories( cats = to_datetime(inferred_categories, errors="coerce") elif lib.is_np_dtype(dtype.categories.dtype, "m"): cats = to_timedelta(inferred_categories, errors="coerce") - elif is_bool_dtype(dtype.categories): + elif is_bool_dtype(dtype.categories.dtype): if true_values is None: true_values = ["True", "TRUE", "true"] @@ -708,7 +708,7 @@ def from_codes( codes = codes.to_numpy(dtype=np.int64) else: codes = np.asarray(codes) - if len(codes) and not is_integer_dtype(codes): + if len(codes) and codes.dtype.kind not in "iu": raise ValueError("codes need to be array-like integers") if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2132a3dcbf292..8660cbc0af1b5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -85,7 +85,6 @@ is_all_strings, is_datetime64_any_dtype, is_dtype_equal, - is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, @@ -460,7 +459,7 @@ def astype(self, dtype, copy: bool = True): return super().astype(dtype, copy=copy) elif is_string_dtype(dtype): return self._format_native_types() - elif is_integer_dtype(dtype): + elif dtype.kind in "iu": # we deliberately ignore int32 vs. int64 here. # See https://github.com/pandas-dev/pandas/issues/24381 for more. 
values = self.asi8 @@ -473,7 +472,7 @@ def astype(self, dtype, copy: bool = True): if copy: values = values.copy() return values - elif (dtype.kind in "mM" and self.dtype != dtype) or is_float_dtype(dtype): + elif (dtype.kind in "mM" and self.dtype != dtype) or dtype.kind == "f": # disallow conversion between datetime/timedelta, # and conversions for any datetimelike to float msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index ea35a86095e15..b8442205c331e 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -329,9 +329,9 @@ def _ensure_simple_new_inputs( raise ValueError("closed keyword does not match dtype.closed") # coerce dtypes to match if needed - if is_float_dtype(left) and is_integer_dtype(right): + if is_float_dtype(left.dtype) and is_integer_dtype(right.dtype): right = right.astype(left.dtype) - elif is_float_dtype(right) and is_integer_dtype(left): + elif is_float_dtype(right.dtype) and is_integer_dtype(left.dtype): left = left.astype(right.dtype) if type(left) != type(right): @@ -1778,6 +1778,6 @@ def _maybe_convert_platform_interval(values) -> ArrayLike: if not hasattr(values, "dtype"): values = np.asarray(values) - if is_integer_dtype(values) and values.dtype != np.int64: + if values.dtype.kind in "iu" and values.dtype != np.int64: values = values.astype(np.int64) return values diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 18ce2402d14a9..339eebfc815aa 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -788,10 +788,8 @@ def _maybe_mask_result(self, result, mask): return BooleanArray(result, mask, copy=False) - elif ( - isinstance(result.dtype, np.dtype) - and result.dtype.kind == "m" - and is_supported_unit(get_unit_from_dtype(result.dtype)) + elif lib.is_np_dtype(result.dtype, "m") and is_supported_unit( + get_unit_from_dtype(result.dtype) ): # e.g. 
test_numeric_arr_mul_tdscalar_numexpr_path from pandas.core.arrays import TimedeltaArray diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 344946ad68d32..df16419ea2bf1 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -18,10 +18,7 @@ from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( - is_bool_dtype, - is_float_dtype, is_integer_dtype, - is_object_dtype, is_string_dtype, pandas_dtype, ) @@ -171,16 +168,16 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype original = values values = np.array(values, copy=copy) inferred_type = None - if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): + if values.dtype == object or is_string_dtype(values.dtype): inferred_type = lib.infer_dtype(values, skipna=True) if inferred_type == "boolean" and dtype is None: name = dtype_cls.__name__.strip("_") raise TypeError(f"{values.dtype} cannot be converted to {name}") - elif is_bool_dtype(values) and checker(dtype): + elif values.dtype.kind == "b" and checker(dtype): values = np.array(values, dtype=default_dtype, copy=copy) - elif not (is_integer_dtype(values) or is_float_dtype(values)): + elif values.dtype.kind not in "iuf": name = dtype_cls.__name__.strip("_") raise TypeError(f"{values.dtype} cannot be converted to {name}") @@ -188,7 +185,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype raise TypeError("values must be a 1D list-like") if mask is None: - if is_integer_dtype(values): + if values.dtype.kind in "iu": # fastpath mask = np.zeros(len(values), dtype=np.bool_) else: @@ -205,7 +202,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype else: dtype = dtype.type - if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0: + if is_integer_dtype(dtype) and values.dtype.kind == "f" and len(values) > 0: if mask.all(): values = np.ones(values.shape, dtype=dtype) else: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index d7e413ccec293..5bc9a7c6b51ab 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -925,7 +925,7 @@ def sequence_to_td64ns( elif is_float_dtype(data.dtype): # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int - if is_extension_array_dtype(data): + if is_extension_array_dtype(data.dtype): mask = data._mask data = data._data else: diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 2208ae07fe30f..d626afa0c6e79 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -45,7 +45,6 @@ is_datetime64_ns_dtype, is_dtype_equal, is_extension_array_dtype, - is_integer_dtype, is_list_like, is_object_dtype, is_timedelta64_ns_dtype, @@ -749,7 +748,7 @@ def _try_cast( """ is_ndarray = isinstance(arr, np.ndarray) - if is_object_dtype(dtype): + if dtype == object: if not is_ndarray: subarr = construct_1d_object_array_from_listlike(arr) return subarr @@ -773,7 +772,7 @@ def _try_cast( # GH#15832: Check if we are requesting a numeric dtype and # that we can convert the data to the requested dtype. - elif is_integer_dtype(dtype): + elif dtype.kind in "iu": # this will raise if we have e.g. 
floats subarr = maybe_cast_to_integer_array(arr, dtype) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index a69559493c386..f00e25b7ba6b7 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -19,7 +19,6 @@ from pandas.core.dtypes.common import ( is_dtype_equal, - is_integer_dtype, is_object_dtype, is_string_dtype, pandas_dtype, @@ -99,10 +98,10 @@ def _astype_nansafe( arr, skipna=skipna, convert_na_value=False ).reshape(shape) - elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype): + elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu": return _astype_float_to_int_nansafe(arr, dtype, copy) - elif is_object_dtype(arr.dtype): + elif arr.dtype == object: # if we have a datetime/timedelta array of objects # then coerce to datetime64[ns] and use DatetimeArray.astype @@ -131,7 +130,7 @@ def _astype_nansafe( ) raise ValueError(msg) - if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype): + if copy or arr.dtype == object or dtype == object: # Explicit copy, or required since NumPy can't view from / to object. return arr.astype(dtype, copy=True) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5bfd99a9582ee..3e41fdf5a7634 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -609,7 +609,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): if not is_scalar(fill_value): # with object dtype there is nothing to promote, and the user can # pass pretty much any weird fill_value they like - if not is_object_dtype(dtype): + if dtype != object: # with object dtype there is nothing to promote, and the user can # pass pretty much any weird fill_value they like raise ValueError("fill_value must be a scalar") diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index ede5fc4809d9c..2fb8d3563b792 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -27,7 +27,6 @@ ensure_object, is_dtype_equal, is_extension_array_dtype, - is_object_dtype, is_scalar, is_string_or_object_np_dtype, ) @@ -581,7 +580,7 @@ def infer_fill_value(val): val = np.array(val, copy=False) if val.dtype.kind in "mM": return np.array("NaT", dtype=val.dtype) - elif is_object_dtype(val.dtype): + elif val.dtype == object: dtype = lib.infer_dtype(ensure_object(val), skipna=False) if dtype in ["datetime", "datetime64"]: return np.array("NaT", dtype=DT64NS_DTYPE) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a12d312d148b9..6cc6251706463 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3227,7 +3227,7 @@ def quantile( """ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]: - if is_object_dtype(vals): + if is_object_dtype(vals.dtype): raise TypeError( "'quantile' cannot be performed against 'object' dtypes!" 
) @@ -3253,7 +3253,7 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]: # ExtensionDtype]]", expected "Tuple[ndarray[Any, Any], # Optional[Union[dtype[Any], ExtensionDtype]]]") return vals, inference # type: ignore[return-value] - elif isinstance(vals, ExtensionArray) and is_float_dtype(vals): + elif isinstance(vals, ExtensionArray) and is_float_dtype(vals.dtype): inference = np.dtype(np.float64) out = vals.to_numpy(dtype=float, na_value=np.nan) else: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0a56fa4d031d6..b235fe26d0382 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1270,7 +1270,7 @@ def _format_data(self, name=None) -> str_t: is_justify = False elif self.inferred_type == "categorical": self = cast("CategoricalIndex", self) - if is_object_dtype(self.categories): + if is_object_dtype(self.categories.dtype): is_justify = False return format_object_summary( @@ -5314,7 +5314,7 @@ def putmask(self, mask, value) -> Index: try: converted = self._validate_fill_value(value) except (LossySetitemError, ValueError, TypeError) as err: - if is_object_dtype(self): # pragma: no cover + if is_object_dtype(self.dtype): # pragma: no cover raise err # See also: Block.coerce_to_target_dtype diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1d801f24fa3ff..6aecfe5267e0c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2167,8 +2167,8 @@ def _ensure_iterable_column_indexer(self, column_indexer): ilocs = [column_indexer] elif isinstance(column_indexer, slice): ilocs = np.arange(len(self.obj.columns))[column_indexer] - elif isinstance(column_indexer, np.ndarray) and is_bool_dtype( - column_indexer.dtype + elif ( + isinstance(column_indexer, np.ndarray) and column_indexer.dtype.kind == "b" ): ilocs = np.arange(len(column_indexer))[column_indexer] else: @@ -2257,7 +2257,7 @@ def ravel(i): return ser.reindex(ax)._values elif is_integer(indexer) and self.ndim == 1: - if is_object_dtype(self.obj): + if is_object_dtype(self.obj.dtype): return ser ax = self.obj._get_axis(0) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 977fd2c09f897..aaed431f890d3 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -86,7 +86,7 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: values_to_mask = np.array(values_to_mask, dtype=dtype) # type: ignore[arg-type] potential_na = False - if is_object_dtype(arr): + if is_object_dtype(arr.dtype): # pre-compute mask to avoid comparison to NA potential_na = True arr_mask = ~isna(arr) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 6924c3920245c..eda7871e5ab0a 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -1,10 +1,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import ( - is_datetime64_dtype, - is_timedelta64_dtype, -) from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd @@ -233,9 +229,9 @@ def test_numpy_array(arr): def test_numpy_array_all_dtypes(any_numpy_dtype): ser = Series(dtype=any_numpy_dtype) result = ser.array - if is_datetime64_dtype(any_numpy_dtype): + if np.dtype(any_numpy_dtype).kind == "M": assert isinstance(result, DatetimeArray) - elif is_timedelta64_dtype(any_numpy_dtype): + elif np.dtype(any_numpy_dtype).kind == "m": assert isinstance(result, TimedeltaArray) else: assert isinstance(result, PandasArray) diff --git 
a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index a3ed5fcf139da..1becf3b9843b7 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -11,15 +11,7 @@ from pandas._libs.tslibs import NaT from pandas.core.dtypes.cast import maybe_promote -from pandas.core.dtypes.common import ( - is_complex_dtype, - is_datetime64_dtype, - is_float_dtype, - is_integer_dtype, - is_object_dtype, - is_scalar, - is_timedelta64_dtype, -) +from pandas.core.dtypes.common import is_scalar from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna @@ -329,7 +321,7 @@ def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype): fill_value = np.array([1], dtype=fill_dtype)[0] # filling datetime with anything but datetime casts to object - if is_datetime64_dtype(fill_dtype): + if fill_dtype.kind == "M": expected_dtype = dtype # for datetime dtypes, scalar values get cast to to_datetime64 exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() @@ -354,7 +346,7 @@ def test_maybe_promote_any_with_datetime64(any_numpy_dtype, fill_value): dtype = np.dtype(any_numpy_dtype) # filling datetime with anything but datetime casts to object - if is_datetime64_dtype(dtype): + if dtype.kind == "M": expected_dtype = dtype # for datetime dtypes, scalar values get cast to pd.Timestamp.value exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() @@ -403,7 +395,7 @@ def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype): fill_value = np.array([1], dtype=fill_dtype)[0] # filling timedelta with anything but timedelta casts to object - if is_timedelta64_dtype(fill_dtype): + if fill_dtype.kind == "m": expected_dtype = dtype # for timedelta dtypes, scalar values get cast to pd.Timedelta.value exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() @@ -423,7 +415,7 @@ def test_maybe_promote_any_with_timedelta64(any_numpy_dtype, fill_value): dtype = np.dtype(any_numpy_dtype) # filling anything but timedelta with timedelta casts to object - if is_timedelta64_dtype(dtype): + if dtype.kind == "m": expected_dtype = dtype # for timedelta dtypes, scalar values get cast to pd.Timedelta.value exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() @@ -497,8 +489,8 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, nulls_fixture): # Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture) # this is the existing behavior in maybe_promote, # hinges on is_valid_na_for_dtype - if dtype.kind in ["i", "u", "f", "c"]: - if dtype.kind in ["i", "u"]: + if dtype.kind in "iufc": + if dtype.kind in "iu": expected_dtype = np.dtype(np.float64) else: expected_dtype = dtype @@ -506,11 +498,11 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, nulls_fixture): else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value - elif is_integer_dtype(dtype) and fill_value is not NaT: + elif dtype.kind in "iu" and fill_value is not NaT: # integer + other missing value (np.nan / None) casts to float expected_dtype = np.float64 exp_val_for_scalar = np.nan - elif is_object_dtype(dtype) and fill_value is NaT: + elif dtype == object and fill_value is NaT: # inserting into object does not cast the value # but *does* cast None to np.nan expected_dtype = np.dtype(object) @@ -523,7 +515,7 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, nulls_fixture): # NaT upcasts everything that's not datetime/timedelta to 
object expected_dtype = np.dtype(object) exp_val_for_scalar = NaT - elif is_float_dtype(dtype) or is_complex_dtype(dtype): + elif dtype.kind in "fc": # float / complex + missing value (!= NaT) stays the same expected_dtype = dtype exp_val_for_scalar = np.nan diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py index 7da6bc8a32013..b96fe41c26c3e 100644 --- a/pandas/tests/groupby/test_size.py +++ b/pandas/tests/groupby/test_size.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.core.dtypes.common import is_integer_dtype + from pandas import ( DataFrame, Index, @@ -36,7 +38,7 @@ def test_size_axis_1(df, axis_1, by, sort, dropna): expected = Series(counts, dtype="int64") if sort: expected = expected.sort_index() - if tm.is_integer_dtype(expected.index) and not any(x is None for x in by): + if is_integer_dtype(expected.index.dtype) and not any(x is None for x in by): expected.index = expected.index.astype(np.int_) msg = "DataFrame.groupby with axis=1 is deprecated" diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index a93336b298c81..49f5ad47f6701 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -4,10 +4,9 @@ import numpy as np import pytest -from pandas.core.dtypes.common import ( - ensure_platform_int, - is_timedelta64_dtype, -) +from pandas._libs import lib + +from pandas.core.dtypes.common import ensure_platform_int import pandas as pd from pandas import ( @@ -337,10 +336,10 @@ def test_transform_casting(): ) result = df.groupby("ID3")["DATETIME"].transform(lambda x: x.diff()) - assert is_timedelta64_dtype(result.dtype) + assert lib.is_np_dtype(result.dtype, "m") result = df[["ID3", "DATETIME"]].groupby("ID3").transform(lambda x: x.diff()) - assert is_timedelta64_dtype(result.DATETIME.dtype) + assert lib.is_np_dtype(result.DATETIME.dtype, "m") def test_transform_multiple(ts): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d0b7492f8d9ba..ee5dedb47e33c 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -36,8 +36,6 @@ from pandas._libs import lib import pandas.util._test_decorators as td -from pandas.core.dtypes.common import is_datetime64_dtype - import pandas as pd from pandas import ( DataFrame, @@ -1924,7 +1922,7 @@ def test_datetime_with_timezone(self, request): def check(col): # check that a column is either datetime64[ns] # or datetime64[ns, UTC] - if is_datetime64_dtype(col.dtype): + if lib.is_np_dtype(col.dtype, "M"): # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" assert col[0] == Timestamp("2000-01-01 08:00:00") diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 948e1e626aa5e..be53209d889ee 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -9,10 +9,9 @@ import numpy as np import pytest +from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency -from pandas.core.dtypes.common import is_datetime64_dtype - import pandas as pd from pandas import ( Categorical, @@ -905,7 +904,7 @@ def test_none_comparison(request, series_with_simple_index): assert result.iat[0] assert result.iat[1] - if is_datetime64_dtype(series.dtype) or isinstance(series.dtype, DatetimeTZDtype): + if lib.is_np_dtype(series.dtype, "M") or isinstance(series.dtype, DatetimeTZDtype): # Following DatetimeIndex (and Timestamp) convention, # 
inequality comparisons with Series[datetime64] raise msg = "Invalid comparison"
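
Both commits are motivated by the cost of the general dtype-introspection helpers, which accept NumPy dtypes, extension dtypes, and array-likes and therefore do extra dispatch on every call. A rough way to see the difference is the hedged microbenchmark sketch below; it uses only the public pandas.api.types counterpart of the internal helper being replaced, and the numbers will vary by pandas version and machine:

import timeit

import numpy as np
from pandas.api.types import is_integer_dtype

dt = np.dtype("int64")

# General helper: handles dtypes, extension dtypes, and array-likes.
helper = timeit.timeit(lambda: is_integer_dtype(dt), number=100_000)
# Direct kind check: valid only because dt is already a plain np.dtype.
kind_check = timeit.timeit(lambda: dt.kind in "iu", number=100_000)

print(f"is_integer_dtype: {helper:.4f}s  dtype.kind in 'iu': {kind_check:.4f}s")

The internal lib.is_np_dtype(dtype, "M") calls introduced throughout the patch play the same role for datetime64/timedelta64 checks: they return False for extension types such as DatetimeTZDtype, which is why several hunks pair them with an explicit isinstance check.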