Skip to content

PERF: dtype checks #52682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
is_sequence,
is_signed_integer_dtype,
is_unsigned_integer_dtype,
Expand Down Expand Up @@ -389,11 +388,11 @@ def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index:
dtype = pandas_dtype(dtype)
assert isinstance(dtype, np.dtype)

if is_integer_dtype(dtype):
if dtype.kind in "iu":
values = np.arange(k, dtype=dtype)
if is_unsigned_integer_dtype(dtype):
values += 2 ** (dtype.itemsize * 8 - 1)
elif is_float_dtype(dtype):
elif dtype.kind == "f":
values = np.random.random_sample(k) - np.random.random_sample(1)
values.sort()
values = values * (10 ** np.random.randint(0, 9))
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:
if (
len(comps_array) > 1_000_000
and len(values) <= 26
and not is_object_dtype(comps_array)
and comps_array.dtype != object
):
# If the values include nan we need to check for nan explicitly
# since np.nan is not equal to np.nan
Expand Down Expand Up @@ -766,7 +766,7 @@ def factorize(
else:
values = np.asarray(values) # convert DTA/TDA/MultiIndex

if not use_na_sentinel and is_object_dtype(values):
if not use_na_sentinel and values.dtype == object:
# factorize can now handle differentiating various types of null values.
# These can only occur when the array has object dtype.
# However, for backwards compatibility we only use the null for the
Expand Down Expand Up @@ -1317,7 +1317,7 @@ def searchsorted(

if (
isinstance(arr, np.ndarray)
and is_integer_dtype(arr.dtype)
and arr.dtype.kind in "iu"
and (is_integer(value) or is_integer_dtype(value))
):
# if `arr` and `value` have different dtypes, `arr` would be
Expand Down
12 changes: 3 additions & 9 deletions pandas/core/array_algos/masked_accumulations.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,6 @@

import numpy as np

from pandas.core.dtypes.common import (
is_bool_dtype,
is_float_dtype,
is_integer_dtype,
)

if TYPE_CHECKING:
from pandas._typing import npt

Expand Down Expand Up @@ -46,11 +40,11 @@ def _cum_func(
Whether to skip NA.
"""
dtype_info: np.iinfo | np.finfo
if is_float_dtype(values):
if values.dtype.kind == "f":
dtype_info = np.finfo(values.dtype.type)
elif is_integer_dtype(values):
elif values.dtype.kind in "iu":
dtype_info = np.iinfo(values.dtype.type)
elif is_bool_dtype(values):
elif values.dtype.kind == "b":
# Max value of bool is 1, but since we are setting into a boolean
# array, 255 is fine as well. Min value has to be 0 when setting
# into the boolean array.
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
is_array_like,
is_bool_dtype,
is_integer,
is_integer_dtype,
is_list_like,
is_object_dtype,
is_scalar,
Expand Down Expand Up @@ -363,9 +362,9 @@ def __getitem__(self, item: PositionalIndexer):
else:
pa_dtype = self._dtype.pyarrow_dtype
return type(self)(pa.chunked_array([], type=pa_dtype))
elif is_integer_dtype(item.dtype):
elif item.dtype.kind in "iu":
return self.take(item)
elif is_bool_dtype(item.dtype):
elif item.dtype.kind == "b":
return type(self)(self._pa_array.filter(item))
else:
raise IndexError(
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
elif isinstance(dtype, ExtensionDtype):
return super().astype(dtype, copy=copy)

elif is_integer_dtype(dtype) and self.isna().any():
elif dtype.kind in "iu" and self.isna().any():
raise ValueError("Cannot convert float NaN to integer")

elif len(self.codes) == 0 or len(self.categories) == 0:
Expand Down Expand Up @@ -624,7 +624,7 @@ def _from_inferred_categories(
cats = to_datetime(inferred_categories, errors="coerce")
elif lib.is_np_dtype(dtype.categories.dtype, "m"):
cats = to_timedelta(inferred_categories, errors="coerce")
elif is_bool_dtype(dtype.categories):
elif is_bool_dtype(dtype.categories.dtype):
if true_values is None:
true_values = ["True", "TRUE", "true"]

Expand Down Expand Up @@ -708,7 +708,7 @@ def from_codes(
codes = codes.to_numpy(dtype=np.int64)
else:
codes = np.asarray(codes)
if len(codes) and not is_integer_dtype(codes):
if len(codes) and codes.dtype.kind not in "iu":
raise ValueError("codes need to be array-like integers")

if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1):
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@
is_all_strings,
is_datetime64_any_dtype,
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_object_dtype,
Expand Down Expand Up @@ -460,7 +459,7 @@ def astype(self, dtype, copy: bool = True):
return super().astype(dtype, copy=copy)
elif is_string_dtype(dtype):
return self._format_native_types()
elif is_integer_dtype(dtype):
elif dtype.kind in "iu":
# we deliberately ignore int32 vs. int64 here.
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
values = self.asi8
Expand All @@ -473,7 +472,7 @@ def astype(self, dtype, copy: bool = True):
if copy:
values = values.copy()
return values
elif (dtype.kind in "mM" and self.dtype != dtype) or is_float_dtype(dtype):
elif (dtype.kind in "mM" and self.dtype != dtype) or dtype.kind == "f":
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
INT64_DTYPE,
is_bool_dtype,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_dtype_equal,
is_float_dtype,
is_object_dtype,
Expand Down Expand Up @@ -2190,11 +2189,11 @@ def objects_to_datetime64ns(
# is in UTC
# Return i8 values to denote unix timestamps
return result.view("i8"), tz_parsed
elif is_datetime64_dtype(result):
elif result.dtype.kind == "M":
# returning M8[ns] denotes wall-times; since tz is None
# the distinction is a thin one
return result, tz_parsed
elif is_object_dtype(result):
elif result.dtype == object:
# GH#23675 when called via `pd.to_datetime`, returning an object-dtype
# array is allowed. When called via `pd.DatetimeIndex`, we can
# only accept datetime64 dtype, so raise TypeError if object-dtype
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,9 @@ def _ensure_simple_new_inputs(
raise ValueError("closed keyword does not match dtype.closed")

# coerce dtypes to match if needed
if is_float_dtype(left) and is_integer_dtype(right):
if is_float_dtype(left.dtype) and is_integer_dtype(right.dtype):
right = right.astype(left.dtype)
elif is_float_dtype(right) and is_integer_dtype(left):
elif is_float_dtype(right.dtype) and is_integer_dtype(left.dtype):
left = left.astype(right.dtype)

if type(left) != type(right):
Expand Down Expand Up @@ -1778,6 +1778,6 @@ def _maybe_convert_platform_interval(values) -> ArrayLike:

if not hasattr(values, "dtype"):
values = np.asarray(values)
if is_integer_dtype(values) and values.dtype != np.int64:
if values.dtype.kind in "iu" and values.dtype != np.int64:
values = values.astype(np.int64)
return values
15 changes: 6 additions & 9 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from pandas.core.dtypes.common import (
is_bool,
is_bool_dtype,
is_datetime64_dtype,
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -478,18 +477,18 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
na_value: float | np.datetime64 | lib.NoDefault

# coerce
if is_float_dtype(dtype):
if dtype.kind == "f":
# In astype, we consider dtype=float to also mean na_value=np.nan
na_value = np.nan
elif is_datetime64_dtype(dtype):
elif dtype.kind == "M":
na_value = np.datetime64("NaT")
else:
na_value = lib.no_default

# to_numpy will also raise, but we get somewhat nicer exception messages here
if is_integer_dtype(dtype) and self._hasna:
if dtype.kind in "iu" and self._hasna:
raise ValueError("cannot convert NA to integer")
if is_bool_dtype(dtype) and self._hasna:
if dtype.kind == "b" and self._hasna:
# careful: astype_nansafe converts np.nan to True
raise ValueError("cannot convert float NaN to bool")

Expand Down Expand Up @@ -789,10 +788,8 @@ def _maybe_mask_result(self, result, mask):

return BooleanArray(result, mask, copy=False)

elif (
isinstance(result.dtype, np.dtype)
and result.dtype.kind == "m"
and is_supported_unit(get_unit_from_dtype(result.dtype))
elif lib.is_np_dtype(result.dtype, "m") and is_supported_unit(
get_unit_from_dtype(result.dtype)
):
# e.g. test_numeric_arr_mul_tdscalar_numexpr_path
from pandas.core.arrays import TimedeltaArray
Expand Down
13 changes: 5 additions & 8 deletions pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import (
is_bool_dtype,
is_float_dtype,
is_integer_dtype,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
Expand Down Expand Up @@ -171,24 +168,24 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
original = values
values = np.array(values, copy=copy)
inferred_type = None
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
if values.dtype == object or is_string_dtype(values.dtype):
inferred_type = lib.infer_dtype(values, skipna=True)
if inferred_type == "boolean" and dtype is None:
name = dtype_cls.__name__.strip("_")
raise TypeError(f"{values.dtype} cannot be converted to {name}")

elif is_bool_dtype(values) and checker(dtype):
elif values.dtype.kind == "b" and checker(dtype):
values = np.array(values, dtype=default_dtype, copy=copy)

elif not (is_integer_dtype(values) or is_float_dtype(values)):
elif values.dtype.kind not in "iuf":
name = dtype_cls.__name__.strip("_")
raise TypeError(f"{values.dtype} cannot be converted to {name}")

if values.ndim != 1:
raise TypeError("values must be a 1D list-like")

if mask is None:
if is_integer_dtype(values):
if values.dtype.kind in "iu":
# fastpath
mask = np.zeros(len(values), dtype=np.bool_)
else:
Expand All @@ -205,7 +202,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
else:
dtype = dtype.type

if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
if is_integer_dtype(dtype) and values.dtype.kind == "f" and len(values) > 0:
if mask.all():
values = np.ones(values.shape, dtype=dtype)
else:
Expand Down
9 changes: 3 additions & 6 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,7 @@
from pandas.core.dtypes.common import (
ensure_object,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
is_period_dtype,
pandas_dtype,
)
Expand Down Expand Up @@ -915,7 +912,7 @@ def period_array(
"""
data_dtype = getattr(data, "dtype", None)

if is_datetime64_dtype(data_dtype):
if lib.is_np_dtype(data_dtype, "M"):
return PeriodArray._from_datetime64(data, freq)
if isinstance(data_dtype, PeriodDtype):
out = PeriodArray(data)
Expand All @@ -937,10 +934,10 @@ def period_array(
else:
dtype = None

if is_float_dtype(arrdata) and len(arrdata) > 0:
if arrdata.dtype.kind == "f" and len(arrdata) > 0:
raise TypeError("PeriodIndex does not allow floating point in construction")

if is_integer_dtype(arrdata.dtype):
if arrdata.dtype.kind in "iu":
arr = arrdata.astype(np.int64, copy=False)
# error: Argument 2 to "from_ordinals" has incompatible type "Union[str,
# Tick, None]"; expected "Union[timedelta, BaseOffset, str]"
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,7 +925,7 @@ def sequence_to_td64ns(
elif is_float_dtype(data.dtype):
# cast the unit, multiply base/frac separately
# to avoid precision issues from float -> int
if is_extension_array_dtype(data):
if is_extension_array_dtype(data.dtype):
mask = data._mask
data = data._data
else:
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
is_datetime64_ns_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_integer_dtype,
is_list_like,
is_object_dtype,
is_timedelta64_ns_dtype,
Expand Down Expand Up @@ -749,7 +748,7 @@ def _try_cast(
"""
is_ndarray = isinstance(arr, np.ndarray)

if is_object_dtype(dtype):
if dtype == object:
if not is_ndarray:
subarr = construct_1d_object_array_from_listlike(arr)
return subarr
Expand All @@ -773,7 +772,7 @@ def _try_cast(

# GH#15832: Check if we are requesting a numeric dtype and
# that we can convert the data to the requested dtype.
elif is_integer_dtype(dtype):
elif dtype.kind in "iu":
# this will raise if we have e.g. floats

subarr = maybe_cast_to_integer_array(arr, dtype)
Expand Down
7 changes: 3 additions & 4 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

from pandas.core.dtypes.common import (
is_dtype_equal,
is_integer_dtype,
is_object_dtype,
is_string_dtype,
pandas_dtype,
Expand Down Expand Up @@ -99,10 +98,10 @@ def _astype_nansafe(
arr, skipna=skipna, convert_na_value=False
).reshape(shape)

elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu":
return _astype_float_to_int_nansafe(arr, dtype, copy)

elif is_object_dtype(arr.dtype):
elif arr.dtype == object:
# if we have a datetime/timedelta array of objects
# then coerce to datetime64[ns] and use DatetimeArray.astype

Expand Down Expand Up @@ -131,7 +130,7 @@ def _astype_nansafe(
)
raise ValueError(msg)

if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
if copy or arr.dtype == object or dtype == object:
# Explicit copy, or required since NumPy can't view from / to object.
return arr.astype(dtype, copy=True)

Expand Down
Loading