Skip to content

PERF: slightly faster dtype.kind checks #52387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,7 @@ cpdef ndarray[object] ensure_string_array(

if hasattr(arr, "to_numpy"):

if hasattr(arr, "dtype") and arr.dtype.kind in ["m", "M"]:
if hasattr(arr, "dtype") and arr.dtype.kind in "mM":
# dtype check to exclude DataFrame
# GH#41409 TODO: not a great place for this
out = arr.astype(str).astype(object)
Expand Down Expand Up @@ -2641,7 +2641,7 @@ def maybe_convert_objects(ndarray[object] objects,
dtype = dtype_if_all_nat
if cnp.PyArray_DescrCheck(dtype):
# i.e. isinstance(dtype, np.dtype)
if dtype.kind not in ["m", "M"]:
if dtype.kind not in "mM":
raise ValueError(dtype)
else:
res = np.empty((<object>objects).shape, dtype=dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def compare_mismatched_resolutions(ndarray left, ndarray right, op):
array([ True])
"""

if left.dtype.kind != right.dtype.kind or left.dtype.kind not in ["m", "M"]:
if left.dtype.kind != right.dtype.kind or left.dtype.kind not in "mM":
raise ValueError("left and right must both be timedelta64 or both datetime64")

cdef:
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ def _binary_op_method_timedeltalike(op, name):
item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other)
return f(self, item)

elif other.dtype.kind in ["m", "M"]:
elif other.dtype.kind in "mM":
return op(self.to_timedelta64(), other)
elif other.dtype.kind == "O":
return np.array([op(self, x) for x in other])
Expand Down Expand Up @@ -2028,7 +2028,7 @@ class Timedelta(_Timedelta):
result[mask] = np.nan
return result

elif other.dtype.kind in ["i", "u", "f"]:
elif other.dtype.kind in "iuf":
if other.ndim == 0:
return self // other.item()
else:
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ cdef class _Timestamp(ABCTimestamp):
raise integer_op_not_supported(self)

elif is_array(other):
if other.dtype.kind in ["i", "u"]:
if other.dtype.kind in "iu":
raise integer_op_not_supported(self)
if other.dtype.kind == "m":
if self.tz is None:
Expand Down Expand Up @@ -496,7 +496,7 @@ cdef class _Timestamp(ABCTimestamp):
return self + neg_other

elif is_array(other):
if other.dtype.kind in ["i", "u"]:
if other.dtype.kind in "iu":
raise integer_op_not_supported(self)
if other.dtype.kind == "m":
if self.tz is None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ def assert_interval_array_equal(
_check_isinstance(left, right, IntervalArray)

kwargs = {}
if left._left.dtype.kind in ["m", "M"]:
if left._left.dtype.kind in "mM":
# We have a DatetimeArray or TimedeltaArray
kwargs["check_freq"] = False

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def factorize_array(
uniques : ndarray
"""
original = values
if values.dtype.kind in ["m", "M"]:
if values.dtype.kind in "mM":
# _get_hashtable_algo will cast dt64/td64 to i8 via _ensure_data, so we
# need to do the same to na_value. We are assuming here that the passed
# na_value is an appropriately-typed NaT.
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/array_algos/datetimelike_accumulations.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _cum_func(
result = func(y)
result[mask] = iNaT

if values.dtype.kind in ["m", "M"]:
if values.dtype.kind in "mM":
return result.view(values.dtype.base)
return result

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/array_algos/putmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
if values.dtype == object:
dtype, _ = infer_dtype_from(other, pandas_dtype=True)

if isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]:
if isinstance(dtype, np.dtype) and dtype.kind in "mM":
# https://github.com/numpy/numpy/issues/12550
# timedelta64 will incorrectly cast to int
if not is_list_like(other):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def _nanpercentile(
quantiles : scalar or array
"""

if values.dtype.kind in ["m", "M"]:
if values.dtype.kind in "mM":
# need to cast to integer to avoid rounding errors in numpy
result = _nanpercentile(
values.view("i8"),
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,7 +740,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
if not hasattr(values, "dtype"):
values = np.asarray(values)

if values.dtype.kind in ["f", "i", "u", "c"]:
if values.dtype.kind in "fiuc":
# TODO: de-duplicate with equals, validate_comparison_value
return np.zeros(self.shape, dtype=bool)

Expand Down Expand Up @@ -769,7 +769,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
except ValueError:
return isin(self.astype(object), values)

if self.dtype.kind in ["m", "M"]:
if self.dtype.kind in "mM":
self = cast("DatetimeArray | TimedeltaArray", self)
values = values.as_unit(self.unit)

Expand Down Expand Up @@ -1194,7 +1194,7 @@ def _sub_nat(self):
# For period dtype, timedelta64 is a close-enough return dtype.
result = np.empty(self.shape, dtype=np.int64)
result.fill(iNaT)
if self.dtype.kind in ["m", "M"]:
if self.dtype.kind in "mM":
# We can retain unit in dtype
self = cast("DatetimeArray| TimedeltaArray", self)
return result.view(f"timedelta64[{self.unit}]")
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ def _arith_method(self, other, op):
else:
# Make sure we do this before the "pow" mask checks
# to get an expected exception message on shape mismatch.
if self.dtype.kind in ["i", "u"] and op_name in ["floordiv", "mod"]:
if self.dtype.kind in "iu" and op_name in ["floordiv", "mod"]:
# TODO(GH#30188) ATM we don't match the behavior of non-masked
# types with respect to floordiv-by-zero
pd_op = op
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __from_arrow__(
# test_from_arrow_type_error raise for string, but allow
# through itemsize conversion GH#31896
rt_dtype = pandas_dtype(array.type.to_pandas_dtype())
if rt_dtype.kind not in ["i", "u", "f"]:
if rt_dtype.kind not in "iuf":
# Could allow "c" or potentially disallow float<->int conversion,
# but at the moment we specifically test that uint<->int works
raise TypeError(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def _validate_scalar(self, fill_value):
return fill_value

def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
if self.dtype.kind in ["i", "u", "b"]:
if self.dtype.kind in "iub":
fv = None
else:
fv = np.nan
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def _sparse_array_op(
if (
name in ["floordiv", "mod"]
and (right == 0).any()
and left.dtype.kind in ["i", "u"]
and left.dtype.kind in "iu"
):
# Match the non-Sparse Series behavior
opname = f"sparse_{name}_float64"
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@ def _try_cast(
return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

elif dtype.kind == "U":
# TODO: test cases with arr.dtype.kind in ["m", "M"]
# TODO: test cases with arr.dtype.kind in "mM"
if is_ndarray:
arr = cast(np.ndarray, arr)
shape = arr.shape
Expand All @@ -768,7 +768,7 @@ def _try_cast(
shape
)

elif dtype.kind in ["m", "M"]:
elif dtype.kind in "mM":
return maybe_cast_to_datetime(arr, dtype)

# GH#15832: Check if we are requesting a numeric dtype and
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def _astype_nansafe(
elif not isinstance(dtype, np.dtype): # pragma: no cover
raise ValueError("dtype must be np.dtype or ExtensionDtype")

if arr.dtype.kind in ["m", "M"]:
if arr.dtype.kind in "mM":
from pandas.core.construction import ensure_wrapped_if_datetimelike

arr = ensure_wrapped_if_datetimelike(arr)
Expand Down
37 changes: 18 additions & 19 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:

Notes
-----
Caller is responsible for checking dtype.kind in ["m", "M"]
Caller is responsible for checking dtype.kind in "mM"
"""
if is_valid_na_for_dtype(value, dtype):
# GH#36541: can't fill array directly with pd.NaT
Expand Down Expand Up @@ -295,7 +295,7 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLi

# a datetimelike
# GH12821, iNaT is cast to float
if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]:
if dtype.kind in "mM" and result.dtype.kind in "if":
result = result.astype(dtype)

elif dtype.kind == "m" and result.dtype == _dtype_obj:
Expand Down Expand Up @@ -544,7 +544,7 @@ def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
return _dtype_obj
elif dtype.kind == "b":
return _dtype_obj
elif dtype.kind in ["i", "u"]:
elif dtype.kind in "iu":
return np.dtype(np.float64)
return dtype

Expand Down Expand Up @@ -623,8 +623,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
dtype = _dtype_obj
return dtype, fill_value

kinds = ["i", "u", "f", "c", "m", "M"]
if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds:
if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in "iufcmM":
dtype = ensure_dtype_can_hold_na(dtype)
fv = na_value_for_dtype(dtype)
return dtype, fv
Expand Down Expand Up @@ -1196,7 +1195,7 @@ def maybe_cast_to_datetime(
from pandas.core.arrays.datetimes import DatetimeArray
from pandas.core.arrays.timedeltas import TimedeltaArray

assert dtype.kind in ["m", "M"]
assert dtype.kind in "mM"
if not is_list_like(value):
raise TypeError("value must be listlike")

Expand Down Expand Up @@ -1251,7 +1250,7 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
# i.e. datetime64tz
pass

elif dtype.kind in ["m", "M"]:
elif dtype.kind in "mM":
reso = get_unit_from_dtype(dtype)
if not is_supported_unit(reso):
# pre-2.0 we would silently swap in nanos for lower-resolutions,
Expand Down Expand Up @@ -1294,7 +1293,7 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:

if (
isinstance(left, np.ndarray)
and left.dtype.kind in ["i", "u", "c"]
and left.dtype.kind in "iuc"
and (lib.is_integer(right) or lib.is_float(right))
):
# e.g. with int8 dtype and right=512, we want to end up with
Expand Down Expand Up @@ -1335,7 +1334,7 @@ def common_dtype_categorical_compat(
# GH#38240

# TODO: more generally, could do `not can_hold_na(dtype)`
if isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]:
if isinstance(dtype, np.dtype) and dtype.kind in "iu":
for obj in objs:
# We don't want to accidentally allow e.g. "categorical" str here
obj_dtype = getattr(obj, "dtype", None)
Expand Down Expand Up @@ -1429,7 +1428,7 @@ def construct_2d_arraylike_from_scalar(
) -> np.ndarray:
shape = (length, width)

if dtype.kind in ["m", "M"]:
if dtype.kind in "mM":
value = _maybe_box_and_unbox_datetimelike(value, dtype)
elif dtype == _dtype_obj:
if isinstance(value, (np.timedelta64, np.datetime64)):
Expand Down Expand Up @@ -1486,13 +1485,13 @@ def construct_1d_arraylike_from_scalar(
if length and is_integer_dtype(dtype) and isna(value):
# coerce if we have nan for an integer dtype
dtype = np.dtype("float64")
elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
elif isinstance(dtype, np.dtype) and dtype.kind in "US":
# we need to coerce to object dtype
# to allow numpy to take our string as a scalar value
dtype = np.dtype("object")
if not isna(value):
value = ensure_str(value)
elif dtype.kind in ["M", "m"]:
elif dtype.kind in "mM":
value = _maybe_box_and_unbox_datetimelike(value, dtype)

subarr = np.empty(length, dtype=dtype)
Expand All @@ -1504,7 +1503,7 @@ def construct_1d_arraylike_from_scalar(


def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
# Caller is responsible for checking dtype.kind in ["m", "M"]
# Caller is responsible for checking dtype.kind in "mM"

if isinstance(value, dt.datetime):
# we dont want to box dt64, in particular datetime64("NaT")
Expand Down Expand Up @@ -1642,7 +1641,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
f"To cast anyway, use pd.Series(values).astype({dtype})"
)

if arr.dtype.kind in ["m", "M"]:
if arr.dtype.kind in "mM":
# test_constructor_maskedarray_nonfloat
raise TypeError(
f"Constructing a Series or DataFrame from {arr.dtype} values and "
Expand All @@ -1667,7 +1666,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
bool
"""
dtype = arr.dtype
if not isinstance(dtype, np.dtype) or dtype.kind in ["m", "M"]:
if not isinstance(dtype, np.dtype) or dtype.kind in "mM":
if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)):
# np.dtype here catches datetime64ns and timedelta64ns; we assume
# in this case that we have DatetimeArray/TimedeltaArray
Expand Down Expand Up @@ -1715,7 +1714,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:

tipo = _maybe_infer_dtype_type(element)

if dtype.kind in ["i", "u"]:
if dtype.kind in "iu":
if isinstance(element, range):
if _dtype_can_hold_range(element, dtype):
return element
Expand All @@ -1731,7 +1730,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
raise LossySetitemError

if tipo is not None:
if tipo.kind not in ["i", "u"]:
if tipo.kind not in "iu":
if isinstance(element, np.ndarray) and element.dtype.kind == "f":
# If all can be losslessly cast to integers, then we can hold them
with np.errstate(invalid="ignore"):
Expand Down Expand Up @@ -1783,7 +1782,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:

if tipo is not None:
# TODO: itemsize check?
if tipo.kind not in ["f", "i", "u"]:
if tipo.kind not in "iuf":
# Anything other than float/integer we cannot hold
raise LossySetitemError
if not isinstance(tipo, np.dtype):
Expand Down Expand Up @@ -1819,7 +1818,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
raise LossySetitemError

if tipo is not None:
if tipo.kind in ["c", "f", "i", "u"]:
if tipo.kind in "iufc":
return element
raise LossySetitemError
raise LossySetitemError
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,7 +1402,7 @@ def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool:
Checks only for dtype objects, not dtype-castable strings or types.
"""
return isinstance(dtype, ExtensionDtype) or (
isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]
isinstance(dtype, np.dtype) and dtype.kind in "mM"
)


Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def concat_compat(
# "Sequence[Union[ExtensionArray, ndarray[Any, Any]]]"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[Any]]]]"
result: np.ndarray = np.concatenate(to_concat, axis=axis) # type: ignore[arg-type]
if "b" in kinds and result.dtype.kind in ["i", "u", "f"]:
if "b" in kinds and result.dtype.kind in "iuf":
# GH#39817 cast to object instead of casting bools to numeric
result = result.astype(object, copy=False)
return result
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,7 @@ def _can_hold_na(self) -> bool:
raise NotImplementedError(
"_can_hold_na is not defined for partially-initialized IntervalDtype"
)
if subtype.kind in ["i", "u"]:
if subtype.kind in "iu":
return False
return True

Expand Down Expand Up @@ -1437,7 +1437,7 @@ def from_numpy_dtype(cls, dtype: np.dtype) -> BaseMaskedDtype:
from pandas.core.arrays.boolean import BooleanDtype

return BooleanDtype()
elif dtype.kind in ["i", "u"]:
elif dtype.kind in "iu":
from pandas.core.arrays.integer import INT_STR_TO_DTYPE

return INT_STR_TO_DTYPE[dtype.name]
Expand Down
Loading