Skip to content

Commit b033602

Browse files
jbrockmendel authored and topper-123 committed
PERF: slightly faster dtype.kind checks (pandas-dev#52387)
1 parent 8fcc137 commit b033602

36 files changed

+73
-74
lines changed

pandas/_libs/lib.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,7 @@ cpdef ndarray[object] ensure_string_array(
747747

748748
if hasattr(arr, "to_numpy"):
749749

750-
if hasattr(arr, "dtype") and arr.dtype.kind in ["m", "M"]:
750+
if hasattr(arr, "dtype") and arr.dtype.kind in "mM":
751751
# dtype check to exclude DataFrame
752752
# GH#41409 TODO: not a great place for this
753753
out = arr.astype(str).astype(object)
@@ -2641,7 +2641,7 @@ def maybe_convert_objects(ndarray[object] objects,
26412641
dtype = dtype_if_all_nat
26422642
if cnp.PyArray_DescrCheck(dtype):
26432643
# i.e. isinstance(dtype, np.dtype)
2644-
if dtype.kind not in ["m", "M"]:
2644+
if dtype.kind not in "mM":
26452645
raise ValueError(dtype)
26462646
else:
26472647
res = np.empty((<object>objects).shape, dtype=dtype)

pandas/_libs/tslibs/np_datetime.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ def compare_mismatched_resolutions(ndarray left, ndarray right, op):
419419
array([ True])
420420
"""
421421

422-
if left.dtype.kind != right.dtype.kind or left.dtype.kind not in ["m", "M"]:
422+
if left.dtype.kind != right.dtype.kind or left.dtype.kind not in "mM":
423423
raise ValueError("left and right must both be timedelta64 or both datetime64")
424424

425425
cdef:

pandas/_libs/tslibs/timedeltas.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ def _binary_op_method_timedeltalike(op, name):
775775
item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other)
776776
return f(self, item)
777777

778-
elif other.dtype.kind in ["m", "M"]:
778+
elif other.dtype.kind in "mM":
779779
return op(self.to_timedelta64(), other)
780780
elif other.dtype.kind == "O":
781781
return np.array([op(self, x) for x in other])
@@ -2028,7 +2028,7 @@ class Timedelta(_Timedelta):
20282028
result[mask] = np.nan
20292029
return result
20302030

2031-
elif other.dtype.kind in ["i", "u", "f"]:
2031+
elif other.dtype.kind in "iuf":
20322032
if other.ndim == 0:
20332033
return self // other.item()
20342034
else:

pandas/_libs/tslibs/timestamps.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ cdef class _Timestamp(ABCTimestamp):
464464
raise integer_op_not_supported(self)
465465

466466
elif is_array(other):
467-
if other.dtype.kind in ["i", "u"]:
467+
if other.dtype.kind in "iu":
468468
raise integer_op_not_supported(self)
469469
if other.dtype.kind == "m":
470470
if self.tz is None:
@@ -496,7 +496,7 @@ cdef class _Timestamp(ABCTimestamp):
496496
return self + neg_other
497497

498498
elif is_array(other):
499-
if other.dtype.kind in ["i", "u"]:
499+
if other.dtype.kind in "iu":
500500
raise integer_op_not_supported(self)
501501
if other.dtype.kind == "m":
502502
if self.tz is None:

pandas/_testing/asserters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ def assert_interval_array_equal(
530530
_check_isinstance(left, right, IntervalArray)
531531

532532
kwargs = {}
533-
if left._left.dtype.kind in ["m", "M"]:
533+
if left._left.dtype.kind in "mM":
534534
# We have a DatetimeArray or TimedeltaArray
535535
kwargs["check_freq"] = False
536536

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ def factorize_array(
568568
uniques : ndarray
569569
"""
570570
original = values
571-
if values.dtype.kind in ["m", "M"]:
571+
if values.dtype.kind in "mM":
572572
# _get_hashtable_algo will cast dt64/td64 to i8 via _ensure_data, so we
573573
# need to do the same to na_value. We are assuming here that the passed
574574
# na_value is an appropriately-typed NaT.

pandas/core/array_algos/datetimelike_accumulations.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def _cum_func(
5050
result = func(y)
5151
result[mask] = iNaT
5252

53-
if values.dtype.kind in ["m", "M"]:
53+
if values.dtype.kind in "mM":
5454
return result.view(values.dtype.base)
5555
return result
5656

pandas/core/array_algos/putmask.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
142142
if values.dtype == object:
143143
dtype, _ = infer_dtype_from(other, pandas_dtype=True)
144144

145-
if isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]:
145+
if isinstance(dtype, np.dtype) and dtype.kind in "mM":
146146
# https://github.com/numpy/numpy/issues/12550
147147
# timedelta64 will incorrectly cast to int
148148
if not is_list_like(other):

pandas/core/array_algos/quantile.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def _nanpercentile(
180180
quantiles : scalar or array
181181
"""
182182

183-
if values.dtype.kind in ["m", "M"]:
183+
if values.dtype.kind in "mM":
184184
# need to cast to integer to avoid rounding errors in numpy
185185
result = _nanpercentile(
186186
values.view("i8"),

pandas/core/arrays/datetimelike.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
740740
if not hasattr(values, "dtype"):
741741
values = np.asarray(values)
742742

743-
if values.dtype.kind in ["f", "i", "u", "c"]:
743+
if values.dtype.kind in "fiuc":
744744
# TODO: de-duplicate with equals, validate_comparison_value
745745
return np.zeros(self.shape, dtype=bool)
746746

@@ -769,7 +769,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
769769
except ValueError:
770770
return isin(self.astype(object), values)
771771

772-
if self.dtype.kind in ["m", "M"]:
772+
if self.dtype.kind in "mM":
773773
self = cast("DatetimeArray | TimedeltaArray", self)
774774
values = values.as_unit(self.unit)
775775

@@ -1205,7 +1205,7 @@ def _sub_nat(self):
12051205
# For period dtype, timedelta64 is a close-enough return dtype.
12061206
result = np.empty(self.shape, dtype=np.int64)
12071207
result.fill(iNaT)
1208-
if self.dtype.kind in ["m", "M"]:
1208+
if self.dtype.kind in "mM":
12091209
# We can retain unit in dtype
12101210
self = cast("DatetimeArray| TimedeltaArray", self)
12111211
return result.view(f"timedelta64[{self.unit}]")

pandas/core/arrays/masked.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ def _arith_method(self, other, op):
694694
else:
695695
# Make sure we do this before the "pow" mask checks
696696
# to get an expected exception message on shape mismatch.
697-
if self.dtype.kind in ["i", "u"] and op_name in ["floordiv", "mod"]:
697+
if self.dtype.kind in "iu" and op_name in ["floordiv", "mod"]:
698698
# TODO(GH#30188) ATM we don't match the behavior of non-masked
699699
# types with respect to floordiv-by-zero
700700
pd_op = op

pandas/core/arrays/numeric.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def __from_arrow__(
8080
# test_from_arrow_type_error raise for string, but allow
8181
# through itemsize conversion GH#31896
8282
rt_dtype = pandas_dtype(array.type.to_pandas_dtype())
83-
if rt_dtype.kind not in ["i", "u", "f"]:
83+
if rt_dtype.kind not in "iuf":
8484
# Could allow "c" or potentially disallow float<->int conversion,
8585
# but at the moment we specifically test that uint<->int works
8686
raise TypeError(

pandas/core/arrays/numpy_.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def _validate_scalar(self, fill_value):
217217
return fill_value
218218

219219
def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
220-
if self.dtype.kind in ["i", "u", "b"]:
220+
if self.dtype.kind in "iub":
221221
fv = None
222222
else:
223223
fv = np.nan

pandas/core/arrays/sparse/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ def _sparse_array_op(
228228
if (
229229
name in ["floordiv", "mod"]
230230
and (right == 0).any()
231-
and left.dtype.kind in ["i", "u"]
231+
and left.dtype.kind in "iu"
232232
):
233233
# Match the non-Sparse Series behavior
234234
opname = f"sparse_{name}_float64"

pandas/core/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,7 @@ def _try_cast(
756756
return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)
757757

758758
elif dtype.kind == "U":
759-
# TODO: test cases with arr.dtype.kind in ["m", "M"]
759+
# TODO: test cases with arr.dtype.kind in "mM"
760760
if is_ndarray:
761761
arr = cast(np.ndarray, arr)
762762
shape = arr.shape
@@ -768,7 +768,7 @@ def _try_cast(
768768
shape
769769
)
770770

771-
elif dtype.kind in ["m", "M"]:
771+
elif dtype.kind in "mM":
772772
return maybe_cast_to_datetime(arr, dtype)
773773

774774
# GH#15832: Check if we are requesting a numeric dtype and

pandas/core/dtypes/astype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def _astype_nansafe(
8686
elif not isinstance(dtype, np.dtype): # pragma: no cover
8787
raise ValueError("dtype must be np.dtype or ExtensionDtype")
8888

89-
if arr.dtype.kind in ["m", "M"]:
89+
if arr.dtype.kind in "mM":
9090
from pandas.core.construction import ensure_wrapped_if_datetimelike
9191

9292
arr = ensure_wrapped_if_datetimelike(arr)

pandas/core/dtypes/cast.py

+18-19
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
210210
211211
Notes
212212
-----
213-
Caller is responsible for checking dtype.kind in ["m", "M"]
213+
Caller is responsible for checking dtype.kind in "mM"
214214
"""
215215
if is_valid_na_for_dtype(value, dtype):
216216
# GH#36541: can't fill array directly with pd.NaT
@@ -295,7 +295,7 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLi
295295

296296
# a datetimelike
297297
# GH12821, iNaT is cast to float
298-
if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]:
298+
if dtype.kind in "mM" and result.dtype.kind in "if":
299299
result = result.astype(dtype)
300300

301301
elif dtype.kind == "m" and result.dtype == _dtype_obj:
@@ -544,7 +544,7 @@ def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
544544
return _dtype_obj
545545
elif dtype.kind == "b":
546546
return _dtype_obj
547-
elif dtype.kind in ["i", "u"]:
547+
elif dtype.kind in "iu":
548548
return np.dtype(np.float64)
549549
return dtype
550550

@@ -623,8 +623,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
623623
dtype = _dtype_obj
624624
return dtype, fill_value
625625

626-
kinds = ["i", "u", "f", "c", "m", "M"]
627-
if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds:
626+
if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in "iufcmM":
628627
dtype = ensure_dtype_can_hold_na(dtype)
629628
fv = na_value_for_dtype(dtype)
630629
return dtype, fv
@@ -1196,7 +1195,7 @@ def maybe_cast_to_datetime(
11961195
from pandas.core.arrays.datetimes import DatetimeArray
11971196
from pandas.core.arrays.timedeltas import TimedeltaArray
11981197

1199-
assert dtype.kind in ["m", "M"]
1198+
assert dtype.kind in "mM"
12001199
if not is_list_like(value):
12011200
raise TypeError("value must be listlike")
12021201

@@ -1251,7 +1250,7 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
12511250
# i.e. datetime64tz
12521251
pass
12531252

1254-
elif dtype.kind in ["m", "M"]:
1253+
elif dtype.kind in "mM":
12551254
reso = get_unit_from_dtype(dtype)
12561255
if not is_supported_unit(reso):
12571256
# pre-2.0 we would silently swap in nanos for lower-resolutions,
@@ -1294,7 +1293,7 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
12941293

12951294
if (
12961295
isinstance(left, np.ndarray)
1297-
and left.dtype.kind in ["i", "u", "c"]
1296+
and left.dtype.kind in "iuc"
12981297
and (lib.is_integer(right) or lib.is_float(right))
12991298
):
13001299
# e.g. with int8 dtype and right=512, we want to end up with
@@ -1335,7 +1334,7 @@ def common_dtype_categorical_compat(
13351334
# GH#38240
13361335

13371336
# TODO: more generally, could do `not can_hold_na(dtype)`
1338-
if isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]:
1337+
if isinstance(dtype, np.dtype) and dtype.kind in "iu":
13391338
for obj in objs:
13401339
# We don't want to accidentally allow e.g. "categorical" str here
13411340
obj_dtype = getattr(obj, "dtype", None)
@@ -1429,7 +1428,7 @@ def construct_2d_arraylike_from_scalar(
14291428
) -> np.ndarray:
14301429
shape = (length, width)
14311430

1432-
if dtype.kind in ["m", "M"]:
1431+
if dtype.kind in "mM":
14331432
value = _maybe_box_and_unbox_datetimelike(value, dtype)
14341433
elif dtype == _dtype_obj:
14351434
if isinstance(value, (np.timedelta64, np.datetime64)):
@@ -1486,13 +1485,13 @@ def construct_1d_arraylike_from_scalar(
14861485
if length and is_integer_dtype(dtype) and isna(value):
14871486
# coerce if we have nan for an integer dtype
14881487
dtype = np.dtype("float64")
1489-
elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
1488+
elif isinstance(dtype, np.dtype) and dtype.kind in "US":
14901489
# we need to coerce to object dtype
14911490
# to allow numpy to take our string as a scalar value
14921491
dtype = np.dtype("object")
14931492
if not isna(value):
14941493
value = ensure_str(value)
1495-
elif dtype.kind in ["M", "m"]:
1494+
elif dtype.kind in "mM":
14961495
value = _maybe_box_and_unbox_datetimelike(value, dtype)
14971496

14981497
subarr = np.empty(length, dtype=dtype)
@@ -1504,7 +1503,7 @@ def construct_1d_arraylike_from_scalar(
15041503

15051504

15061505
def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
1507-
# Caller is responsible for checking dtype.kind in ["m", "M"]
1506+
# Caller is responsible for checking dtype.kind in "mM"
15081507

15091508
if isinstance(value, dt.datetime):
15101509
# we dont want to box dt64, in particular datetime64("NaT")
@@ -1642,7 +1641,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
16421641
f"To cast anyway, use pd.Series(values).astype({dtype})"
16431642
)
16441643

1645-
if arr.dtype.kind in ["m", "M"]:
1644+
if arr.dtype.kind in "mM":
16461645
# test_constructor_maskedarray_nonfloat
16471646
raise TypeError(
16481647
f"Constructing a Series or DataFrame from {arr.dtype} values and "
@@ -1667,7 +1666,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
16671666
bool
16681667
"""
16691668
dtype = arr.dtype
1670-
if not isinstance(dtype, np.dtype) or dtype.kind in ["m", "M"]:
1669+
if not isinstance(dtype, np.dtype) or dtype.kind in "mM":
16711670
if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)):
16721671
# np.dtype here catches datetime64ns and timedelta64ns; we assume
16731672
# in this case that we have DatetimeArray/TimedeltaArray
@@ -1715,7 +1714,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
17151714

17161715
tipo = _maybe_infer_dtype_type(element)
17171716

1718-
if dtype.kind in ["i", "u"]:
1717+
if dtype.kind in "iu":
17191718
if isinstance(element, range):
17201719
if _dtype_can_hold_range(element, dtype):
17211720
return element
@@ -1731,7 +1730,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
17311730
raise LossySetitemError
17321731

17331732
if tipo is not None:
1734-
if tipo.kind not in ["i", "u"]:
1733+
if tipo.kind not in "iu":
17351734
if isinstance(element, np.ndarray) and element.dtype.kind == "f":
17361735
# If all can be losslessly cast to integers, then we can hold them
17371736
with np.errstate(invalid="ignore"):
@@ -1783,7 +1782,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
17831782

17841783
if tipo is not None:
17851784
# TODO: itemsize check?
1786-
if tipo.kind not in ["f", "i", "u"]:
1785+
if tipo.kind not in "iuf":
17871786
# Anything other than float/integer we cannot hold
17881787
raise LossySetitemError
17891788
if not isinstance(tipo, np.dtype):
@@ -1819,7 +1818,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
18191818
raise LossySetitemError
18201819

18211820
if tipo is not None:
1822-
if tipo.kind in ["c", "f", "i", "u"]:
1821+
if tipo.kind in "iufc":
18231822
return element
18241823
raise LossySetitemError
18251824
raise LossySetitemError

pandas/core/dtypes/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1376,7 +1376,7 @@ def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool:
13761376
Checks only for dtype objects, not dtype-castable strings or types.
13771377
"""
13781378
return isinstance(dtype, ExtensionDtype) or (
1379-
isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]
1379+
isinstance(dtype, np.dtype) and dtype.kind in "mM"
13801380
)
13811381

13821382

pandas/core/dtypes/concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def concat_compat(
155155
# "Sequence[Union[ExtensionArray, ndarray[Any, Any]]]"; expected
156156
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[Any]]]]"
157157
result: np.ndarray = np.concatenate(to_concat, axis=axis) # type: ignore[arg-type]
158-
if "b" in kinds and result.dtype.kind in ["i", "u", "f"]:
158+
if "b" in kinds and result.dtype.kind in "iuf":
159159
# GH#39817 cast to object instead of casting bools to numeric
160160
result = result.astype(object, copy=False)
161161
return result

pandas/core/dtypes/dtypes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1138,7 +1138,7 @@ def _can_hold_na(self) -> bool:
11381138
raise NotImplementedError(
11391139
"_can_hold_na is not defined for partially-initialized IntervalDtype"
11401140
)
1141-
if subtype.kind in ["i", "u"]:
1141+
if subtype.kind in "iu":
11421142
return False
11431143
return True
11441144

@@ -1437,7 +1437,7 @@ def from_numpy_dtype(cls, dtype: np.dtype) -> BaseMaskedDtype:
14371437
from pandas.core.arrays.boolean import BooleanDtype
14381438

14391439
return BooleanDtype()
1440-
elif dtype.kind in ["i", "u"]:
1440+
elif dtype.kind in "iu":
14411441
from pandas.core.arrays.integer import INT_STR_TO_DTYPE
14421442

14431443
return INT_STR_TO_DTYPE[dtype.name]

0 commit comments

Comments
 (0)