Skip to content

Commit 2da7c34

Browse files
authored
PERF: get_dtype_kinds (#36309)
1 parent 21fe972 commit 2da7c34

File tree

2 files changed

+23
-32
lines changed

2 files changed

+23
-32
lines changed

pandas/core/dtypes/concat.py

+21-30
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,18 @@
11
"""
22
Utility functions related to concat.
33
"""
4-
from typing import cast
4+
from typing import Set, cast
55

66
import numpy as np
77

88
from pandas._typing import ArrayLike, DtypeObj
99

1010
from pandas.core.dtypes.cast import find_common_type
1111
from pandas.core.dtypes.common import (
12-
is_bool_dtype,
1312
is_categorical_dtype,
14-
is_datetime64_dtype,
15-
is_datetime64tz_dtype,
1613
is_dtype_equal,
1714
is_extension_array_dtype,
18-
is_object_dtype,
1915
is_sparse,
20-
is_timedelta64_dtype,
2116
)
2217
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries
2318

@@ -26,42 +21,38 @@
2621
from pandas.core.construction import array
2722

2823

29-
def get_dtype_kinds(l):
24+
def _get_dtype_kinds(l) -> Set[str]:
3025
"""
3126
Parameters
3227
----------
3328
l : list of arrays
3429
3530
Returns
3631
-------
37-
a set of kinds that exist in this list of arrays
32+
set[str]
33+
A set of kinds that exist in this list of arrays.
3834
"""
39-
typs = set()
35+
typs: Set[str] = set()
4036
for arr in l:
37+
# Note: we use dtype.kind checks because they are much more performant
38+
# than is_foo_dtype
4139

4240
dtype = arr.dtype
43-
if is_categorical_dtype(dtype):
44-
typ = "category"
45-
elif is_sparse(dtype):
46-
typ = "sparse"
41+
if not isinstance(dtype, np.dtype):
42+
# ExtensionDtype so we get
43+
# e.g. "category", "datetime64[ns, US/Central]", "Sparse[int64, 0]"
44+
typ = str(dtype)
4745
elif isinstance(arr, ABCRangeIndex):
4846
typ = "range"
49-
elif is_datetime64tz_dtype(dtype):
50-
# if to_concat contains different tz,
51-
# the result must be object dtype
52-
typ = str(dtype)
53-
elif is_datetime64_dtype(dtype):
47+
elif dtype.kind == "M":
5448
typ = "datetime"
55-
elif is_timedelta64_dtype(dtype):
49+
elif dtype.kind == "m":
5650
typ = "timedelta"
57-
elif is_object_dtype(dtype):
58-
typ = "object"
59-
elif is_bool_dtype(dtype):
60-
typ = "bool"
61-
elif is_extension_array_dtype(dtype):
62-
typ = str(dtype)
51+
elif dtype.kind in ["O", "b"]:
52+
typ = str(dtype) # i.e. "object", "bool"
6353
else:
6454
typ = dtype.kind
55+
6556
typs.add(typ)
6657
return typs
6758

@@ -140,7 +131,7 @@ def is_nonempty(x) -> bool:
140131
if non_empties and axis == 0:
141132
to_concat = non_empties
142133

143-
typs = get_dtype_kinds(to_concat)
134+
typs = _get_dtype_kinds(to_concat)
144135
_contains_datetime = any(typ.startswith("datetime") for typ in typs)
145136

146137
all_empty = not len(non_empties)
@@ -161,13 +152,13 @@ def is_nonempty(x) -> bool:
161152
return np.concatenate(to_concat)
162153

163154
elif _contains_datetime or "timedelta" in typs:
164-
return concat_datetime(to_concat, axis=axis, typs=typs)
155+
return _concat_datetime(to_concat, axis=axis, typs=typs)
165156

166157
elif all_empty:
167158
# we have all empties, but may need to coerce the result dtype to
168159
# object if we have non-numeric type operands (numpy would otherwise
169160
# cast this to float)
170-
typs = get_dtype_kinds(to_concat)
161+
typs = _get_dtype_kinds(to_concat)
171162
if len(typs) != 1:
172163

173164
if not len(typs - {"i", "u", "f"}) or not len(typs - {"bool", "i", "u"}):
@@ -361,7 +352,7 @@ def _concatenate_2d(to_concat, axis: int):
361352
return np.concatenate(to_concat, axis=axis)
362353

363354

364-
def concat_datetime(to_concat, axis=0, typs=None):
355+
def _concat_datetime(to_concat, axis=0, typs=None):
365356
"""
366357
provide concatenation of a datetimelike array of arrays each of which is a
367358
single M8[ns], datetime64[ns, tz] or m8[ns] dtype
@@ -377,7 +368,7 @@ def concat_datetime(to_concat, axis=0, typs=None):
377368
a single array, preserving the combined dtypes
378369
"""
379370
if typs is None:
380-
typs = get_dtype_kinds(to_concat)
371+
typs = _get_dtype_kinds(to_concat)
381372

382373
to_concat = [_wrap_datetimelike(x) for x in to_concat]
383374
single_dtype = len({x.dtype for x in to_concat}) == 1

pandas/tests/dtypes/test_concat.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
)
4545
def test_get_dtype_kinds(index_or_series, to_concat, expected):
4646
to_concat_klass = [index_or_series(c) for c in to_concat]
47-
result = _concat.get_dtype_kinds(to_concat_klass)
47+
result = _concat._get_dtype_kinds(to_concat_klass)
4848
assert result == set(expected)
4949

5050

@@ -76,7 +76,7 @@ def test_get_dtype_kinds(index_or_series, to_concat, expected):
7676
],
7777
)
7878
def test_get_dtype_kinds_period(to_concat, expected):
79-
result = _concat.get_dtype_kinds(to_concat)
79+
result = _concat._get_dtype_kinds(to_concat)
8080
assert result == set(expected)
8181

8282

0 commit comments

Comments
 (0)