1
1
"""
2
2
Utility functions related to concat.
3
3
"""
4
- from typing import cast
4
+ from typing import Set , cast
5
5
6
6
import numpy as np
7
7
8
8
from pandas ._typing import ArrayLike , DtypeObj
9
9
10
10
from pandas .core .dtypes .cast import find_common_type
11
11
from pandas .core .dtypes .common import (
12
- is_bool_dtype ,
13
12
is_categorical_dtype ,
14
- is_datetime64_dtype ,
15
- is_datetime64tz_dtype ,
16
13
is_dtype_equal ,
17
14
is_extension_array_dtype ,
18
- is_object_dtype ,
19
15
is_sparse ,
20
- is_timedelta64_dtype ,
21
16
)
22
17
from pandas .core .dtypes .generic import ABCCategoricalIndex , ABCRangeIndex , ABCSeries
23
18
26
21
from pandas .core .construction import array
27
22
28
23
29
- def get_dtype_kinds (l ):
24
+ def _get_dtype_kinds (l ) -> Set [ str ] :
30
25
"""
31
26
Parameters
32
27
----------
33
28
l : list of arrays
34
29
35
30
Returns
36
31
-------
37
- a set of kinds that exist in this list of arrays
32
+ set[str]
33
+ A set of kinds that exist in this list of arrays.
38
34
"""
39
- typs = set ()
35
+ typs : Set [ str ] = set ()
40
36
for arr in l :
37
+ # Note: we use dtype.kind checks because they are much more performant
38
+ # than is_foo_dtype
41
39
42
40
dtype = arr .dtype
43
- if is_categorical_dtype ( dtype ):
44
- typ = "category"
45
- elif is_sparse ( dtype ):
46
- typ = "sparse"
41
+ if not isinstance ( dtype , np . dtype ):
42
+ # ExtensionDtype so we get
43
+ # e.g. "category", "datetime64[ns, US/Central]", "Sparse[int64, 0]"
44
+ typ = str ( dtype )
47
45
elif isinstance (arr , ABCRangeIndex ):
48
46
typ = "range"
49
- elif is_datetime64tz_dtype (dtype ):
50
- # if to_concat contains different tz,
51
- # the result must be object dtype
52
- typ = str (dtype )
53
- elif is_datetime64_dtype (dtype ):
47
+ elif dtype .kind == "M" :
54
48
typ = "datetime"
55
- elif is_timedelta64_dtype ( dtype ) :
49
+ elif dtype . kind == "m" :
56
50
typ = "timedelta"
57
- elif is_object_dtype (dtype ):
58
- typ = "object"
59
- elif is_bool_dtype (dtype ):
60
- typ = "bool"
61
- elif is_extension_array_dtype (dtype ):
62
- typ = str (dtype )
51
+ elif dtype .kind in ["O" , "b" ]:
52
+ typ = str (dtype ) # i.e. "object", "bool"
63
53
else :
64
54
typ = dtype .kind
55
+
65
56
typs .add (typ )
66
57
return typs
67
58
@@ -140,7 +131,7 @@ def is_nonempty(x) -> bool:
140
131
if non_empties and axis == 0 :
141
132
to_concat = non_empties
142
133
143
- typs = get_dtype_kinds (to_concat )
134
+ typs = _get_dtype_kinds (to_concat )
144
135
_contains_datetime = any (typ .startswith ("datetime" ) for typ in typs )
145
136
146
137
all_empty = not len (non_empties )
@@ -161,13 +152,13 @@ def is_nonempty(x) -> bool:
161
152
return np .concatenate (to_concat )
162
153
163
154
elif _contains_datetime or "timedelta" in typs :
164
- return concat_datetime (to_concat , axis = axis , typs = typs )
155
+ return _concat_datetime (to_concat , axis = axis , typs = typs )
165
156
166
157
elif all_empty :
167
158
# we have all empties, but may need to coerce the result dtype to
168
159
# object if we have non-numeric type operands (numpy would otherwise
169
160
# cast this to float)
170
- typs = get_dtype_kinds (to_concat )
161
+ typs = _get_dtype_kinds (to_concat )
171
162
if len (typs ) != 1 :
172
163
173
164
if not len (typs - {"i" , "u" , "f" }) or not len (typs - {"bool" , "i" , "u" }):
@@ -361,7 +352,7 @@ def _concatenate_2d(to_concat, axis: int):
361
352
return np .concatenate (to_concat , axis = axis )
362
353
363
354
364
- def concat_datetime (to_concat , axis = 0 , typs = None ):
355
+ def _concat_datetime (to_concat , axis = 0 , typs = None ):
365
356
"""
366
357
provide concatenation of a datetimelike array of arrays each of which is a
367
358
single M8[ns], datetime64[ns, tz] or m8[ns] dtype
@@ -377,7 +368,7 @@ def concat_datetime(to_concat, axis=0, typs=None):
377
368
a single array, preserving the combined dtypes
378
369
"""
379
370
if typs is None :
380
- typs = get_dtype_kinds (to_concat )
371
+ typs = _get_dtype_kinds (to_concat )
381
372
382
373
to_concat = [_wrap_datetimelike (x ) for x in to_concat ]
383
374
single_dtype = len ({x .dtype for x in to_concat }) == 1
0 commit comments