Skip to content

Commit 3bb83b0

Browse files
authored
CLN: dtypes.concat (#39572)
1 parent 6b8cc98 commit 3bb83b0

File tree

2 files changed

+7
-118
lines changed

2 files changed

+7
-118
lines changed

pandas/core/dtypes/concat.py

+6-44
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Utility functions related to concat.
33
"""
4-
from typing import Set, cast
4+
from typing import cast
55

66
import numpy as np
77

@@ -14,49 +14,13 @@
1414
is_extension_array_dtype,
1515
is_sparse,
1616
)
17-
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries
17+
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCSeries
1818

1919
from pandas.core.arrays import ExtensionArray
2020
from pandas.core.arrays.sparse import SparseArray
2121
from pandas.core.construction import array, ensure_wrapped_if_datetimelike
2222

2323

24-
def _get_dtype_kinds(arrays) -> Set[str]:
25-
"""
26-
Parameters
27-
----------
28-
arrays : list of arrays
29-
30-
Returns
31-
-------
32-
set[str]
33-
A set of kinds that exist in this list of arrays.
34-
"""
35-
typs: Set[str] = set()
36-
for arr in arrays:
37-
# Note: we use dtype.kind checks because they are much more performant
38-
# than is_foo_dtype
39-
40-
dtype = arr.dtype
41-
if not isinstance(dtype, np.dtype):
42-
# ExtensionDtype so we get
43-
# e.g. "categorical", "datetime64[ns, US/Central]", "Sparse[int64, 0]"
44-
typ = str(dtype)
45-
elif isinstance(arr, ABCRangeIndex):
46-
typ = "range"
47-
elif dtype.kind == "M":
48-
typ = "datetime"
49-
elif dtype.kind == "m":
50-
typ = "timedelta"
51-
elif dtype.kind in ["O", "b"]:
52-
typ = str(dtype) # i.e. "object", "bool"
53-
else:
54-
typ = dtype.kind
55-
56-
typs.add(typ)
57-
return typs
58-
59-
6024
def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
6125
"""
6226
Helper function for `arr.astype(common_dtype)` but handling all special
@@ -130,8 +94,7 @@ def is_nonempty(x) -> bool:
13094
if non_empties and axis == 0:
13195
to_concat = non_empties
13296

133-
typs = _get_dtype_kinds(to_concat)
134-
_contains_datetime = any(typ.startswith("datetime") for typ in typs)
97+
kinds = {obj.dtype.kind for obj in to_concat}
13598

13699
all_empty = not len(non_empties)
137100
single_dtype = len({x.dtype for x in to_concat}) == 1
@@ -150,17 +113,16 @@ def is_nonempty(x) -> bool:
150113
else:
151114
return np.concatenate(to_concat)
152115

153-
elif _contains_datetime or "timedelta" in typs:
116+
elif any(kind in ["m", "M"] for kind in kinds):
154117
return _concat_datetime(to_concat, axis=axis)
155118

156119
elif all_empty:
157120
# we have all empties, but may need to coerce the result dtype to
158121
# object if we have non-numeric type operands (numpy would otherwise
159122
# cast this to float)
160-
typs = _get_dtype_kinds(to_concat)
161-
if len(typs) != 1:
123+
if len(kinds) != 1:
162124

163-
if not len(typs - {"i", "u", "f"}) or not len(typs - {"bool", "i", "u"}):
125+
if not len(kinds - {"i", "u", "f"}) or not len(kinds - {"b", "i", "u"}):
164126
# let numpy coerce
165127
pass
166128
else:

pandas/tests/dtypes/test_concat.py

+1-74
Original file line numberDiff line numberDiff line change
@@ -3,83 +3,10 @@
33
import pandas.core.dtypes.concat as _concat
44

55
import pandas as pd
6-
from pandas import DatetimeIndex, Period, PeriodIndex, Series, TimedeltaIndex
6+
from pandas import Series
77
import pandas._testing as tm
88

99

10-
@pytest.mark.parametrize(
11-
"to_concat, expected",
12-
[
13-
# int/float/str
14-
([["a"], [1, 2]], ["i", "object"]),
15-
([[3, 4], [1, 2]], ["i"]),
16-
([[3, 4], [1, 2.1]], ["i", "f"]),
17-
# datetimelike
18-
([DatetimeIndex(["2011-01-01"]), DatetimeIndex(["2011-01-02"])], ["datetime"]),
19-
([TimedeltaIndex(["1 days"]), TimedeltaIndex(["2 days"])], ["timedelta"]),
20-
# datetimelike object
21-
(
22-
[
23-
DatetimeIndex(["2011-01-01"]),
24-
DatetimeIndex(["2011-01-02"], tz="US/Eastern"),
25-
],
26-
["datetime", "datetime64[ns, US/Eastern]"],
27-
),
28-
(
29-
[
30-
DatetimeIndex(["2011-01-01"], tz="Asia/Tokyo"),
31-
DatetimeIndex(["2011-01-02"], tz="US/Eastern"),
32-
],
33-
["datetime64[ns, Asia/Tokyo]", "datetime64[ns, US/Eastern]"],
34-
),
35-
([TimedeltaIndex(["1 days"]), TimedeltaIndex(["2 hours"])], ["timedelta"]),
36-
(
37-
[
38-
DatetimeIndex(["2011-01-01"], tz="Asia/Tokyo"),
39-
TimedeltaIndex(["1 days"]),
40-
],
41-
["datetime64[ns, Asia/Tokyo]", "timedelta"],
42-
),
43-
],
44-
)
45-
def test_get_dtype_kinds(index_or_series, to_concat, expected):
46-
to_concat_klass = [index_or_series(c) for c in to_concat]
47-
result = _concat._get_dtype_kinds(to_concat_klass)
48-
assert result == set(expected)
49-
50-
51-
@pytest.mark.parametrize(
52-
"to_concat, expected",
53-
[
54-
(
55-
[PeriodIndex(["2011-01"], freq="M"), PeriodIndex(["2011-01"], freq="M")],
56-
["period[M]"],
57-
),
58-
(
59-
[
60-
Series([Period("2011-01", freq="M")]),
61-
Series([Period("2011-02", freq="M")]),
62-
],
63-
["period[M]"],
64-
),
65-
(
66-
[PeriodIndex(["2011-01"], freq="M"), PeriodIndex(["2011-01"], freq="D")],
67-
["period[M]", "period[D]"],
68-
),
69-
(
70-
[
71-
Series([Period("2011-01", freq="M")]),
72-
Series([Period("2011-02", freq="D")]),
73-
],
74-
["period[M]", "period[D]"],
75-
),
76-
],
77-
)
78-
def test_get_dtype_kinds_period(to_concat, expected):
79-
result = _concat._get_dtype_kinds(to_concat)
80-
assert result == set(expected)
81-
82-
8310
def test_concat_mismatched_categoricals_with_empty():
8411
# concat_compat behavior on series._values should match pd.concat on series
8512
ser1 = Series(["a", "b", "c"], dtype="category")

0 commit comments

Comments
 (0)