Skip to content

Commit 6a0acd9

Browse files
authored
REF: push concat logic out of internals and into concat_compat (#33110)
1 parent 053c207 commit 6a0acd9

File tree

4 files changed

+29
-28
lines changed

4 files changed

+29
-28
lines changed

pandas/core/dtypes/concat.py

+12 -2
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,9 @@ def is_nonempty(x) -> bool:
9797
# Creating an empty array directly is tempting, but the winnings would be
9898
# marginal given that it would still require shape & dtype calculation and
9999
# np.concatenate which has them both implemented is compiled.
100+
non_empties = [x for x in to_concat if is_nonempty(x)]
101+
if non_empties and axis == 0:
102+
to_concat = non_empties
100103

101104
typs = get_dtype_kinds(to_concat)
102105
_contains_datetime = any(typ.startswith("datetime") for typ in typs)
@@ -114,10 +117,17 @@ def is_nonempty(x) -> bool:
114117
elif "sparse" in typs:
115118
return _concat_sparse(to_concat, axis=axis, typs=typs)
116119

117-
all_empty = all(not is_nonempty(x) for x in to_concat)
118-
if any(is_extension_array_dtype(x) for x in to_concat) and axis == 1:
120+
all_empty = not len(non_empties)
121+
single_dtype = len({x.dtype for x in to_concat}) == 1
122+
any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)
123+
124+
if any_ea and axis == 1:
119125
to_concat = [np.atleast_2d(x.astype("object")) for x in to_concat]
120126

127+
elif any_ea and single_dtype and axis == 0:
128+
cls = type(to_concat[0])
129+
return cls._concat_same_type(to_concat)
130+
121131
if all_empty:
122132
# we have all empties, but may need to coerce the result dtype to
123133
# object if we have non-numeric type operands (numpy would otherwise

pandas/core/internals/managers.py

+4 -14
Original file line number | Diff line number | Diff line change
@@ -1649,21 +1649,11 @@ def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager":
16491649
-------
16501650
SingleBlockManager
16511651
"""
1652-
non_empties = [x for x in to_concat if len(x) > 0]
16531652

1654-
# check if all series are of the same block type:
1655-
if len(non_empties) > 0:
1656-
blocks = [obj.blocks[0] for obj in non_empties]
1657-
if len({b.dtype for b in blocks}) == 1:
1658-
new_block = blocks[0].concat_same_type(blocks)
1659-
else:
1660-
values = [x.values for x in blocks]
1661-
values = concat_compat(values)
1662-
new_block = make_block(values, placement=slice(0, len(values), 1))
1663-
else:
1664-
values = [x._block.values for x in to_concat]
1665-
values = concat_compat(values)
1666-
new_block = make_block(values, placement=slice(0, len(values), 1))
1653+
blocks = [obj.blocks[0] for obj in to_concat]
1654+
values = concat_compat([x.values for x in blocks])
1655+
1656+
new_block = make_block(values, placement=slice(0, len(values), 1))
16671657

16681658
mgr = SingleBlockManager(new_block, new_axis)
16691659
return mgr

pandas/tests/dtypes/test_concat.py

+12
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,9 @@
22

33
import pandas.core.dtypes.concat as _concat
44

5+
import pandas as pd
56
from pandas import DatetimeIndex, Period, PeriodIndex, Series, TimedeltaIndex
7+
import pandas._testing as tm
68

79

810
@pytest.mark.parametrize(
@@ -76,3 +78,13 @@ def test_get_dtype_kinds(index_or_series, to_concat, expected):
7678
def test_get_dtype_kinds_period(to_concat, expected):
7779
result = _concat.get_dtype_kinds(to_concat)
7880
assert result == set(expected)
81+
82+
83+
def test_concat_mismatched_categoricals_with_empty():
84+
# concat_compat behavior on series._values should match pd.concat on series
85+
ser1 = Series(["a", "b", "c"], dtype="category")
86+
ser2 = Series([], dtype="category")
87+
88+
result = _concat.concat_compat([ser1._values, ser2._values])
89+
expected = pd.concat([ser1, ser2])._values
90+
tm.assert_categorical_equal(result, expected)

pandas/tests/extension/test_external_block.py

+1 -12
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas.core.internals import BlockManager, SingleBlockManager
5+
from pandas.core.internals import BlockManager
66
from pandas.core.internals.blocks import ExtensionBlock
77

88

@@ -33,17 +33,6 @@ def df():
3333
return pd.DataFrame(block_manager)
3434

3535

36-
def test_concat_series():
37-
# GH17728
38-
values = np.arange(3, dtype="int64")
39-
block = CustomBlock(values, placement=slice(0, 3))
40-
mgr = SingleBlockManager(block, pd.RangeIndex(3))
41-
s = pd.Series(mgr, pd.RangeIndex(3), fastpath=True)
42-
43-
res = pd.concat([s, s])
44-
assert isinstance(res._data.blocks[0], CustomBlock)
45-
46-
4736
def test_concat_dataframe(df):
4837
# GH17728
4938
res = pd.concat([df, df])

0 commit comments

Comments
 (0)