Skip to content

Commit 1c2ad16

Browse files
authored
PERF: concat (#52291)
1 parent 9ba4ef8 commit 1c2ad16

File tree

3 files changed

+29
-33
lines changed

3 files changed

+29
-33
lines changed

pandas/core/dtypes/concat.py

+19-13
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@
4141
)
4242

4343

44+
def _is_nonempty(x, axis) -> bool:
45+
# filter empty arrays
46+
# 1-d dtypes always are included here
47+
if x.ndim <= axis:
48+
return True
49+
return x.shape[axis] > 0
50+
51+
4452
def concat_compat(
4553
to_concat: Sequence[ArrayLike], axis: AxisInt = 0, ea_compat_axis: bool = False
4654
) -> ArrayLike:
@@ -79,36 +87,34 @@ def concat_compat(
7987
# e.g. DatetimeArray
8088
# NB: We are assuming here that ensure_wrapped_if_arraylike has
8189
# been called where relevant.
82-
return obj._concat_same_type(to_concat_eas, axis=axis)
83-
84-
# filter empty arrays
85-
# 1-d dtypes always are included here
86-
def is_nonempty(x) -> bool:
87-
if x.ndim <= axis:
88-
return True
89-
return x.shape[axis] > 0
90+
return obj._concat_same_type(
91+
# error: Unexpected keyword argument "axis" for "_concat_same_type"
92+
# of "ExtensionArray"
93+
to_concat_eas,
94+
axis=axis, # type: ignore[call-arg]
95+
)
9096

9197
# If all arrays are empty, there's nothing to convert, just short-cut to
9298
# the concatenation, #3121.
9399
#
94100
# Creating an empty array directly is tempting, but the winnings would be
95101
# marginal given that it would still require shape & dtype calculation and
96102
# np.concatenate which has them both implemented is compiled.
97-
non_empties = [x for x in to_concat if is_nonempty(x)]
103+
non_empties = [x for x in to_concat if _is_nonempty(x, axis)]
98104
if non_empties and axis == 0 and not ea_compat_axis:
99105
# ea_compat_axis see GH#39574
100106
to_concat = non_empties
101107

102108
dtypes = {obj.dtype for obj in to_concat}
103109
kinds = {obj.dtype.kind for obj in to_concat}
104110
contains_datetime = any(
105-
isinstance(dtype, (np.dtype, DatetimeTZDtype)) and dtype.kind in ["m", "M"]
111+
isinstance(dtype, (np.dtype, DatetimeTZDtype)) and dtype.kind in "mM"
106112
for dtype in dtypes
107113
) or any(isinstance(obj, ABCExtensionArray) and obj.ndim > 1 for obj in to_concat)
108114

109115
all_empty = not len(non_empties)
110-
single_dtype = len({x.dtype for x in to_concat}) == 1
111-
any_ea = any(isinstance(x.dtype, ExtensionDtype) for x in to_concat)
116+
single_dtype = len(dtypes) == 1
117+
any_ea = any(isinstance(x, ExtensionDtype) for x in dtypes)
112118

113119
if contains_datetime:
114120
return _concat_datetime(to_concat, axis=axis)
@@ -345,7 +351,7 @@ def _concat_datetime(to_concat: Sequence[ArrayLike], axis: AxisInt = 0) -> Array
345351

346352
to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat]
347353

348-
single_dtype = len({x.dtype for x in to_concat}) == 1
354+
single_dtype = lib.dtypes_all_equal([x.dtype for x in to_concat])
349355

350356
# multiple types, need to coerce to object
351357
if not single_dtype:

pandas/core/reshape/concat.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -480,9 +480,7 @@ def __init__(
480480
else:
481481
# filter out the empties if there are no multi-index possibilities
482482
# note: keep empty Series, as they affect the result columns / name
483-
non_empties = [
484-
obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries)
485-
]
483+
non_empties = [obj for obj in objs if sum(obj.shape) > 0 or obj.ndim == 1]
486484

487485
if len(non_empties) and (
488486
keys is None and names is None and levels is None and not self.intersect
@@ -495,19 +493,21 @@ def __init__(
495493
self.objs = objs
496494

497495
# Standardize axis parameter to int
498-
if isinstance(sample, ABCSeries):
496+
if sample.ndim == 1:
499497
from pandas import DataFrame
500498

501499
axis = DataFrame._get_axis_number(axis)
500+
self._is_frame = False
501+
self._is_series = True
502502
else:
503503
axis = sample._get_axis_number(axis)
504+
self._is_frame = True
505+
self._is_series = False
504506

505507
# Need to flip BlockManager axis in the DataFrame special case
506-
self._is_frame = isinstance(sample, ABCDataFrame)
507508
if self._is_frame:
508509
axis = sample._get_block_manager_axis(axis)
509510

510-
self._is_series = isinstance(sample, ABCSeries)
511511
if not 0 <= axis <= sample.ndim:
512512
raise AssertionError(
513513
f"axis must be between 0 and {sample.ndim}, input was {axis}"
@@ -583,7 +583,8 @@ def get_result(self):
583583
arrs = [ser._values for ser in self.objs]
584584

585585
res = concat_compat(arrs, axis=0)
586-
result = cons(res, index=self.new_axes[0], name=name, dtype=res.dtype)
586+
mgr = type(sample._mgr).from_array(res, index=self.new_axes[0])
587+
result = cons(mgr, name=name, fastpath=True)
587588
return result.__finalize__(self, method="concat")
588589

589590
# combine as columns in a frame
@@ -666,7 +667,7 @@ def _get_concat_axis(self) -> Index:
666667
num = 0
667668
has_names = False
668669
for i, x in enumerate(self.objs):
669-
if not isinstance(x, ABCSeries):
670+
if x.ndim != 1:
670671
raise TypeError(
671672
f"Cannot concatenate type 'Series' with "
672673
f"object of type '{type(x).__name__}'"

pandas/tests/extension/test_numpy.py

+1-12
Original file line numberDiff line numberDiff line change
@@ -346,18 +346,7 @@ def test_fillna_frame(self, data_missing):
346346

347347

348348
class TestReshaping(BaseNumPyTests, base.BaseReshapingTests):
349-
@pytest.mark.parametrize(
350-
"in_frame",
351-
[
352-
True,
353-
pytest.param(
354-
False,
355-
marks=pytest.mark.xfail(reason="PandasArray inconsistently extracted"),
356-
),
357-
],
358-
)
359-
def test_concat(self, data, in_frame):
360-
super().test_concat(data, in_frame)
349+
pass
361350

362351

363352
class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):

0 commit comments

Comments
 (0)