Skip to content

Commit 2028d9a

Browse files
authored
PERF: concat(axis=1) for objects with different indexes (#52541)
* PERF: concat(axis=1) for objects with different indexes
* whatsnew
* maybe sort
1 parent 0cb4d1b commit 2028d9a

File tree

3 files changed

+17
-5
lines changed

3 files changed

+17
-5
lines changed

asv_bench/benchmarks/join_merge.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,14 @@ def time_f_ordered(self, axis, ignore_index):
7373

7474
class ConcatIndexDtype:
7575
params = (
76-
["datetime64[ns]", "int64", "Int64", "string[python]", "string[pyarrow]"],
76+
[
77+
"datetime64[ns]",
78+
"int64",
79+
"Int64",
80+
"int64[pyarrow]",
81+
"string[python]",
82+
"string[pyarrow]",
83+
],
7784
["monotonic", "non_monotonic", "has_na"],
7885
[0, 1],
7986
[True, False],
@@ -84,7 +91,7 @@ def setup(self, dtype, structure, axis, sort):
8491
N = 10_000
8592
if dtype == "datetime64[ns]":
8693
vals = date_range("1970-01-01", periods=N)
87-
elif dtype in ("int64", "Int64"):
94+
elif dtype in ("int64", "Int64", "int64[pyarrow]"):
8895
vals = np.arange(N, dtype=np.int64)
8996
elif dtype in ("string[python]", "string[pyarrow]"):
9097
vals = tm.makeStringIndex(N)

doc/source/whatsnew/v2.1.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,7 @@ Performance improvements
252252
- Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
253253
- Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)
254254
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`52525`)
255+
- Performance improvement in :func:`concat` when ``axis=1`` and objects have different indexes (:issue:`52541`)
255256
-
256257

257258
.. ---------------------------------------------------------------------------

pandas/core/indexes/api.py

+7-3
Original file line number | Diff line number | Diff line change
@@ -227,9 +227,7 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
227227

228228
def _unique_indices(inds, dtype) -> Index:
229229
"""
230-
Convert indexes to lists and concatenate them, removing duplicates.
231-
232-
The final dtype is inferred.
230+
Concatenate indices and remove duplicates.
233231
234232
Parameters
235233
----------
@@ -240,6 +238,12 @@ def _unique_indices(inds, dtype) -> Index:
240238
-------
241239
Index
242240
"""
241+
if all(isinstance(ind, Index) for ind in inds):
242+
result = inds[0].append(inds[1:]).unique()
243+
result = result.astype(dtype, copy=False)
244+
if sort:
245+
result = result.sort_values()
246+
return result
243247

244248
def conv(i):
245249
if isinstance(i, Index):

0 commit comments

Comments
 (0)