pandas-dev · phofl · Oct 22, 2022 · Oct 19, 2022 · Oct 19, 2022 · Oct 19, 2022
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
@@ -97,13 +97,13 @@ class ConcatIndexDtype:
 
     params = (
         ["datetime64[ns]", "int64", "Int64", "string[python]", "string[pyarrow]"],
+        ["monotonic", "non_monotonic", "has_na"],
         [0, 1],
         [True, False],
-        [True, False],
     )
-    param_names = ["dtype", "axis", "sort", "is_monotonic"]
+    param_names = ["dtype", "structure", "axis", "sort"]
 
-    def setup(self, dtype, axis, sort, is_monotonic):
+    def setup(self, dtype, structure, axis, sort):
         N = 10_000
         if dtype == "datetime64[ns]":
             vals = date_range("1970-01-01", periods=N)
@@ -115,14 +115,21 @@ def setup(self, dtype, axis, sort, is_monotonic):
             raise NotImplementedError
 
         idx = Index(vals, dtype=dtype)
-        if is_monotonic:
+
+        if structure == "monotonic":
             idx = idx.sort_values()
-        else:
+        elif structure == "non_monotonic":
             idx = idx[::-1]
+        elif structure == "has_na":
+            if not idx._can_hold_na:
+                raise NotImplementedError
+            idx = Index([None], dtype=dtype).append(idx)
+        else:
+            raise NotImplementedError
 
-        self.series = [Series(i, idx[i:]) for i in range(5)]
+        self.series = [Series(i, idx[:-i]) for i in range(1, 6)]
 
-    def time_concat_series(self, dtype, axis, sort, is_monotonic):
+    def time_concat_series(self, dtype, structure, axis, sort):
         concat(self.series, axis=axis, sort=sort)
 
 

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -218,10 +218,10 @@ Performance improvements
 - Performance improvement in :func:`merge` and :meth:`DataFrame.join` when joining on a sorted :class:`MultiIndex` (:issue:`48504`)
 - Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`)
 - Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
+- Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`, :issue:`49178`)
 - Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
 - Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
-- Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`)
 - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
 - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
 - Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -102,7 +102,6 @@
         Categorical,
         DataFrame,
         Index,
-        MultiIndex,
         Series,
     )
     from pandas.core.arrays import (
@@ -1768,7 +1767,7 @@ def safe_sort(
     na_sentinel: int | None = -1,
     assume_unique: bool = False,
     verify: bool = True,
-) -> np.ndarray | MultiIndex | tuple[np.ndarray | MultiIndex, np.ndarray]:
+) -> AnyArrayLike | tuple[AnyArrayLike, np.ndarray]:
     """
     Sort ``values`` and reorder corresponding ``codes``.
 
@@ -1797,7 +1796,7 @@ def safe_sort(
 
     Returns
     -------
-    ordered : ndarray or MultiIndex
+    ordered : AnyArrayLike
         Sorted ``values``
     new_codes : ndarray
         Reordered ``codes``; returned when ``codes`` is not None.
@@ -1816,7 +1815,7 @@ def safe_sort(
             "Only list-like objects are allowed to be passed to safe_sort as values"
         )
 
-    if not isinstance(values, (np.ndarray, ABCExtensionArray, ABCMultiIndex)):
+    if not is_array_like(values):
         # don't convert to string types
         dtype, _ = infer_dtype_from_array(values)
         # error: Argument "dtype" to "asarray" has incompatible type "Union[dtype[Any],
@@ -1826,7 +1825,7 @@ def safe_sort(
         values = np.asarray(values, dtype=dtype)  # type: ignore[arg-type]
 
     sorter = None
-    ordered: np.ndarray | MultiIndex
+    ordered: AnyArrayLike
 
     if (
         not is_extension_array_dtype(values)
@@ -1894,15 +1893,19 @@ def safe_sort(
     return ordered, ensure_platform_int(new_codes)
 
 
-def _sort_mixed(values) -> np.ndarray:
+def _sort_mixed(values) -> AnyArrayLike:
     """order ints before strings before nulls in 1d arrays"""
     str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
     null_pos = np.array([isna(x) for x in values], dtype=bool)
-    nums = np.sort(values[~str_pos & ~null_pos])
-    strs = np.sort(values[str_pos])
-    return np.concatenate(
-        [nums, np.asarray(strs, dtype=object), np.array(values[null_pos])]
-    )
+    num_pos = ~str_pos & ~null_pos
+    str_argsort = np.argsort(values[str_pos])
+    num_argsort = np.argsort(values[num_pos])
+    # convert boolean arrays to positional indices, then order by underlying values
+    str_locs = str_pos.nonzero()[0].take(str_argsort)
+    num_locs = num_pos.nonzero()[0].take(num_argsort)
+    null_locs = null_pos.nonzero()[0]  # nulls are not sorted; original order kept
+    locs = np.concatenate([num_locs, str_locs, null_locs])  # nums, strs, nulls
+    return values.take(locs)
 
 
 def _sort_tuples(values: np.ndarray) -> np.ndarray:

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
@@ -190,7 +190,7 @@ def safe_sort_index(index: Index) -> Index:
     except TypeError:
         pass
     else:
-        if isinstance(array_sorted, MultiIndex):
+        if isinstance(array_sorted, Index):
             return array_sorted
 
         array_sorted = cast(np.ndarray, array_sorted)

diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py
@@ -252,7 +252,7 @@ def test_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f):
         tm.assert_index_equal(
             result.index.levels[0], pairwise_frames.index, check_names=False
         )
-        tm.assert_numpy_array_equal(
+        tm.assert_index_equal(
             safe_sort(result.index.levels[1]),
             safe_sort(pairwise_frames.columns.unique()),
         )
@@ -310,7 +310,7 @@ def test_pairwise_with_other(
         tm.assert_index_equal(
             result.index.levels[0], pairwise_frames.index, check_names=False
         )
-        tm.assert_numpy_array_equal(
+        tm.assert_index_equal(
             safe_sort(result.index.levels[1]),
             safe_sort(pairwise_other_frame.columns.unique()),
         )