From 368b84635d15a36fce6da0963e99fce32bd98303 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 6 Jun 2022 00:47:18 +0200 Subject: [PATCH] Backport PR #47206: REGR: concat not sorting columns for mixed column names --- doc/source/whatsnew/v1.4.3.rst | 1 + pandas/core/indexes/api.py | 11 ++++++++- pandas/tests/reshape/concat/test_dataframe.py | 23 +++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index 54cad82366e43..5898e51ab5f52 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -17,6 +17,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` also casting other columns to object dtype even when there were no values to replace (:issue:`46634`) - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) +- Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`) - Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`) - Fixed regression is :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`) - Fixed regression in :func:`read_csv` with ``index_col=False`` identifying first row as index names when ``header=None`` (:issue:`46955`) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 922c344510375..1e740132e3464 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,6 +1,9 @@ from __future__ import annotations import textwrap +from typing import cast + +import numpy as np from pandas._libs import ( NaT, @@ -10,6 +13,7 @@ from pandas.core.dtypes.common import is_dtype_equal +from pandas.core.algorithms import safe_sort from pandas.core.indexes.base import ( Index, _new_Index, @@ -154,7 +158,12 @@ def _get_combined_index( if sort: try: - index = index.sort_values() + array_sorted = safe_sort(index) + array_sorted = cast(np.ndarray, array_sorted) + if isinstance(index, MultiIndex): + index = MultiIndex.from_tuples(array_sorted, names=index.names) + else: + index = Index(array_sorted, name=index.name) except TypeError: pass diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 01763926c6d89..1018fc2806fee 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -205,3 +205,26 @@ def test_concat_copies(self, axis, order, ignore_index): for arr in res._iter_column_arrays(): for arr2 in df._iter_column_arrays(): assert not np.shares_memory(arr, arr2) + + def test_outer_sort_columns(self): + # GH#47127 + df1 = DataFrame({"A": [0], "B": [1], 0: 1}) + df2 = DataFrame({"A": [100]}) + result = concat([df1, df2], ignore_index=True, join="outer", sort=True) + expected = DataFrame({0: [1.0, np.nan], "A": [0, 100], "B": [1.0, np.nan]}) + tm.assert_frame_equal(result, expected) + + def test_inner_sort_columns(self): + # GH#47127 + df1 = DataFrame({"A": [0], "B": [1], 0: 1}) + df2 = DataFrame({"A": [100], 0: 2}) + result = concat([df1, df2], ignore_index=True, join="inner", sort=True) + expected = DataFrame({0: [1, 2], "A": [0, 100]}) + tm.assert_frame_equal(result, expected) + + def test_sort_columns_one_df(self): + # GH#47127 + df1 = DataFrame({"A": [100], 0: 2}) + result = concat([df1], ignore_index=True, join="inner", sort=True) + expected = DataFrame({0: [2], "A": [100]}) + tm.assert_frame_equal(result, expected)