diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst index 3cc11974b14e5..14e0bc5e777d8 100644 --- a/doc/source/whatsnew/v2.2.1.rst +++ b/doc/source/whatsnew/v2.2.1.rst @@ -15,6 +15,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed memory leak in :func:`read_csv` (:issue:`57039`) - Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`) +- Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`) - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`) - Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`) - Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 560285bd57a22..15292953e72d0 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -295,6 +295,7 @@ def _find_common_index_dtype(inds): raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") if len(dtis) == len(indexes): + sort = True result = indexes[0] elif len(dtis) > 1: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 2558532bfb029..f03b408b34150 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -205,8 +205,10 @@ def concat( Check whether the new concatenated axis contains duplicates. This can be very expensive relative to the actual data concatenation. sort : bool, default False - Sort non-concatenation axis if it is not already aligned. - + Sort non-concatenation axis if it is not already aligned. One exception to + this is when the non-concatentation axis is a DatetimeIndex and join='outer' + and the axis is not already aligned. In that case, the non-concatenation + axis is always sorted lexicographically. copy : bool, default True If False, do not copy data unnecessarily. diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 77485788faa02..d7791ec38a7ae 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -73,23 +73,23 @@ def test_concat_datetime_timezone(self): exp_idx = DatetimeIndex( [ - "2010-12-31 23:00:00+00:00", - "2011-01-01 00:00:00+00:00", - "2011-01-01 01:00:00+00:00", "2010-12-31 15:00:00+00:00", "2010-12-31 16:00:00+00:00", "2010-12-31 17:00:00+00:00", + "2010-12-31 23:00:00+00:00", + "2011-01-01 00:00:00+00:00", + "2011-01-01 01:00:00+00:00", ] ).as_unit("ns") expected = DataFrame( [ - [1, np.nan], - [2, np.nan], - [3, np.nan], [np.nan, 1], [np.nan, 2], [np.nan, 3], + [1, np.nan], + [2, np.nan], + [3, np.nan], ], index=exp_idx, columns=["a", "b"],