diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 8ead78a17e9c2..2cf93667354d0 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -29,6 +29,7 @@ Bug fixes - Bug in :func:`read_spss` where passing a ``pathlib.Path`` as ``path`` would raise a ``TypeError`` (:issue:`33666`) - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) - Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) +- Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`) - Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index dd6aadf570baa..ec62192464665 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -20,7 +20,6 @@ from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, - is_categorical_dtype, is_extension_array_dtype, ) from pandas.core.dtypes.generic import ABCMultiIndex @@ -294,13 +293,7 @@ def lexsort_indexer( keys = [ensure_key_mapped(k, key) for k in keys] for k, order in zip(keys, orders): - # we are already a Categorical - if is_categorical_dtype(k): - cat = k - - # create the Categorical - else: - cat = Categorical(k, ordered=True) + cat = Categorical(k, ordered=True) if na_position not in ["last", "first"]: raise ValueError(f"invalid na_position: {na_position}") diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index c60e7e3b1bdb6..0ca232ec433e7 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -691,3 +691,23 @@ def test_sort_values_key_dict_axis(self): result = df.sort_values(1, key=lambda col: -col, axis=1) expected = df.loc[:, ::-1] tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_sort_values_key_casts_to_categorical(self, ordered): + # https://github.com/pandas-dev/pandas/issues/36383 + categories = ["c", "b", "a"] + df = pd.DataFrame({"x": [1, 1, 1], "y": ["a", "b", "c"]}) + + def sorter(key): + if key.name == "y": + return pd.Series( + pd.Categorical(key, categories=categories, ordered=ordered) + ) + return key + + result = df.sort_values(by=["x", "y"], key=sorter) + expected = pd.DataFrame( + {"x": [1, 1, 1], "y": ["c", "b", "a"]}, index=pd.Index([2, 1, 0]) + ) + + tm.assert_frame_equal(result, expected)