diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 44691e4265f5b..5ee378f34b34e 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -491,7 +491,6 @@ Reshaping - Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`) - Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`) - Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`) -- Bug in :meth:`DataFrame.stack` would incorrectly order results when ``sort=True`` and the input had :class:`MultiIndex` levels that were not sorted (:issue:`53636`) - Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`) - Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`) - diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index f6ce9955bc2bc..3866d30e9c757 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -756,16 +756,7 @@ def _convert_level_number(level_num: int, columns: Index): level_vals = mi_cols.levels[-1] level_codes = unique(mi_cols.codes[-1]) if sort: - _, index, inverse = np.unique( - level_vals, return_index=True, return_inverse=True - ) - sorted_level_vals = np.take(level_vals, index) level_codes = np.sort(level_codes) - # Take level_codes according to where level_vals get sorted to, while - # also allowing for NA (-1) values - level_codes = np.where(level_codes == -1, -1, np.take(inverse, level_codes)) - else: - sorted_level_vals = level_vals level_vals_nan = level_vals.insert(len(level_vals), None) level_vals_used = np.take(level_vals_nan, level_codes) @@ -827,7 +818,7 @@ def _convert_level_number(level_num: int, columns: Index): new_codes = [old_codes.repeat(levsize)] new_names = [this.index.name] # something better? - new_levels.append(sorted_level_vals) + new_levels.append(level_vals) new_codes.append(np.tile(level_codes, N)) new_names.append(frame.columns.names[level_num]) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 13d93bb0a490d..6cc6534da0b87 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -2000,20 +2000,18 @@ def __init__(self, *args, **kwargs) -> None: ), ) @pytest.mark.parametrize("stack_lev", range(2)) - @pytest.mark.parametrize("sort", [True, False]) - def test_stack_order_with_unsorted_levels(self, levels, stack_lev, sort): + def test_stack_order_with_unsorted_levels(self, levels, stack_lev): # GH#16323 # deep check for 1-row case columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) df = DataFrame(columns=columns, data=[range(4)]) - df_stacked = df.stack(stack_lev, sort=sort) - for row in df.index: - for col in df.columns: - expected = df.loc[row, col] - result_row = row, col[stack_lev] - result_col = col[1 - stack_lev] - result = df_stacked.loc[result_row, result_col] - assert result == expected + df_stacked = df.stack(stack_lev) + assert all( + df.loc[row, col] + == df_stacked.loc[(row, col[stack_lev]), col[1 - stack_lev]] + for row in df.index + for col in df.columns + ) def test_stack_order_with_unsorted_levels_multi_row(self): # GH#16323 @@ -2032,26 +2030,6 @@ def test_stack_order_with_unsorted_levels_multi_row(self): for col in df.columns ) - def test_stack_order_with_unsorted_levels_multi_row_2(self): - # GH#53636 - levels = ((0, 1), (1, 0)) - stack_lev = 1 - columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - df = DataFrame(columns=columns, data=[range(4)], index=[1, 0, 2, 3]) - result = df.stack(stack_lev, sort=True) - expected_index = MultiIndex( - levels=[[0, 1, 2, 3], [0, 1]], - codes=[[1, 1, 0, 0, 2, 2, 3, 3], [1, 0, 1, 0, 1, 0, 1, 0]], - ) - expected = DataFrame( - { - 0: [0, 1, 0, 1, 0, 1, 0, 1], - 1: [2, 3, 2, 3, 2, 3, 2, 3], - }, - index=expected_index, - ) - tm.assert_frame_equal(result, expected) - def test_stack_unstack_unordered_multiindex(self): # GH# 18265 values = np.arange(5)