diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 58892b316c940..e3db8edee8521 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -184,7 +184,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`) - Sparse diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 374de6156c807..c32ca47c19160 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -725,8 +725,9 @@ def _convert_level_number(level_num, columns): new_names = list(this.index.names) new_codes = [lab.repeat(levsize) for lab in this.index.codes] else: - new_levels = [this.index] - new_codes = [np.arange(N).repeat(levsize)] + old_codes, old_levels = _factorize_from_iterable(this.index) + new_levels = [old_levels] + new_codes = [old_codes.repeat(levsize)] new_names = [this.index.name] # something better? 
new_levels.append(level_vals) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 84e343f07f990..eb654be3f12e6 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1002,6 +1002,27 @@ def test_stack_preserve_categorical_dtype_values(self): ) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "index, columns", + [ + ([0, 0, 1, 1], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])), + ([0, 0, 2, 3], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])), + ([0, 1, 2, 3], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])), + ], + ) + def test_stack_multi_columns_non_unique_index(self, index, columns): + # GH-28301 + df = pd.DataFrame(index=index, columns=columns).fillna(1) + stacked = df.stack() + new_index = pd.MultiIndex.from_tuples(stacked.index.to_numpy()) + expected = pd.DataFrame( + stacked.to_numpy(), index=new_index, columns=stacked.columns + ) + tm.assert_frame_equal(stacked, expected) + stacked_codes = np.asarray(stacked.index.codes) + expected_codes = np.asarray(new_index.codes) + tm.assert_numpy_array_equal(stacked_codes, expected_codes) + @pytest.mark.parametrize("level", [0, 1]) def test_unstack_mixed_extension_types(self, level): index = pd.MultiIndex.from_tuples(