Skip to content

Commit 2ee3f05

Browse files
christopherzimmerman authored and proost committed
GH28301 check for non-unique index in stack_multi_columns (pandas-dev#28336)
1 parent d4a6b8c commit 2ee3f05

File tree

3 files changed

+25
-3
lines changed

3 files changed

+25
-3
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -205,7 +205,7 @@ Groupby/resample/rolling
205205
Reshaping
206206
^^^^^^^^^
207207

208-
-
208+
- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`)
209209
-
210210

211211
Sparse

pandas/core/reshape/reshape.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -725,8 +725,9 @@ def _convert_level_number(level_num, columns):
725725
new_names = list(this.index.names)
726726
new_codes = [lab.repeat(levsize) for lab in this.index.codes]
727727
else:
728-
new_levels = [this.index]
729-
new_codes = [np.arange(N).repeat(levsize)]
728+
old_codes, old_levels = _factorize_from_iterable(this.index)
729+
new_levels = [old_levels]
730+
new_codes = [old_codes.repeat(levsize)]
730731
new_names = [this.index.name] # something better?
731732

732733
new_levels.append(level_vals)

pandas/tests/frame/test_reshape.py

+21
Original file line number | Diff line number | Diff line change
@@ -1002,6 +1002,27 @@ def test_stack_preserve_categorical_dtype_values(self):
10021002
)
10031003
tm.assert_series_equal(result, expected)
10041004

1005+
@pytest.mark.parametrize(
1006+
"index, columns",
1007+
[
1008+
([0, 0, 1, 1], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])),
1009+
([0, 0, 2, 3], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])),
1010+
([0, 1, 2, 3], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])),
1011+
],
1012+
)
1013+
def test_stack_multi_columns_non_unique_index(self, index, columns):
1014+
# GH-28301
1015+
df = pd.DataFrame(index=index, columns=columns).fillna(1)
1016+
stacked = df.stack()
1017+
new_index = pd.MultiIndex.from_tuples(stacked.index.to_numpy())
1018+
expected = pd.DataFrame(
1019+
stacked.to_numpy(), index=new_index, columns=stacked.columns
1020+
)
1021+
tm.assert_frame_equal(stacked, expected)
1022+
stacked_codes = np.asarray(stacked.index.codes)
1023+
expected_codes = np.asarray(new_index.codes)
1024+
tm.assert_numpy_array_equal(stacked_codes, expected_codes)
1025+
10051026
@pytest.mark.parametrize("level", [0, 1])
10061027
def test_unstack_mixed_extension_types(self, level):
10071028
index = pd.MultiIndex.from_tuples(

0 commit comments

Comments
 (0)