diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index ae1844b0a913c..3fc29441c2c3b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -261,7 +261,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`) - Sparse diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index d908638c4706b..560735b593cd1 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -717,8 +717,9 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde new_levels.extend(new_index.levels) new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes]) else: - new_levels.append(new_index) - new_codes.append(np.tile(np.arange(n), kpieces)) + new_levels.append(new_index.unique()) + single_codes = new_index.unique().get_indexer(new_index) + new_codes.append(np.tile(single_codes, kpieces)) if len(new_names) < len(new_levels): new_names.extend(new_index.names) diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 91c246fc9ee2d..460546f4b478a 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -180,3 +180,15 @@ def test_concat_bool_with_int(self): result = concat([df1, df2]) expected = concat([df1.astype("int64"), df2]) tm.assert_frame_equal(result, expected) + + def test_concat_duplicates_in_index_with_keys(self): + # GH#42651 + index = [1, 1, 3] + data = [1, 2, 3] + + df = DataFrame(data=data, index=index) + result = concat([df], keys=["A"], names=["ID", "date"]) + mi = pd.MultiIndex.from_product([["A"], index], names=["ID", "date"]) + expected = DataFrame(data=data, index=mi) + tm.assert_frame_equal(result, expected) + tm.assert_index_equal(result.index.levels[1], Index([1, 3], name="date"))