Skip to content

Commit 44d2851

Browse files
phofl authored and feefladder committed
Bug in concat creating invalid MultiIndex (pandas-dev#42675)
1 parent 8183415 commit 44d2851

File tree

3 files changed

+16
-3
lines changed

3 files changed

+16
-3
lines changed

doc/source/whatsnew/v1.4.0.rst

+1 -1
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ Groupby/resample/rolling
262262

263263
Reshaping
264264
^^^^^^^^^
265-
-
265+
- :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
266266
-
267267

268268
Sparse

pandas/core/reshape/concat.py

+3 -2
Original file line numberDiff line numberDiff line change
@@ -717,8 +717,9 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
717717
new_levels.extend(new_index.levels)
718718
new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
719719
else:
720-
new_levels.append(new_index)
721-
new_codes.append(np.tile(np.arange(n), kpieces))
720+
new_levels.append(new_index.unique())
721+
single_codes = new_index.unique().get_indexer(new_index)
722+
new_codes.append(np.tile(single_codes, kpieces))
722723

723724
if len(new_names) < len(new_levels):
724725
new_names.extend(new_index.names)

pandas/tests/reshape/concat/test_dataframe.py

+12
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,15 @@ def test_concat_bool_with_int(self):
180180
result = concat([df1, df2])
181181
expected = concat([df1.astype("int64"), df2])
182182
tm.assert_frame_equal(result, expected)
183+
184+
def test_concat_duplicates_in_index_with_keys(self):
185+
# GH#42651
186+
index = [1, 1, 3]
187+
data = [1, 2, 3]
188+
189+
df = DataFrame(data=data, index=index)
190+
result = concat([df], keys=["A"], names=["ID", "date"])
191+
mi = pd.MultiIndex.from_product([["A"], index], names=["ID", "date"])
192+
expected = DataFrame(data=data, index=mi)
193+
tm.assert_frame_equal(result, expected)
194+
tm.assert_index_equal(result.index.levels[1], Index([1, 3], name="date"))

0 commit comments

Comments
 (0)