Skip to content

Commit ae049ae

Browse files
authored
BUG: concat fails if indexes are all the same and keys are not unique (#43596)
1 parent 9a4b0a0 commit ae049ae

File tree

3 files changed

+24
-2
lines changed

3 files changed

+24
-2
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ Reshaping
486486
- Bug in :meth:`DataFrame.append` failing to retain dtypes when appended columns do not match (:issue:`43392`)
487487
- Bug in :func:`concat` of ``bool`` and ``boolean`` dtypes resulting in ``object`` dtype instead of ``boolean`` dtype (:issue:`42800`)
488488
- Bug in :func:`crosstab` when inputs are categorical Series, there are categories that are not present in one or both of the Series, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`)
489+
- Bug in :func:`concat` which would fail when all of the objects in the ``objs`` argument had the same index and the ``keys`` argument contained duplicates (:issue:`43595`)
489490

490491
Sparse
491492
^^^^^^

pandas/core/reshape/concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
695695
else:
696696
levels = [ensure_index(x) for x in levels]
697697

698-
if not all_indexes_same(indexes):
698+
if not all_indexes_same(indexes) or not all(level.is_unique for level in levels):
699699
codes_list = []
700700

701701
# things are potentially different sizes, so compute the exact codes

pandas/tests/reshape/concat/test_concat.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,16 @@
33
deque,
44
)
55
from decimal import Decimal
6-
from warnings import catch_warnings
6+
from warnings import (
7+
catch_warnings,
8+
simplefilter,
9+
)
710

811
import numpy as np
912
import pytest
1013

14+
from pandas.errors import PerformanceWarning
15+
1116
import pandas as pd
1217
from pandas import (
1318
DataFrame,
@@ -560,6 +565,22 @@ def test_duplicate_keys(keys):
560565
tm.assert_frame_equal(result, expected)
561566

562567

568+
def test_duplicate_keys_same_frame():
569+
# GH 43595
570+
keys = ["e", "e"]
571+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
572+
result = concat([df, df], axis=1, keys=keys)
573+
expected_values = [[1, 4, 1, 4], [2, 5, 2, 5], [3, 6, 3, 6]]
574+
expected_columns = MultiIndex.from_tuples(
575+
[(keys[0], "a"), (keys[0], "b"), (keys[1], "a"), (keys[1], "b")]
576+
)
577+
expected = DataFrame(expected_values, columns=expected_columns)
578+
with catch_warnings():
579+
# result.columns not sorted, resulting in performance warning
580+
simplefilter("ignore", PerformanceWarning)
581+
tm.assert_frame_equal(result, expected)
582+
583+
563584
@pytest.mark.parametrize(
564585
"obj",
565586
[

0 commit comments

Comments
 (0)