Skip to content

Commit 8ebb66f

Browse files
MarcoGorelli authored and rhshadrach committed
BUG: can't concatenate DataFrame with Series with duplicate keys (pandas-dev#33805)
1 parent eb5c52d commit 8ebb66f

File tree

3 files changed

+20
-4
lines changed

3 files changed

+20
-4
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -724,6 +724,7 @@ Reshaping
724724
- Bug in :meth:`concat` where passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`)
725725
- :meth:`DataFrame.agg` now provides a more descriptive ``SpecificationError`` message when attempting to aggregate a non-existent column (:issue:`32755`)
726726
- Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`)
727+
- Bug in :func:`concat` that did not allow concatenation of ``DataFrame`` and ``Series`` with duplicate keys (:issue:`33654`)
727728
- Bug in :func:`cut` where an error was raised when passing non-unique labels (:issue:`33141`)
728729

729730

pandas/core/reshape/concat.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -619,10 +619,10 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
619619
for hlevel, level in zip(zipped, levels):
620620
to_concat = []
621621
for key, index in zip(hlevel, indexes):
622-
try:
623-
i = level.get_loc(key)
624-
except KeyError as err:
625-
raise ValueError(f"Key {key} not in level {level}") from err
622+
mask = level == key
623+
if not mask.any():
624+
raise ValueError(f"Key {key} not in level {level}")
625+
i = np.nonzero(level == key)[0][0]
626626

627627
to_concat.append(np.repeat(i, len(index)))
628628
codes_list.append(np.concatenate(to_concat))

pandas/tests/reshape/test_concat.py

+15
Original file line number | Diff line number | Diff line change
@@ -2802,3 +2802,18 @@ def test_concat_multiindex_datetime_object_index():
28022802
)
28032803
result = concat([s, s2], axis=1)
28042804
tm.assert_frame_equal(result, expected)
2805+
2806+
2807+
@pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]])
2808+
def test_duplicate_keys(keys):
2809+
# GH 33654
2810+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
2811+
s1 = Series([7, 8, 9], name="c")
2812+
s2 = Series([10, 11, 12], name="d")
2813+
result = concat([df, s1, s2], axis=1, keys=keys)
2814+
expected_values = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]]
2815+
expected_columns = pd.MultiIndex.from_tuples(
2816+
[(keys[0], "a"), (keys[0], "b"), (keys[1], "c"), (keys[2], "d")]
2817+
)
2818+
expected = DataFrame(expected_values, columns=expected_columns)
2819+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)