Skip to content

Commit 0deadef

Browse files
lukemanley authored and mroeschke committed
BUG: merge with left and/or right empty returning mis-ordered columns (pandas-dev#55028)
1 parent 5139e84 commit 0deadef

File tree

3 files changed: +35 -21 lines changed

doc/source/whatsnew/v2.2.0.rst

+1 -1
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ Groupby/resample/rolling
246246

247247
Reshaping
248248
^^^^^^^^^
249-
-
249+
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
250250
-
251251

252252
Sparse

pandas/core/reshape/merge.py

+1 -6
Original file line numberDiff line numberDiff line change
@@ -1271,12 +1271,7 @@ def _get_merge_keys(
12711271
# work-around for merge_asof(right_index=True)
12721272
right_keys.append(right.index._values)
12731273
if lk is not None and lk == rk: # FIXME: what about other NAs?
1274-
# avoid key upcast in corner case (length-0)
1275-
lk = cast(Hashable, lk)
1276-
if len(left) > 0:
1277-
right_drop.append(rk)
1278-
else:
1279-
left_drop.append(lk)
1274+
right_drop.append(rk)
12801275
else:
12811276
rk = cast(ArrayLike, rk)
12821277
right_keys.append(rk)

pandas/tests/reshape/merge/test_merge.py

+33 -14
Original file line numberDiff line numberDiff line change
@@ -582,11 +582,11 @@ def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2):
582582
df_empty = df[:0]
583583
expected = DataFrame(
584584
{
585-
"value_x": Series(dtype=df.dtypes["value"]),
586585
"key": Series(dtype=df.dtypes["key"]),
586+
"value_x": Series(dtype=df.dtypes["value"]),
587587
"value_y": Series(dtype=df.dtypes["value"]),
588588
},
589-
columns=["value_x", "key", "value_y"],
589+
columns=["key", "value_x", "value_y"],
590590
)
591591
actual = df_empty.merge(df, on="key")
592592
tm.assert_frame_equal(actual, expected)
@@ -889,13 +889,13 @@ def test_merge_on_datetime64tz_empty(self):
889889
result = left.merge(right, on="date")
890890
expected = DataFrame(
891891
{
892+
"date": Series(dtype=dtz),
892893
"value_x": Series(dtype=float),
893894
"date2_x": Series(dtype=dtz),
894-
"date": Series(dtype=dtz),
895895
"value_y": Series(dtype=float),
896896
"date2_y": Series(dtype=dtz),
897897
},
898-
columns=["value_x", "date2_x", "date", "value_y", "date2_y"],
898+
columns=["date", "value_x", "date2_x", "value_y", "date2_y"],
899899
)
900900
tm.assert_frame_equal(result, expected)
901901

@@ -1827,11 +1827,9 @@ def test_merge_empty(self, left_empty, how, exp):
18271827
if exp == "left":
18281828
expected = DataFrame({"A": [2, 1], "B": [3, 4], "C": [np.nan, np.nan]})
18291829
elif exp == "right":
1830-
expected = DataFrame({"B": [np.nan], "A": [1], "C": [5]})
1830+
expected = DataFrame({"A": [1], "B": [np.nan], "C": [5]})
18311831
elif exp == "empty":
18321832
expected = DataFrame(columns=["A", "B", "C"], dtype="int64")
1833-
if left_empty:
1834-
expected = expected[["B", "A", "C"]]
18351833
elif exp == "empty_cross":
18361834
expected = DataFrame(columns=["A_x", "B", "A_y", "C"], dtype="int64")
18371835

@@ -2481,14 +2479,12 @@ def test_merge_multiindex_columns():
24812479
result = frame_x.merge(frame_y, on="id", suffixes=((l_suf, r_suf)))
24822480

24832481
# Constructing the expected results
2484-
expected_labels = [letter + l_suf for letter in letters] + [
2485-
letter + r_suf for letter in letters
2486-
]
2487-
expected_index = MultiIndex.from_product(
2488-
[expected_labels, numbers], names=["outer", "inner"]
2489-
)
2482+
tuples = [(letter + l_suf, num) for letter in letters for num in numbers]
2483+
tuples += [("id", "")]
2484+
tuples += [(letter + r_suf, num) for letter in letters for num in numbers]
2485+
2486+
expected_index = MultiIndex.from_tuples(tuples, names=["outer", "inner"])
24902487
expected = DataFrame(columns=expected_index)
2491-
expected["id"] = ""
24922488

24932489
tm.assert_frame_equal(result, expected)
24942490

@@ -2959,3 +2955,26 @@ def test_merge_arrow_string_index(any_string_dtype):
29592955
{"a": Series(["a", "b"], dtype=any_string_dtype), "b": [1, np.nan]}
29602956
)
29612957
tm.assert_frame_equal(result, expected)
2958+
2959+
2960+
@pytest.mark.parametrize("left_empty", [True, False])
2961+
@pytest.mark.parametrize("right_empty", [True, False])
2962+
def test_merge_empty_frames_column_order(left_empty, right_empty):
2963+
# GH 51929
2964+
df1 = DataFrame(1, index=[0], columns=["A", "B"])
2965+
df2 = DataFrame(1, index=[0], columns=["A", "C", "D"])
2966+
2967+
if left_empty:
2968+
df1 = df1.iloc[:0]
2969+
if right_empty:
2970+
df2 = df2.iloc[:0]
2971+
2972+
result = merge(df1, df2, on=["A"], how="outer")
2973+
expected = DataFrame(1, index=[0], columns=["A", "B", "C", "D"])
2974+
if left_empty and right_empty:
2975+
expected = expected.iloc[:0]
2976+
elif left_empty:
2977+
expected.loc[:, "B"] = np.nan
2978+
elif right_empty:
2979+
expected.loc[:, ["C", "D"]] = np.nan
2980+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)