From e3f2e6a11fdbfb4be0755b3657c4b5ea532fce43 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Sat, 13 May 2023 18:40:24 +0000 Subject: [PATCH 1/3] fix merge when MultiIndex with single level --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/reshape/merge.py | 17 ++++------------- pandas/tests/reshape/merge/test_merge.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 52fc8512c9db3..06d16c4ac95b1 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -421,6 +421,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`) +- Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`) - Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`) - Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`) - Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a96a08f18e81f..7e73642e5c563 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2261,23 +2261,14 @@ def _get_no_sort_one_missing_indexer( def _left_join_on_index( left_ax: Index, right_ax: Index, join_keys, sort: bool = False ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp]]: - if len(join_keys) > 1: - if not ( - isinstance(right_ax, MultiIndex) and len(join_keys) == right_ax.nlevels - ): - raise AssertionError( - "If more than one join key is given then " - "'right_ax' must be a MultiIndex and the " - "number of join keys must be the number of levels in right_ax" - ) - + if isinstance(right_ax, MultiIndex): left_indexer, right_indexer = _get_multiindex_indexer( join_keys, right_ax, sort=sort ) else: - jkey = join_keys[0] - - left_indexer, right_indexer = _get_single_indexer(jkey, right_ax, sort=sort) + left_indexer, right_indexer = _get_single_indexer( + join_keys[0], right_ax, sort=sort + ) if sort or len(left_ax) != len(left_indexer): # if asked to sort or there are 1-to-many matches diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 017bf1c917e37..6a5bc42b20046 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2773,3 +2773,17 @@ def test_merge_arrow_and_numpy_dtypes(dtype): result = df2.merge(df) expected = df2.copy() tm.assert_frame_equal(result, expected) + + +def test_merge_multiindex_single_level(): + # Non-regression test for GH #52331 + # Merge on MultiIndex with single level + df = DataFrame({"col": ["A", "B"]}) + df2 = DataFrame( + data={"b": [100]}, + index=MultiIndex.from_tuples([("A",), ("C",)], names=["col"]), + ) + expected = DataFrame({"col": ["A", "B"], "b": [100, np.nan]}) + + result = df.merge(df2, left_on=["col"], right_index=True, how="left") + tm.assert_frame_equal(result, expected) From a8fed3929c498129ef39796e99eb61e98c22ae2a Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Sun, 14 May 2023 13:07:06 +0000 Subject: [PATCH 2/3] resolved conversations --- pandas/tests/reshape/merge/test_merge.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 6a5bc42b20046..273b10bbf9e2b 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2776,8 +2776,7 @@ def test_merge_arrow_and_numpy_dtypes(dtype): def test_merge_multiindex_single_level(): - # Non-regression test for GH #52331 - # Merge on MultiIndex with single level + # GH #52331 df = DataFrame({"col": ["A", "B"]}) df2 = DataFrame( data={"b": [100]}, From ee504f9b5f80f7a976db520a6a8d796206e3c44b Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Tue, 16 May 2023 01:58:04 +0000 Subject: [PATCH 3/3] fixed code style --- pandas/tests/reshape/merge/test_merge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 039d6987b0232..cb773a30901a2 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2775,7 +2775,7 @@ def test_merge_arrow_and_numpy_dtypes(dtype): expected = df2.copy() tm.assert_frame_equal(result, expected) - + @pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")]) def test_merge_datetime_different_resolution(tzinfo): # https://github.com/pandas-dev/pandas/issues/53200 @@ -2798,6 +2798,7 @@ def test_merge_datetime_different_resolution(tzinfo): result = df1.merge(df2, on="t") tm.assert_frame_equal(result, expected) + def test_merge_multiindex_single_level(): # GH #52331 df = DataFrame({"col": ["A", "B"]})