diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index cea42cbffa906..68b13c2fe28f5 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -712,6 +712,7 @@ Reshaping - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) - Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) - Bug in :func:`concat` resulting in a ``ValueError`` when at least one of both inputs had a non-unique index (:issue:`36263`) +- Bug in :meth:`DataFrame.merge` and :func:`pandas.merge` returning rows in an inconsistent order for ``how="right"`` and ``how="left"`` (:issue:`35382`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 918a894a27916..cdcd6b19704c4 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1358,12 +1358,14 @@ def get_join_indexers( lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort, how=how) # preserve left frame order if how == 'left' and sort == False kwargs = copy.copy(kwargs) - if how == "left": + if how in ("left", "right"): kwargs["sort"] = sort join_func = { "inner": libjoin.inner_join, "left": libjoin.left_outer_join, - "right": _right_outer_join, + "right": lambda x, y, count, **kwargs: libjoin.left_outer_join( + y, x, count, **kwargs + )[::-1], "outer": libjoin.full_outer_join, }[how] @@ -1883,11 +1885,6 @@ def _left_join_on_index(left_ax: Index, right_ax: Index, join_keys, sort: bool = return left_ax, None, right_indexer -def _right_outer_join(x, y, max_groups): - right_indexer, left_indexer = libjoin.left_outer_join(y, x, max_groups) - return left_indexer, right_indexer - - def _factorize_keys( lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner" ) -> Tuple[np.ndarray, np.ndarray, int]: diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 
999b827fe0571..9ccfdfc146eac 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -601,6 +601,18 @@ def test_merge_nosort(self): assert (df.var3.unique() == result.var3.unique()).all() + @pytest.mark.parametrize( + ("sort", "values"), [(False, [1, 1, 0, 1, 1]), (True, [0, 1, 1, 1, 1])] + ) + @pytest.mark.parametrize("how", ["left", "right"]) + def test_merge_same_order_left_right(self, sort, values, how): + # GH#35382 + df = DataFrame({"a": [1, 0, 1]}) + + result = df.merge(df, on="a", how=how, sort=sort) + expected = DataFrame(values, columns=["a"]) + tm.assert_frame_equal(result, expected) + def test_merge_nan_right(self): df1 = DataFrame({"i1": [0, 1], "i2": [0, 1]}) df2 = DataFrame({"i1": [0], "i3": [0]})