diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c9267a756bef3..f3d4d53b00aa2 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -786,6 +786,7 @@ Reshaping ^^^^^^^^^ - Bug in :func:`merge` raising error when performing an inner join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`) - Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) +- Bug in :func:`merge_asof` propagating the right Index instead of the left Index when ``left_index=True`` and ``right_on`` are specified (:issue:`33463`) - Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`) - :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`) - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 94d78f6b54b91..8cee0dd2abb88 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -969,7 +969,16 @@ def _get_join_info( join_index = self.right.index.take(right_indexer) left_indexer = np.array([-1] * len(join_index), dtype=np.intp) elif self.left_index: - if len(self.right) > 0: + if self.how == "asof": + # GH#33463 asof should always behave like a left merge + join_index = self._create_join_index( + self.left.index, + self.right.index, + left_indexer, + how="left", + ) + + elif len(self.right) > 0: join_index = self._create_join_index( self.right.index, self.left.index, diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 3f5bb9b84372c..671f0ad2d26c7 100644 --- 
a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -6,6 +6,7 @@ import pandas as pd from pandas import ( + Index, Timedelta, merge_asof, read_csv, @@ -1338,7 +1339,9 @@ def test_merge_index_column_tz(self): "from_date": index[1:], "abc": [2.46] * 3 + [2.19], }, - index=pd.Index([1, 2, 3, 4]), + index=pd.date_range( + "2019-10-01 00:30:00", freq="30min", periods=4, tz="UTC" + ), ) tm.assert_frame_equal(result, expected) @@ -1351,7 +1354,7 @@ def test_merge_index_column_tz(self): "abc": [2.46] * 4 + [2.19], "xyz": [np.nan, 0.9, 0.8, 0.7, 0.6], }, - index=pd.Index([0, 1, 2, 3, 4]), + index=Index([0, 1, 2, 3, 4]), ) tm.assert_frame_equal(result, expected) @@ -1412,3 +1415,25 @@ def test_merge_asof_non_numerical_dtype_object(): left_by="a", right_by="left_val", ) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"right_index": True, "left_index": True}, + {"left_on": "left_time", "right_index": True}, + {"left_index": True, "right_on": "right"}, + ], +) +def test_merge_asof_index_behavior(kwargs): + # GH 33463 + index = Index([1, 5, 10], name="test") + left = pd.DataFrame({"left": ["a", "b", "c"], "left_time": [1, 4, 10]}, index=index) + right = pd.DataFrame({"right": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7]) + result = merge_asof(left, right, **kwargs) + + expected = pd.DataFrame( + {"left": ["a", "b", "c"], "left_time": [1, 4, 10], "right": [1, 3, 7]}, + index=index, + ) + tm.assert_frame_equal(result, expected)