From 49d1c23465def224f04497aa2259c66f53b6586c Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 30 May 2020 13:16:29 +0200 Subject: [PATCH 01/10] BUG: take always left index in merge_asof --- pandas/core/reshape/merge.py | 2 ++ pandas/tests/reshape/merge/test_merge_asof.py | 25 +++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0c796c8f45a52..5ebe9a26319f9 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -938,6 +938,8 @@ def _create_join_index( ------- join_index """ + if how == "left" and self.how == "asof": + return other_index.take(other_indexer) if self.how in (how, "outer") and not isinstance(other_index, MultiIndex): # if final index requires values in other_index but not target # index, indexer may hold missing (-1) values, causing Index.take diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 9b09f0033715d..8b59d9446b95f 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -5,7 +5,7 @@ import pytz import pandas as pd -from pandas import Timedelta, merge_asof, read_csv, to_datetime +from pandas import Index, Timedelta, merge_asof, read_csv, to_datetime import pandas._testing as tm from pandas.core.reshape.merge import MergeError @@ -1323,7 +1323,7 @@ def test_merge_index_column_tz(self): "from_date": index[1:], "abc": [2.46] * 3 + [2.19], }, - index=pd.Index([1, 2, 3, 4]), + index=index[1:], ) tm.assert_frame_equal(result, expected) @@ -1339,3 +1339,24 @@ def test_merge_index_column_tz(self): index=pd.Index([0, 1, 2, 3, 4]), ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"right_index": True, "left_index": True}, + {"left_on": "left_time", "right_index": True}, + {"left_index": True, "right_on": "right"}, + ], +) +def test_merge_asof_index_behavior(kwargs): + index = 
Index([1, 5, 10], name="test") + left = pd.DataFrame({"left": ["a", "b", "c"], "left_time": [1, 4, 10]}, index=index) + right = pd.DataFrame({"right": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7]) + result = merge_asof(left, right, **kwargs) + + expected = pd.DataFrame( + {"left": ["a", "b", "c"], "left_time": [1, 4, 10], "right": [1, 3, 7]}, + index=index, + ) + tm.assert_frame_equal(result, expected) From 742a5d9331cd787f944033dee7ba9fa4dff11368 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 30 May 2020 18:13:51 +0200 Subject: [PATCH 02/10] Add whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/tests/reshape/merge/test_merge_asof.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 17a830788be3f..d8df0d8ef347d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -936,6 +936,7 @@ Reshaping - Bug in :meth:`DataFrame.replace` casts columns to ``object`` dtype if items in ``to_replace`` not in values (:issue:`32988`) - Ensure only named functions can be used in :func:`eval()` (:issue:`32460`) - Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`) +- Bug in :func:`merge_asof` propagated the right Index with ``left_index=True`` and ``right_on`` specification instead of left Index (:issue:`33463`) Sparse ^^^^^^ diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 8b59d9446b95f..db8d8e2fe2f5d 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1350,6 +1350,7 @@ def test_merge_index_column_tz(self): ], ) def test_merge_asof_index_behavior(kwargs): + # GH 33463 index = Index([1, 5, 10], name="test") left = pd.DataFrame({"left": ["a", "b", "c"], "left_time": [1, 4, 10]}, index=index) right = pd.DataFrame({"right": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7]) From 
81d736fc59f1c55149a375922c76e69c284fc1b1 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 4 Sep 2020 20:35:11 +0200 Subject: [PATCH 03/10] Merge master and move whats new --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e65daa439a225..2937354384bd0 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -319,6 +319,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`) - Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`) - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) +- Bug in :func:`merge_asof` propagated the right Index with ``left_index=True`` and ``right_on`` specification instead of left Index (:issue:`33463`) - Sparse From 4b9336c25a53599f2e1b81d0fe9c20e9841c5722 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 4 Sep 2020 20:40:45 +0200 Subject: [PATCH 04/10] Fix pep 8 issues --- pandas/tests/reshape/merge/test_merge_asof.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 8cf52ee966219..0a1f314c72072 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1363,7 +1363,6 @@ def test_left_index_right_index_tolerance(self): tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( "kwargs", [ From 6e3553f851ebfd4496a7c3972071fc5e0a54c52b Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 5 Sep 2020 20:52:33 +0200 Subject: [PATCH 05/10] Move asof case to get_join_info --- pandas/core/reshape/merge.py | 15 +++++++++------ 1 file 
changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a3e78b9754001..1512875644325 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -889,19 +889,25 @@ def _get_join_info(self): self.left.index, self.right.index, left_indexer, - right_indexer, how="right", ) else: join_index = self.right.index.take(right_indexer) left_indexer = np.array([-1] * len(join_index)) elif self.left_index: - if len(self.right) > 0: + if self.how == "asof": + join_index = self._create_join_index( + self.left.index, + self.right.index, + left_indexer, + how="left", + ) + + elif len(self.right) > 0: join_index = self._create_join_index( self.right.index, self.left.index, right_indexer, - left_indexer, how="left", ) else: @@ -919,7 +925,6 @@ def _create_join_index( index: Index, other_index: Index, indexer, - other_indexer, how: str = "left", ): """ @@ -936,8 +941,6 @@ def _create_join_index( ------- join_index """ - if how == "left" and self.how == "asof": - return other_index.take(other_indexer) if self.how in (how, "outer") and not isinstance(other_index, MultiIndex): # if final index requires values in other_index but not target # index, indexer may hold missing (-1) values, causing Index.take From bfa5d26b8f0d57826d45126a4f8f815ca8c80628 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 5 Sep 2020 21:41:26 +0200 Subject: [PATCH 06/10] Run black pandas --- pandas/core/reshape/merge.py | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 1512875644325..55e1f28432ad4 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -886,10 +886,7 @@ def _get_join_info(self): if self.right_index: if len(self.left) > 0: join_index = self._create_join_index( - self.left.index, - self.right.index, - left_indexer, - how="right", + self.left.index, self.right.index, left_indexer, 
how="right", ) else: join_index = self.right.index.take(right_indexer) @@ -897,18 +894,12 @@ def _get_join_info(self): elif self.left_index: if self.how == "asof": join_index = self._create_join_index( - self.left.index, - self.right.index, - left_indexer, - how="left", + self.left.index, self.right.index, left_indexer, how="left", ) elif len(self.right) > 0: join_index = self._create_join_index( - self.right.index, - self.left.index, - right_indexer, - how="left", + self.right.index, self.left.index, right_indexer, how="left", ) else: join_index = self.left.index.take(left_indexer) @@ -921,11 +912,7 @@ def _get_join_info(self): return join_index, left_indexer, right_indexer def _create_join_index( - self, - index: Index, - other_index: Index, - indexer, - how: str = "left", + self, index: Index, other_index: Index, indexer, how: str = "left", ): """ Create a join index by rearranging one index to match another From ab079df6c565bbc259502b74a9538ee9772bf70a Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 6 Sep 2020 21:44:03 +0200 Subject: [PATCH 07/10] Change index and method signature --- pandas/core/reshape/merge.py | 2 +- pandas/tests/reshape/merge/test_merge_asof.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 55e1f28432ad4..23d0e4f385641 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -912,7 +912,7 @@ def _get_join_info(self): return join_index, left_indexer, right_indexer def _create_join_index( - self, index: Index, other_index: Index, indexer, how: str = "left", + self, index: Index, other_index: Index, indexer, how: str, ): """ Create a join index by rearranging one index to match another diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 0a1f314c72072..69484f795dd60 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ 
b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1323,7 +1323,9 @@ def test_merge_index_column_tz(self): "from_date": index[1:], "abc": [2.46] * 3 + [2.19], }, - index=index[1:], + index=pd.date_range( + "2019-10-01 00:30:00", freq="30min", periods=4, tz="UTC" + ), ) tm.assert_frame_equal(result, expected) From 3add9da888a66b418b6535db7a71df00bc48aecf Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 24 Nov 2020 23:51:30 +0100 Subject: [PATCH 08/10] Fix pre commit --- pandas/core/reshape/merge.py | 21 +++++++++++++++---- pandas/tests/reshape/merge/test_merge_asof.py | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c2eb8750a7c91..018d4fc24b1ce 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -888,7 +888,10 @@ def _get_join_info(self): if self.right_index: if len(self.left) > 0: join_index = self._create_join_index( - self.left.index, self.right.index, left_indexer, how="right", + self.left.index, + self.right.index, + left_indexer, + how="right", ) else: join_index = self.right.index.take(right_indexer) @@ -896,12 +899,18 @@ def _get_join_info(self): elif self.left_index: if self.how == "asof": join_index = self._create_join_index( - self.left.index, self.right.index, left_indexer, how="left", + self.left.index, + self.right.index, + left_indexer, + how="left", ) elif len(self.right) > 0: join_index = self._create_join_index( - self.right.index, self.left.index, right_indexer, how="left", + self.right.index, + self.left.index, + right_indexer, + how="left", ) else: join_index = self.left.index.take(left_indexer) @@ -914,7 +923,11 @@ def _get_join_info(self): return join_index, left_indexer, right_indexer def _create_join_index( - self, index: Index, other_index: Index, indexer, how: str, + self, + index: Index, + other_index: Index, + indexer, + how: str, ): """ Create a join index by rearranging one index to match another diff --git 
a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 28000ca3777e7..94f799324fa56 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1338,7 +1338,7 @@ def test_merge_index_column_tz(self): "abc": [2.46] * 4 + [2.19], "xyz": [np.nan, 0.9, 0.8, 0.7, 0.6], }, - index=pd.Index([0, 1, 2, 3, 4]), + index=Index([0, 1, 2, 3, 4]), ) tm.assert_frame_equal(result, expected) From a704eeed0f8c36b24f95dc7d5fd068ab4a7a153b Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 29 Dec 2020 22:08:34 +0100 Subject: [PATCH 09/10] Move whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 83bff6d7bfb2d..d13fa727cc05d 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -287,7 +287,7 @@ Reshaping ^^^^^^^^^ - -- +- Bug in :func:`merge_asof` propagating the right Index with ``left_index=True`` and ``right_on`` specification instead of left Index (:issue:`33463`) Sparse ^^^^^^ From f3d2e0fc60577725d3566c175a471479b5e4dffc Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 11 Apr 2021 03:09:39 +0200 Subject: [PATCH 10/10] Add comment --- pandas/core/reshape/merge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0489ce6ce81e2..8cee0dd2abb88 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -970,6 +970,7 @@ def _get_join_info( left_indexer = np.array([-1] * len(join_index), dtype=np.intp) elif self.left_index: if self.how == "asof": + # GH#33463 asof should always behave like a left merge join_index = self._create_join_index( self.left.index, self.right.index,