From c46dcfadceb033167f864c3c8925aee39220cdc4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 18 Jul 2016 17:35:29 -0400 Subject: [PATCH] BUG: merge_asof not handling allow_exact_matches and tolerance on first entry closes #13695 --- doc/source/whatsnew/v0.19.0.txt | 2 +- pandas/src/join.pyx | 18 ++++++++------- pandas/tools/tests/test_merge_asof.py | 33 +++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 0b9695125c0a9..dd528669b47ef 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -46,7 +46,7 @@ The following are now part of this API: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ A long-time requested feature has been added through the :func:`merge_asof` function, to -support asof style joining of time-series. (:issue:`1870`). Full documentation is +support asof style joining of time-series. (:issue:`1870`, :issue:`13695`). Full documentation is :ref:`here <merging.merge_asof>` The :func:`merge_asof` performs an asof merge, which is similar to a left-join diff --git a/pandas/src/join.pyx b/pandas/src/join.pyx index a81ac0aa35d4e..ad3b1d4e4a90e 100644 --- a/pandas/src/join.pyx +++ b/pandas/src/join.pyx @@ -193,11 +193,12 @@ def left_outer_asof_join(ndarray[int64_t] left, ndarray[int64_t] right, diff = left_val - right_val # do we allow exact matches - if allow_exact_matches and diff > tol: - right_indexer[indexer] = -1 - continue + if allow_exact_matches: + if diff > tol: + right_indexer[indexer] = -1 + continue elif not allow_exact_matches: - if diff >= tol: + if diff >= tol or lc == rc: right_indexer[indexer] = -1 continue @@ -220,13 +221,14 @@ def left_outer_asof_join(ndarray[int64_t] left, ndarray[int64_t] right, diff = left_val - right_val # do we allow exact matches - if allow_exact_matches and diff > tol: - right_indexer[indexer] = -1 - continue + if allow_exact_matches: + if diff > tol: + right_indexer[indexer] = -1 +
continue # we don't allow exact matches elif not allow_exact_matches: - if diff >= tol or not right_pos: + if diff >= tol or lc == rc: right_indexer[indexer] = -1 else: right_indexer[indexer] = right_pos - 1 diff --git a/pandas/tools/tests/test_merge_asof.py b/pandas/tools/tests/test_merge_asof.py index 5d78ccf199ed3..bcbb0f0fadb49 100644 --- a/pandas/tools/tests/test_merge_asof.py +++ b/pandas/tools/tests/test_merge_asof.py @@ -347,6 +347,39 @@ def test_allow_exact_matches_and_tolerance(self): expected = self.allow_exact_matches_and_tolerance assert_frame_equal(result, expected) + def test_allow_exact_matches_and_tolerance2(self): + # GH 13695 + df1 = pd.DataFrame({ + 'time': pd.to_datetime(['2016-07-15 13:30:00.030']), + 'username': ['bob']}) + df2 = pd.DataFrame({ + 'time': pd.to_datetime(['2016-07-15 13:30:00.000', + '2016-07-15 13:30:00.030']), + 'version': [1, 2]}) + + result = pd.merge_asof(df1, df2, on='time') + expected = pd.DataFrame({ + 'time': pd.to_datetime(['2016-07-15 13:30:00.030']), + 'username': ['bob'], + 'version': [2]}) + assert_frame_equal(result, expected) + + result = pd.merge_asof(df1, df2, on='time', allow_exact_matches=False) + expected = pd.DataFrame({ + 'time': pd.to_datetime(['2016-07-15 13:30:00.030']), + 'username': ['bob'], + 'version': [1]}) + assert_frame_equal(result, expected) + + result = pd.merge_asof(df1, df2, on='time', allow_exact_matches=False, + tolerance=pd.Timedelta('10ms')) + expected = pd.DataFrame({ + 'time': pd.to_datetime(['2016-07-15 13:30:00.030']), + 'username': ['bob'], + 'version': [np.nan]}) + assert_frame_equal(result, expected) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False)