BUG: Allow multiple 'by' parameters in merge_asof() when DataFrames are indexed (#15676)

Christopher C. Aycock · Christopher C. Aycock · commit 4a2cc09bade8 · 2017-03-13T22:25:20.000-04:00
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -851,6 +851,7 @@ Bug Fixes
 - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`)
 
 
+- Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` keys were specified (:issue:`15676`)
 - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`)
 - Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`)
 
diff --git a/pandas/tests/tools/test_merge_asof.py b/pandas/tests/tools/test_merge_asof.py
@@ -368,6 +368,37 @@ def test_multiby_heterogeneous_types(self):
                                by=['ticker', 'exch'])
         assert_frame_equal(result, expected)
 
+    def test_multiby_indexed(self):
+        # GH15676: multiple 'by' keys must work when joining on both indexes
+        left = pd.DataFrame([
+            [pd.to_datetime('20160602'), 1, 'a'],
+            [pd.to_datetime('20160602'), 2, 'a'],
+            [pd.to_datetime('20160603'), 1, 'b'],
+            [pd.to_datetime('20160603'), 2, 'b']],
+            columns=['time', 'k1', 'k2']).set_index('time')
+
+        right = pd.DataFrame([
+            [pd.to_datetime('20160502'), 1, 'a', 1.0],
+            [pd.to_datetime('20160502'), 2, 'a', 2.0],
+            [pd.to_datetime('20160503'), 1, 'b', 3.0],
+            [pd.to_datetime('20160503'), 2, 'b', 4.0]],
+            columns=['time', 'k1', 'k2', 'value']).set_index('time')
+
+        expected = pd.DataFrame([
+            [pd.to_datetime('20160602'), 1, 'a', 1.0],
+            [pd.to_datetime('20160602'), 2, 'a', 2.0],
+            [pd.to_datetime('20160603'), 1, 'b', 3.0],
+            [pd.to_datetime('20160603'), 2, 'b', 4.0]],
+            columns=['time', 'k1', 'k2', 'value']).set_index('time')
+
+        result = pd.merge_asof(left,
+                               right,
+                               left_index=True,
+                               right_index=True,
+                               by=['k1', 'k2'])
+
+        assert_frame_equal(expected, result)
+
     def test_basic2(self):
 
         expected = self.read_data('asof2.csv')
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
@@ -1264,13 +1264,21 @@ def flip(xs):
 
         # a "by" parameter requires special handling
         if self.left_by is not None:
-            if len(self.left_join_keys) > 2:
-                # get tuple representation of values if more than one
-                left_by_values = flip(self.left_join_keys[0:-1])
-                right_by_values = flip(self.right_join_keys[0:-1])
+            # remove 'on' parameter from values if one existed
+            if self.left_index and self.right_index:
+                left_by_values = self.left_join_keys
+                right_by_values = self.right_join_keys
+            else:
+                left_by_values = self.left_join_keys[0:-1]
+                right_by_values = self.right_join_keys[0:-1]
+
+            # get tuple representation of values if more than one
+            if len(left_by_values) == 1:
+                left_by_values = left_by_values[0]
+                right_by_values = right_by_values[0]
             else:
-                left_by_values = self.left_join_keys[0]
-                right_by_values = self.right_join_keys[0]
+                left_by_values = flip(left_by_values)
+                right_by_values = flip(right_by_values)
 
             # upcast 'by' parameter because HashTable is limited
             by_type = _get_cython_type_upcast(left_by_values.dtype)