Skip to content

Commit 4a2cc09

Browse files
author
Christopher C. Aycock
committed
BUG: Allow multiple 'by' parameters in merge_asof() when DataFrames are indexed (#15676)
1 parent 05d70f4 commit 4a2cc09

File tree

3 files changed

+46
-6
lines changed

3 files changed

+46
-6
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,7 @@ Bug Fixes
851851
- Bug in the HTML display with a ``MultiIndex`` and truncation (:issue:`14882`)
852852

853853

854+
- Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` keys were specified (:issue:`15676`)
854855
- Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`)
855856
- Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns were of ``category`` dtype (:issue:`15193`)
856857

pandas/tests/tools/test_merge_asof.py

+31
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,37 @@ def test_multiby_heterogeneous_types(self):
368368
by=['ticker', 'exch'])
369369
assert_frame_equal(result, expected)
370370

371+
def test_multiby_indexed(self):
372+
# GH15676
373+
left = pd.DataFrame([
374+
[pd.to_datetime('20160602'), 1, 'a'],
375+
[pd.to_datetime('20160602'), 2, 'a'],
376+
[pd.to_datetime('20160603'), 1, 'b'],
377+
[pd.to_datetime('20160603'), 2, 'b']],
378+
columns=['time', 'k1', 'k2']).set_index('time')
379+
380+
right = pd.DataFrame([
381+
[pd.to_datetime('20160502'), 1, 'a', 1.0],
382+
[pd.to_datetime('20160502'), 2, 'a', 2.0],
383+
[pd.to_datetime('20160503'), 1, 'b', 3.0],
384+
[pd.to_datetime('20160503'), 2, 'b', 4.0]],
385+
columns=['time', 'k1', 'k2', 'value']).set_index('time')
386+
387+
expected = pd.DataFrame([
388+
[pd.to_datetime('20160602'), 1, 'a', 1.0],
389+
[pd.to_datetime('20160602'), 2, 'a', 2.0],
390+
[pd.to_datetime('20160603'), 1, 'b', 3.0],
391+
[pd.to_datetime('20160603'), 2, 'b', 4.0]],
392+
columns=['time', 'k1', 'k2', 'value']).set_index('time')
393+
394+
result = pd.merge_asof(left,
395+
right,
396+
left_index=True,
397+
right_index=True,
398+
by=['k1', 'k2'])
399+
400+
assert_frame_equal(expected, result)
401+
371402
def test_basic2(self):
372403

373404
expected = self.read_data('asof2.csv')

pandas/tools/merge.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -1264,13 +1264,21 @@ def flip(xs):
12641264

12651265
# a "by" parameter requires special handling
12661266
if self.left_by is not None:
1267-
if len(self.left_join_keys) > 2:
1268-
# get tuple representation of values if more than one
1269-
left_by_values = flip(self.left_join_keys[0:-1])
1270-
right_by_values = flip(self.right_join_keys[0:-1])
1267+
# remove 'on' parameter from values if one existed
1268+
if self.left_index and self.right_index:
1269+
left_by_values = self.left_join_keys
1270+
right_by_values = self.right_join_keys
1271+
else:
1272+
left_by_values = self.left_join_keys[0:-1]
1273+
right_by_values = self.right_join_keys[0:-1]
1274+
1275+
# get tuple representation of values if more than one
1276+
if len(left_by_values) == 1:
1277+
left_by_values = left_by_values[0]
1278+
right_by_values = right_by_values[0]
12711279
else:
1272-
left_by_values = self.left_join_keys[0]
1273-
right_by_values = self.right_join_keys[0]
1280+
left_by_values = flip(left_by_values)
1281+
right_by_values = flip(right_by_values)
12741282

12751283
# upcast 'by' parameter because HashTable is limited
12761284
by_type = _get_cython_type_upcast(left_by_values.dtype)

0 commit comments

Comments
 (0)