Skip to content

Commit 2621b31

Browse files
Christopher C. Aycock, jreback
Christopher C. Aycock authored and jreback committed
BUG: Allow multiple 'by' parameters in merge_asof() when DataFrames are indexed (pandas-dev#15676)
closes pandas-dev#15676
Author: Christopher C. Aycock <[email protected]>
Closes pandas-dev#15679 from chrisaycock/GH15676 and squashes the following commits:
965caf2 [Christopher C. Aycock] Verify that 'by' parameters are the same length
4a2cc09 [Christopher C. Aycock] BUG: Allow multiple 'by' parameters in merge_asof() when DataFrames are indexed (pandas-dev#15676)
1 parent c7c74ad commit 2621b31

File tree

3 files changed

+54
-7
lines changed

3 files changed

+54
-7
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,7 @@ Bug Fixes
884884
- Bug in the HTML display with a ``MultiIndex`` and truncation (:issue:`14882`)
885885

886886

887+
- Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` parameters were specified (:issue:`15676`)
887888
- Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`)
888889
- Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns were of ``category`` dtype (:issue:`15193`)
889890

pandas/tests/tools/test_merge_asof.py

+35
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,41 @@ def test_multiby_heterogeneous_types(self):
368368
by=['ticker', 'exch'])
369369
assert_frame_equal(result, expected)
370370

371+
def test_multiby_indexed(self):
372+
# GH15676: merge_asof on both indexes (left_index/right_index) with a two-column 'by'
373+
left = pd.DataFrame([
374+
[pd.to_datetime('20160602'), 1, 'a'],
375+
[pd.to_datetime('20160602'), 2, 'a'],
376+
[pd.to_datetime('20160603'), 1, 'b'],
377+
[pd.to_datetime('20160603'), 2, 'b']],
378+
columns=['time', 'k1', 'k2']).set_index('time')
379+
380+
right = pd.DataFrame([
381+
[pd.to_datetime('20160502'), 1, 'a', 1.0],
382+
[pd.to_datetime('20160502'), 2, 'a', 2.0],
383+
[pd.to_datetime('20160503'), 1, 'b', 3.0],
384+
[pd.to_datetime('20160503'), 2, 'b', 4.0]],
385+
columns=['time', 'k1', 'k2', 'value']).set_index('time')
386+
387+
expected = pd.DataFrame([
388+
[pd.to_datetime('20160602'), 1, 'a', 1.0],
389+
[pd.to_datetime('20160602'), 2, 'a', 2.0],
390+
[pd.to_datetime('20160603'), 1, 'b', 3.0],
391+
[pd.to_datetime('20160603'), 2, 'b', 4.0]],
392+
columns=['time', 'k1', 'k2', 'value']).set_index('time')
393+
394+
result = pd.merge_asof(left,
395+
right,
396+
left_index=True,
397+
right_index=True,
398+
by=['k1', 'k2'])
399+
400+
assert_frame_equal(expected, result)
401+
402+
with self.assertRaises(MergeError):  # left_by lists two keys, right_by only one
403+
pd.merge_asof(left, right, left_index=True, right_index=True,
404+
left_by=['k1', 'k2'], right_by=['k1'])
405+
371406
def test_basic2(self):
372407

373408
expected = self.read_data('asof2.csv')

pandas/tools/merge.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -1165,14 +1165,17 @@ def _validate_specification(self):
11651165
if self.left_by is not None and self.right_by is None:
11661166
raise MergeError('missing right_by')
11671167

1168-
# add by to our key-list so we can have it in the
1168+
# add 'by' to our key-list so we can have it in the
11691169
# output as a key
11701170
if self.left_by is not None:
11711171
if not is_list_like(self.left_by):
11721172
self.left_by = [self.left_by]
11731173
if not is_list_like(self.right_by):
11741174
self.right_by = [self.right_by]
11751175

1176+
if len(self.left_by) != len(self.right_by):
1177+
raise MergeError('left_by and right_by must be same length')
1178+
11761179
self.left_on = self.left_by + list(self.left_on)
11771180
self.right_on = self.right_by + list(self.right_on)
11781181

@@ -1264,13 +1267,21 @@ def flip(xs):
12641267

12651268
# a "by" parameter requires special handling
12661269
if self.left_by is not None:
1267-
if len(self.left_join_keys) > 2:
1268-
# get tuple representation of values if more than one
1269-
left_by_values = flip(self.left_join_keys[0:-1])
1270-
right_by_values = flip(self.right_join_keys[0:-1])
1270+
# remove 'on' parameter from values if one existed
1271+
if self.left_index and self.right_index:
1272+
left_by_values = self.left_join_keys
1273+
right_by_values = self.right_join_keys
1274+
else:
1275+
left_by_values = self.left_join_keys[0:-1]
1276+
right_by_values = self.right_join_keys[0:-1]
1277+
1278+
# get tuple representation of values if more than one
1279+
if len(left_by_values) == 1:
1280+
left_by_values = left_by_values[0]
1281+
right_by_values = right_by_values[0]
12711282
else:
1272-
left_by_values = self.left_join_keys[0]
1273-
right_by_values = self.right_join_keys[0]
1283+
left_by_values = flip(left_by_values)
1284+
right_by_values = flip(right_by_values)
12741285

12751286
# upcast 'by' parameter because HashTable is limited
12761287
by_type = _get_cython_type_upcast(left_by_values.dtype)

0 commit comments

Comments
 (0)