@@ -259,7 +259,8 @@ def _merger(x, y):
259
259
260
260
def merge_asof (left , right , on = None ,
261
261
left_on = None , right_on = None ,
262
- by = None ,
262
+ left_index = False , right_index = False ,
263
+ by = None , left_by = None , right_by = None ,
263
264
suffixes = ('_x' , '_y' ),
264
265
tolerance = None ,
265
266
allow_exact_matches = True ):
@@ -288,9 +289,29 @@ def merge_asof(left, right, on=None,
288
289
Field name to join on in left DataFrame.
289
290
right_on : label
290
291
Field name to join on in right DataFrame.
292
+ left_index : boolean
293
+ Use the index of the left DataFrame as the join key.
294
+
295
+ .. versionadded:: 0.19.2
296
+
297
+ right_index : boolean
298
+ Use the index of the right DataFrame as the join key.
299
+
300
+ .. versionadded:: 0.19.2
301
+
291
302
by : column name
292
303
Group both the left and right DataFrames by the group column; perform
293
304
the merge operation on these pieces and recombine.
305
+ left_by : column name
306
+ Field name to group by in the left DataFrame.
307
+
308
+ .. versionadded:: 0.19.2
309
+
310
+ right_by : column name
311
+ Field name to group by in the right DataFrame.
312
+
313
+ .. versionadded:: 0.19.2
314
+
294
315
suffixes : 2-length sequence (tuple, list, ...)
295
316
Suffix to apply to overlapping column names in the left and right
296
317
side, respectively
@@ -348,6 +369,28 @@ def merge_asof(left, right, on=None,
348
369
3 5 b 3.0
349
370
6 10 c 7.0
350
371
372
+ We can use indexed DataFrames as well.
373
+
374
+ >>> left
375
+ left_val
376
+ 1 a
377
+ 5 b
378
+ 10 c
379
+
380
+ >>> right
381
+ right_val
382
+ 1 1
383
+ 2 2
384
+ 3 3
385
+ 6 6
386
+ 7 7
387
+
388
+ >>> pd.merge_asof(left, right, left_index=True, right_index=True)
389
+ left_val right_val
390
+ 1 a 1
391
+ 5 b 3
392
+ 10 c 7
393
+
351
394
Here is a real-world time-series example
352
395
353
396
>>> quotes
@@ -418,7 +461,9 @@ def merge_asof(left, right, on=None,
418
461
"""
419
462
op = _AsOfMerge (left , right ,
420
463
on = on , left_on = left_on , right_on = right_on ,
421
- by = by , suffixes = suffixes ,
464
+ left_index = left_index , right_index = right_index ,
465
+ by = by , left_by = left_by , right_by = right_by ,
466
+ suffixes = suffixes ,
422
467
how = 'asof' , tolerance = tolerance ,
423
468
allow_exact_matches = allow_exact_matches )
424
469
return op .get_result ()
@@ -650,7 +695,7 @@ def _get_join_info(self):
650
695
left_ax = self .left ._data .axes [self .axis ]
651
696
right_ax = self .right ._data .axes [self .axis ]
652
697
653
- if self .left_index and self .right_index :
698
+ if self .left_index and self .right_index and self . how != 'asof' :
654
699
join_index , left_indexer , right_indexer = \
655
700
left_ax .join (right_ax , how = self .how , return_indexers = True )
656
701
elif self .right_index and self .how == 'left' :
@@ -731,6 +776,16 @@ def _get_merge_keys(self):
731
776
is_rkey = lambda x : isinstance (
732
777
x , (np .ndarray , ABCSeries )) and len (x ) == len (right )
733
778
779
+ # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A
780
+ # user could, for example, request 'left_index' and 'left_by'. In a
781
+ # regular pd.merge(), users cannot specify both 'left_index' and
782
+ # 'left_on'. (Instead, users have a MultiIndex). That means the
783
+ # self.left_on in this function is always empty in a pd.merge(), but
784
+ # a pd.merge_asof(left_index=True, left_by=...) will result in a
785
+ # self.left_on array with a None in the middle of it. This requires
786
+ # a work-around as designated in the code below.
787
+ # See _validate_specification() for where this happens.
788
+
734
789
# ugh, spaghetti re #733
735
790
if _any (self .left_on ) and _any (self .right_on ):
736
791
for lk , rk in zip (self .left_on , self .right_on ):
@@ -740,21 +795,35 @@ def _get_merge_keys(self):
740
795
right_keys .append (rk )
741
796
join_names .append (None ) # what to do?
742
797
else :
743
- right_keys .append (right [rk ]._values )
744
- join_names .append (rk )
798
+ if rk is not None :
799
+ right_keys .append (right [rk ]._values )
800
+ join_names .append (rk )
801
+ else :
802
+ # work-around for merge_asof(right_index=True)
803
+ right_keys .append (right .index )
804
+ join_names .append (right .index .name )
745
805
else :
746
806
if not is_rkey (rk ):
747
- right_keys .append (right [rk ]._values )
748
- if lk == rk :
807
+ if rk is not None :
808
+ right_keys .append (right [rk ]._values )
809
+ else :
810
+ # work-around for merge_asof(right_index=True)
811
+ right_keys .append (right .index )
812
+ if lk is not None and lk == rk :
749
813
# avoid key upcast in corner case (length-0)
750
814
if len (left ) > 0 :
751
815
right_drop .append (rk )
752
816
else :
753
817
left_drop .append (lk )
754
818
else :
755
819
right_keys .append (rk )
756
- left_keys .append (left [lk ]._values )
757
- join_names .append (lk )
820
+ if lk is not None :
821
+ left_keys .append (left [lk ]._values )
822
+ join_names .append (lk )
823
+ else :
824
+ # work-around for merge_asof(left_index=True)
825
+ left_keys .append (left .index )
826
+ join_names .append (left .index .name )
758
827
elif _any (self .left_on ):
759
828
for k in self .left_on :
760
829
if is_lkey (k ):
@@ -879,13 +948,15 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner',
879
948
class _OrderedMerge (_MergeOperation ):
880
949
_merge_type = 'ordered_merge'
881
950
882
- def __init__ (self , left , right , on = None , left_on = None ,
883
- right_on = None , axis = 1 ,
951
+ def __init__ (self , left , right , on = None , left_on = None , right_on = None ,
952
+ left_index = False , right_index = False , axis = 1 ,
884
953
suffixes = ('_x' , '_y' ), copy = True ,
885
954
fill_method = None , how = 'outer' ):
886
955
887
956
self .fill_method = fill_method
888
957
_MergeOperation .__init__ (self , left , right , on = on , left_on = left_on ,
958
+ left_index = left_index ,
959
+ right_index = right_index ,
889
960
right_on = right_on , axis = axis ,
890
961
how = how , suffixes = suffixes ,
891
962
sort = True # factorize sorts
@@ -958,43 +1029,68 @@ def _get_cython_type(dtype):
958
1029
class _AsOfMerge (_OrderedMerge ):
959
1030
_merge_type = 'asof_merge'
960
1031
961
- def __init__ (self , left , right , on = None , by = None , left_on = None ,
962
- right_on = None , axis = 1 ,
963
- suffixes = ('_x' , '_y' ), copy = True ,
1032
+ def __init__ (self , left , right , on = None , left_on = None , right_on = None ,
1033
+ left_index = False , right_index = False ,
1034
+ by = None , left_by = None , right_by = None ,
1035
+ axis = 1 , suffixes = ('_x' , '_y' ), copy = True ,
964
1036
fill_method = None ,
965
1037
how = 'asof' , tolerance = None ,
966
1038
allow_exact_matches = True ):
967
1039
968
1040
self .by = by
1041
+ self .left_by = left_by
1042
+ self .right_by = right_by
969
1043
self .tolerance = tolerance
970
1044
self .allow_exact_matches = allow_exact_matches
971
1045
972
1046
_OrderedMerge .__init__ (self , left , right , on = on , left_on = left_on ,
973
- right_on = right_on , axis = axis ,
1047
+ right_on = right_on , left_index = left_index ,
1048
+ right_index = right_index , axis = axis ,
974
1049
how = how , suffixes = suffixes ,
975
1050
fill_method = fill_method )
976
1051
977
1052
def _validate_specification (self ):
978
1053
super (_AsOfMerge , self )._validate_specification ()
979
1054
980
1055
# we only allow a single item for on
981
- if len (self .left_on ) != 1 :
1056
+ if len (self .left_on ) != 1 and not self . left_index :
982
1057
raise MergeError ("can only asof on a key for left" )
983
1058
984
- if len (self .right_on ) != 1 :
1059
+ if len (self .right_on ) != 1 and not self . right_index :
985
1060
raise MergeError ("can only asof on a key for right" )
986
1061
1062
+ if self .left_index and isinstance (self .left .index , MultiIndex ):
1063
+ raise MergeError ("left can only have one index" )
1064
+
1065
+ if self .right_index and isinstance (self .right .index , MultiIndex ):
1066
+ raise MergeError ("right can only have one index" )
1067
+
1068
+ # set 'by' columns
1069
+ if self .by is not None :
1070
+ if self .left_by is not None or self .right_by is not None :
1071
+ raise MergeError ('Can only pass by OR left_by '
1072
+ 'and right_by' )
1073
+ self .left_by = self .right_by = self .by
1074
+ if self .left_by is None and self .right_by is not None :
1075
+ raise MergeError ('missing left_by' )
1076
+ if self .left_by is not None and self .right_by is None :
1077
+ raise MergeError ('missing right_by' )
1078
+
987
1079
# add by to our key-list so we can have it in the
988
1080
# output as a key
989
- if self .by is not None :
990
- if not is_list_like (self .by ):
991
- self .by = [self .by ]
1081
+ if self .left_by is not None :
1082
+ if not is_list_like (self .left_by ):
1083
+ self .left_by = [self .left_by ]
1084
+ if not is_list_like (self .right_by ):
1085
+ self .right_by = [self .right_by ]
992
1086
993
- if len (self .by ) != 1 :
1087
+ if len (self .left_by ) != 1 :
1088
+ raise MergeError ("can only asof by a single key" )
1089
+ if len (self .right_by ) != 1 :
994
1090
raise MergeError ("can only asof by a single key" )
995
1091
996
- self .left_on = self .by + list (self .left_on )
997
- self .right_on = self .by + list (self .right_on )
1092
+ self .left_on = self .left_by + list (self .left_on )
1093
+ self .right_on = self .right_by + list (self .right_on )
998
1094
999
1095
@property
1000
1096
def _asof_key (self ):
@@ -1017,7 +1113,7 @@ def _get_merge_keys(self):
1017
1113
# validate tolerance; must be a Timedelta if we have a DTI
1018
1114
if self .tolerance is not None :
1019
1115
1020
- lt = left_join_keys [self . left_on . index ( self . _asof_key ) ]
1116
+ lt = left_join_keys [- 1 ]
1021
1117
msg = "incompatible tolerance, must be compat " \
1022
1118
"with type {0}" .format (type (lt ))
1023
1119
@@ -1047,8 +1143,10 @@ def _get_join_indexers(self):
1047
1143
""" return the join indexers """
1048
1144
1049
1145
# values to compare
1050
- left_values = self .left_join_keys [- 1 ]
1051
- right_values = self .right_join_keys [- 1 ]
1146
+ left_values = (self .left .index .values if self .left_index else
1147
+ self .left_join_keys [- 1 ])
1148
+ right_values = (self .right .index .values if self .right_index else
1149
+ self .right_join_keys [- 1 ])
1052
1150
tolerance = self .tolerance
1053
1151
1054
1152
# we required sortedness in the join keys
@@ -1066,7 +1164,7 @@ def _get_join_indexers(self):
1066
1164
tolerance = tolerance .value
1067
1165
1068
1166
# a "by" parameter requires special handling
1069
- if self .by is not None :
1167
+ if self .left_by is not None :
1070
1168
left_by_values = self .left_join_keys [0 ]
1071
1169
right_by_values = self .right_join_keys [0 ]
1072
1170
0 commit comments