5
5
import copy
6
6
import warnings
7
7
8
+ import string
9
+
8
10
import numpy as np
9
11
from pandas .compat import range , lrange , lzip , zip , map , filter
10
12
import pandas .compat as compat
28
30
is_list_like ,
29
31
_ensure_int64 ,
30
32
_ensure_float64 ,
31
- _ensure_object )
33
+ _ensure_object ,
34
+ _get_dtype )
32
35
from pandas .types .missing import na_value_for_dtype
33
36
34
37
from pandas .core .generic import NDFrame
@@ -271,8 +274,8 @@ def merge_asof(left, right, on=None,
271
274
DataFrame whose 'on' key is less than or equal to the left's key. Both
272
275
DataFrames must be sorted by the key.
273
276
274
- Optionally perform group-wise merge. This searches for the nearest match
275
- on the 'on' key within the same group according to 'by '.
277
+ Optionally match on equivalent keys with 'by' before searching for nearest
278
+ match with 'on'.
276
279
277
280
.. versionadded:: 0.19.0
278
281
@@ -299,16 +302,15 @@ def merge_asof(left, right, on=None,
299
302
300
303
.. versionadded:: 0.19.2
301
304
302
- by : column name
303
- Group both the left and right DataFrames by the group column; perform
304
- the merge operation on these pieces and recombine.
305
+ by : column name or list of column names
306
+ Match on these columns before performing merge operation.
305
307
left_by : column name
306
- Field name to group by in the left DataFrame.
308
+ Field names to match on in the left DataFrame.
307
309
308
310
.. versionadded:: 0.19.2
309
311
310
312
right_by : column name
311
- Field name to group by in the right DataFrame.
313
+ Field names to match on in the right DataFrame.
312
314
313
315
.. versionadded:: 0.19.2
314
316
@@ -997,27 +999,46 @@ def get_result(self):
997
999
return result
998
1000
999
1001
1000
- _asof_functions = {
1001
- 'int64_t' : _join .asof_join_int64_t ,
1002
- 'double' : _join .asof_join_double ,
1003
- }
1002
def _asof_function(on_type):
    """Look up the asof-join routine for the given 'on' C type name.

    The routine is fetched from ``_join`` under the attribute name
    ``asof_join_<on_type>``.  When ``_join`` has no such attribute, None is
    returned instead of raising.
    """
    func_name = 'asof_join_%s' % on_type
    return getattr(_join, func_name, None)
1004
+
1005
+
1006
def _asof_by_function(on_type, by_type):
    """Look up the asof-join routine for an 'on' type matched with a 'by' type.

    The routine is fetched from ``_join`` under the attribute name
    ``asof_join_<on_type>_by_<by_type>``.  When ``_join`` has no such
    attribute, None is returned instead of raising.
    """
    func_name = 'asof_join_%s_by_%s' % (on_type, by_type)
    return getattr(_join, func_name, None)
1004
1008
1005
- _asof_by_functions = {
1006
- ('int64_t' , 'int64_t' ): _join .asof_join_int64_t_by_int64_t ,
1007
- ('double' , 'int64_t' ): _join .asof_join_double_by_int64_t ,
1008
- ('int64_t' , 'object' ): _join .asof_join_int64_t_by_object ,
1009
- ('double' , 'object' ): _join .asof_join_double_by_object ,
1010
- }
1011
1009
1012
1010
# C type name -> function that casts an array to that type.
_type_casters = {
    'int64_t': _ensure_int64,
    'double': _ensure_float64,
    'object': _ensure_object,
}
1017
1015
1016
+ _cython_types = {
1017
+ 'uint8' : 'uint8_t' ,
1018
+ 'uint32' : 'uint32_t' ,
1019
+ 'uint16' : 'uint16_t' ,
1020
+ 'uint64' : 'uint64_t' ,
1021
+ 'int8' : 'int8_t' ,
1022
+ 'int32' : 'int32_t' ,
1023
+ 'int16' : 'int16_t' ,
1024
+ 'int64' : 'int64_t' ,
1025
+ 'float16' : 'error' ,
1026
+ 'float32' : 'float' ,
1027
+ 'float64' : 'double' ,
1028
+ }
1029
+
1018
1030
1019
1031
def _get_cython_type(dtype):
    """Given a dtype, return a C type name such as 'int64_t' or 'double'.

    The dtype's name is looked up in ``_cython_types``; a name with no entry
    there maps to 'object'.

    Raises
    ------
    MergeError
        If the table marks the dtype name with the 'error' sentinel
        (float16).
    """
    dtype_name = _get_dtype(dtype).name
    c_name = _cython_types.get(dtype_name, 'object')
    if c_name != 'error':
        return c_name
    raise MergeError('unsupported type: ' + dtype_name)
1038
+
1039
+
1040
+ def _get_cython_type_upcast (dtype ):
1041
+ """ Upcast a dtype to 'int64_t', 'double', or 'object' """
1021
1042
if is_integer_dtype (dtype ):
1022
1043
return 'int64_t'
1023
1044
elif is_float_dtype (dtype ):
@@ -1084,11 +1105,6 @@ def _validate_specification(self):
1084
1105
if not is_list_like (self .right_by ):
1085
1106
self .right_by = [self .right_by ]
1086
1107
1087
- if len (self .left_by ) != 1 :
1088
- raise MergeError ("can only asof by a single key" )
1089
- if len (self .right_by ) != 1 :
1090
- raise MergeError ("can only asof by a single key" )
1091
-
1092
1108
self .left_on = self .left_by + list (self .left_on )
1093
1109
self .right_on = self .right_by + list (self .right_on )
1094
1110
@@ -1142,6 +1158,13 @@ def _get_merge_keys(self):
1142
1158
def _get_join_indexers (self ):
1143
1159
""" return the join indexers """
1144
1160
1161
def flip(xs):
    """Zip several arrays into a single 1-D array of row tuples.

    Unlike ``np.transpose``, the result is a structured array: element ``i``
    holds ``(xs[0][i], xs[1][i], ...)`` and each field keeps the dtype of
    the array it was taken from.

    Parameters
    ----------
    xs : sequence of ndarray
        One array per key column (assumed to be of equal length; ``zip``
        stops at the shortest).

    Returns
    -------
    ndarray
        Structured array with one field per entry of ``xs``.
    """
    letters = string.ascii_lowercase
    # Field names only have to be unique.  The first 26 keep their
    # single-letter names; beyond that fall back to positional names so the
    # number of fields always equals the number of columns (slicing the
    # alphabet alone silently capped it at 26).
    labels = [letters[i] if i < len(letters) else 'f%d' % i
              for i in range(len(xs))]
    labeled_dtypes = [(label, x.dtype) for label, x in zip(labels, xs)]
    return np.array(list(zip(*xs)), labeled_dtypes)
1167
+
1145
1168
# values to compare
1146
1169
left_values = (self .left .index .values if self .left_index else
1147
1170
self .left_join_keys [- 1 ])
@@ -1165,22 +1188,23 @@ def _get_join_indexers(self):
1165
1188
1166
1189
# a "by" parameter requires special handling
1167
1190
if self .left_by is not None :
1168
- left_by_values = self .left_join_keys [0 ]
1169
- right_by_values = self .right_join_keys [0 ]
1170
-
1171
- # choose appropriate function by type
1172
- on_type = _get_cython_type (left_values .dtype )
1173
- by_type = _get_cython_type (left_by_values .dtype )
1191
+ if len (self .left_join_keys ) > 2 :
1192
+ # get tuple representation of values if more than one
1193
+ left_by_values = flip (self .left_join_keys [0 :- 1 ])
1194
+ right_by_values = flip (self .right_join_keys [0 :- 1 ])
1195
+ else :
1196
+ left_by_values = self .left_join_keys [0 ]
1197
+ right_by_values = self .right_join_keys [0 ]
1174
1198
1175
- on_type_caster = _type_casters [on_type ]
1199
+ # upcast 'by' parameter because HashTable is limited
1200
+ by_type = _get_cython_type_upcast (left_by_values .dtype )
1176
1201
by_type_caster = _type_casters [by_type ]
1177
- func = _asof_by_functions [(on_type , by_type )]
1178
-
1179
- left_values = on_type_caster (left_values )
1180
- right_values = on_type_caster (right_values )
1181
1202
left_by_values = by_type_caster (left_by_values )
1182
1203
right_by_values = by_type_caster (right_by_values )
1183
1204
1205
+ # choose appropriate function by type
1206
+ on_type = _get_cython_type (left_values .dtype )
1207
+ func = _asof_by_function (on_type , by_type )
1184
1208
return func (left_values ,
1185
1209
right_values ,
1186
1210
left_by_values ,
@@ -1190,12 +1214,7 @@ def _get_join_indexers(self):
1190
1214
else :
1191
1215
# choose appropriate function by type
1192
1216
on_type = _get_cython_type (left_values .dtype )
1193
- type_caster = _type_casters [on_type ]
1194
- func = _asof_functions [on_type ]
1195
-
1196
- left_values = type_caster (left_values )
1197
- right_values = type_caster (right_values )
1198
-
1217
+ func = _asof_function (on_type )
1199
1218
return func (left_values ,
1200
1219
right_values ,
1201
1220
self .allow_exact_matches ,
0 commit comments