@@ -97,13 +97,27 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
97
97
98
98
# insert group keys
99
99
for i , name in enumerate (self .join_names ):
100
- # a faster way?
101
- key_col = com .take_1d (self .left_join_keys [i ], left_indexer )
102
- na_indexer = (left_indexer == - 1 ).nonzero ()[0 ]
103
- right_na_indexer = right_indexer .take (na_indexer )
104
- key_col .put (na_indexer , com .take_1d (self .right_join_keys [i ],
105
- right_na_indexer ))
106
- result .insert (i , name , key_col )
100
+ if name in result :
101
+ key_col = result [name ]
102
+
103
+ if name in self .left :
104
+ na_indexer = (left_indexer == - 1 ).nonzero ()[0 ]
105
+ right_na_indexer = right_indexer .take (na_indexer )
106
+ key_col .put (na_indexer , com .take_1d (self .right_join_keys [i ],
107
+ right_na_indexer ))
108
+ else :
109
+ na_indexer = (right_indexer == - 1 ).nonzero ()[0 ]
110
+ left_na_indexer = right_indexer .take (na_indexer )
111
+ key_col .put (na_indexer , com .take_1d (self .left_join_keys [i ],
112
+ left_na_indexer ))
113
+ else :
114
+ # a faster way?
115
+ key_col = com .take_1d (self .left_join_keys [i ], left_indexer )
116
+ na_indexer = (left_indexer == - 1 ).nonzero ()[0 ]
117
+ right_na_indexer = right_indexer .take (na_indexer )
118
+ key_col .put (na_indexer , com .take_1d (self .right_join_keys [i ],
119
+ right_na_indexer ))
120
+ result .insert (i , name , key_col )
107
121
108
122
def _get_join_info (self ):
109
123
left_ax = self .left ._data .axes [self .axis ]
@@ -144,17 +158,8 @@ def _get_merge_data(self):
144
158
"""
145
159
ldata , rdata = self .left ._data , self .right ._data
146
160
lsuf , rsuf = self .suffixes
147
- exclude_names = set (x for x in self .join_names if x is not None )
148
- if self .left_on is not None :
149
- exclude_names -= set (c .name if hasattr (c , 'name' ) else c
150
- for c in self .left_on )
151
- if self .right_on is not None :
152
- exclude_names -= set (c .name if hasattr (c , 'name' ) else c
153
- for c in self .right_on )
154
161
ldata , rdata = ldata ._maybe_rename_join (rdata , lsuf , rsuf ,
155
- exclude = exclude_names ,
156
162
copydata = False )
157
-
158
163
return ldata , rdata
159
164
160
165
def _get_merge_keys (self ):
@@ -172,8 +177,6 @@ def _get_merge_keys(self):
172
177
left_keys, right_keys
173
178
"""
174
179
# Hm, any way to make this logic less complicated??
175
- join_names = []
176
-
177
180
if (self .on is None and self .left_on is None
178
181
and self .right_on is None ):
179
182
@@ -198,18 +201,56 @@ def _get_merge_keys(self):
198
201
self .left_on = self .right_on = self .on
199
202
self .drop_keys = True
200
203
201
- # this is a touch kludgy, but accomplishes the goal
202
- left_keys = None
203
- if self .left_on is not None :
204
- self .left , left_keys , left_names = \
205
- _get_keys (self .left , self .left_on , drop = self .drop_keys )
206
- join_names = left_names
207
-
208
- right_keys = None
209
- if self .right_on is not None :
210
- self .right , right_keys , right_names = \
211
- _get_keys (self .right , self .right_on , drop = self .drop_keys )
212
- join_names = right_names
204
+ left_keys = []
205
+ right_keys = []
206
+ join_names = []
207
+ left_drop , right_drop = [], []
208
+ left , right = self .left , self .right
209
+
210
+ is_lkey = lambda x : isinstance (x , np .ndarray ) and len (x ) == len (left )
211
+ is_rkey = lambda x : isinstance (x , np .ndarray ) and len (x ) == len (right )
212
+
213
+ # ugh, spaghetti re #733
214
+ if _any (self .left_on ) and _any (self .right_on ):
215
+ for lk , rk in zip (self .left_on , self .right_on ):
216
+ if is_lkey (lk ):
217
+ left_keys .append (lk )
218
+ if is_rkey (rk ):
219
+ right_keys .append (rk )
220
+ join_names .append (None ) # what to do?
221
+ else :
222
+ right_keys .append (right [rk ].values )
223
+ join_names .append (rk )
224
+ else :
225
+ if not is_rkey (rk ):
226
+ right_keys .append (right [rk ].values )
227
+ if lk == rk :
228
+ right_drop .append (rk )
229
+ else :
230
+ right_keys .append (rk )
231
+ left_keys .append (left [lk ].values )
232
+ join_names .append (lk )
233
+ elif _any (self .left_on ):
234
+ for k in self .left_on :
235
+ if is_lkey (k ):
236
+ left_keys .append (k )
237
+ join_names .append (None )
238
+ else :
239
+ left_keys .append (left [k ].values )
240
+ join_names .append (k )
241
+ elif _any (self .right_on ):
242
+ for k in self .right_on :
243
+ if is_rkey (k ):
244
+ right_keys .append (k )
245
+ join_names .append (None )
246
+ else :
247
+ right_keys .append (right [k ].values )
248
+ join_names .append (k )
249
+
250
+ if right_drop :
251
+ self .right = self .right .drop (right_drop , axis = 1 )
252
+ if left_drop :
253
+ self .left = self .left .drop (left_drop , axis = 1 )
213
254
214
255
return left_keys , right_keys , join_names
215
256
@@ -271,7 +312,8 @@ def _get_group_keys(self):
271
312
sort = self .sort )
272
313
return left_group_key , right_group_key , max_groups
273
314
274
- def _get_keys (frame , on , drop = False ):
315
+
316
+ def _get_join_keys (left , right , left_on , right_on , drop = False ):
275
317
to_drop = []
276
318
keys = []
277
319
names = []
@@ -993,3 +1035,5 @@ def _all_indexes_same(indexes):
993
1035
return False
994
1036
return True
995
1037
1038
+ def _any (x ):
1039
+ return x is not None and len (x ) > 0
0 commit comments