30
30
@Appender(_merge_doc, indents=0)
def merge(left, right, how='inner', on=None, left_on=None, right_on=None,
          left_index=False, right_index=False, sort=False,
          suffixes=('_x', '_y'), copy=True, indicator=False):
    # No literal docstring on purpose: ``_merge_doc`` is handed to the
    # ``Appender`` decorator above instead.
    #
    # Every keyword is forwarded unchanged to ``_MergeOperation``; this
    # function only builds the operation object and returns its result.
    merge_options = dict(
        how=how, on=on, left_on=left_on, right_on=right_on,
        left_index=left_index, right_index=right_index, sort=sort,
        suffixes=suffixes, copy=copy, indicator=indicator,
    )
    return _MergeOperation(left, right, **merge_options).get_result()
39
39
if __debug__ :
40
40
merge .__doc__ = _merge_doc % '\n left : DataFrame'
@@ -160,7 +160,7 @@ class _MergeOperation(object):
160
160
def __init__ (self , left , right , how = 'inner' , on = None ,
161
161
left_on = None , right_on = None , axis = 1 ,
162
162
left_index = False , right_index = False , sort = True ,
163
- suffixes = ('_x' , '_y' ), copy = True ):
163
+ suffixes = ('_x' , '_y' ), copy = True , indicator = False ):
164
164
self .left = self .orig_left = left
165
165
self .right = self .orig_right = right
166
166
self .how = how
@@ -177,12 +177,18 @@ def __init__(self, left, right, how='inner', on=None,
177
177
self .left_index = left_index
178
178
self .right_index = right_index
179
179
180
+ self .indicator = indicator
181
+
180
182
# note this function has side effects
181
183
(self .left_join_keys ,
182
184
self .right_join_keys ,
183
185
self .join_names ) = self ._get_merge_keys ()
184
186
185
187
def get_result (self ):
188
+ if self .indicator :
189
+ # Has side-effects. Most cleaned up in `self._indicator_post_merge`
190
+ self ._indicator_pre_merge (self .left , self .right )
191
+
186
192
join_index , left_indexer , right_indexer = self ._get_join_info ()
187
193
188
194
ldata , rdata = self .left ._data , self .right ._data
@@ -202,10 +208,41 @@ def get_result(self):
202
208
typ = self .left ._constructor
203
209
result = typ (result_data ).__finalize__ (self , method = 'merge' )
204
210
211
+ if self .indicator :
212
+ # Has side-effects
213
+ self ._indicator_post_merge (result , self .left , self .right )
214
+
205
215
self ._maybe_add_join_keys (result , left_indexer , right_indexer )
206
216
207
217
return result
208
218
219
+ def _indicator_pre_merge (self , left , right ):
220
+
221
+ columns = left .columns .values .tolist () + right .columns .values .tolist ()
222
+
223
+ for i in ['_left_indicator' , '_right_indicator' , '_merge' ]:
224
+ if i in columns :
225
+ raise ValueError ("Cannot use `indicator=True` option when data contains a column named {}" .format (i ))
226
+
227
+ left ['_left_indicator' ] = 1
228
+ left ['_left_indicator' ] = left ['_left_indicator' ].astype ('int8' )
229
+
230
+ right ['_right_indicator' ] = 2
231
+ right ['_right_indicator' ] = right ['_right_indicator' ].astype ('int8' )
232
+
233
+
234
+ def _indicator_post_merge (self , result , left , right ):
235
+ result ['_left_indicator' ].fillna (0 , inplace = True )
236
+ result ['_right_indicator' ].fillna (0 , inplace = True )
237
+
238
+ result ['_merge' ] = Categorical ((result ['_left_indicator' ] + result ['_right_indicator' ]), categories = [1 ,2 ,3 ])
239
+ result ['_merge' ].cat .rename_categories (['left_only' , 'right_only' , 'both' ], inplace = True )
240
+
241
+ # Cleanup
242
+ result .drop (labels = ['_left_indicator' , '_right_indicator' ], axis = 1 , inplace = True )
243
+ left .drop (labels = ['_left_indicator' ], axis = 1 , inplace = True )
244
+ right .drop (labels = ['_right_indicator' ], axis = 1 , inplace = True )
245
+
209
246
def _maybe_add_join_keys (self , result , left_indexer , right_indexer ):
210
247
# insert group keys
211
248
0 commit comments