Skip to content

Commit cf8412f

Browse files
author
Nick Eubank
committed
No inplace, but with duplication of input data
1 parent 76e4f33 commit cf8412f

File tree

2 files changed

+26
-17
lines changed

2 files changed

+26
-17
lines changed

pandas/tools/merge.py

+16-14
Original file line number | Diff line number | Diff line change
@@ -186,8 +186,7 @@ def __init__(self, left, right, how='inner', on=None,
186186

187187
def get_result(self):
188188
if self.indicator:
189-
# Has side-effects. Most cleaned up in `self._indicator_post_merge`
190-
self._indicator_pre_merge(self.left, self.right)
189+
self.left, self.right = self._indicator_pre_merge(self.left, self.right)
191190

192191
join_index, left_indexer, right_indexer = self._get_join_info()
193192

@@ -209,8 +208,7 @@ def get_result(self):
209208
result = typ(result_data).__finalize__(self, method='merge')
210209

211210
if self.indicator:
212-
# Has side-effects
213-
self._indicator_post_merge(result, self.left, self.right)
211+
result = self._indicator_post_merge(result)
214212

215213
self._maybe_add_join_keys(result, left_indexer, right_indexer)
216214

@@ -224,24 +222,28 @@ def _indicator_pre_merge(self, left, right):
224222
if i in columns:
225223
raise ValueError("Cannot use `indicator=True` option when data contains a column named {}".format(i))
226224

225+
left = left.copy()
226+
right = right.copy()
227+
227228
left['_left_indicator'] = 1
228229
left['_left_indicator'] = left['_left_indicator'].astype('int8')
229230

230231
right['_right_indicator'] = 2
231-
right['_right_indicator'] = right['_right_indicator'].astype('int8')
232+
right['_right_indicator'] = right['_right_indicator'].astype('int8')
233+
234+
return left, right
232235

236+
def _indicator_post_merge(self, result):
237+
238+
result['_left_indicator'] = result['_left_indicator'].fillna(0)
239+
result['_right_indicator'] = result['_right_indicator'].fillna(0)
233240

234-
def _indicator_post_merge(self, result, left, right):
235-
result['_left_indicator'].fillna(0, inplace=True)
236-
result['_right_indicator'].fillna(0, inplace=True)
237-
238241
result['_merge'] = Categorical((result['_left_indicator'] + result['_right_indicator']), categories=[1,2,3])
239-
result['_merge'].cat.rename_categories(['left_only', 'right_only', 'both'], inplace=True)
242+
result['_merge'] = result['_merge'].cat.rename_categories(['left_only', 'right_only', 'both'])
240243

241-
# Cleanup
242-
result.drop(labels=['_left_indicator', '_right_indicator'], axis=1, inplace=True)
243-
left.drop(labels=['_left_indicator'], axis=1, inplace=True)
244-
right.drop(labels=['_right_indicator'], axis=1, inplace=True)
244+
result = result.drop(labels=['_left_indicator', '_right_indicator'], axis=1)
245+
246+
return result
245247

246248
def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
247249
# insert group keys

pandas/tools/tests/test_merge.py

+10-3
Original file line number | Diff line number | Diff line change
@@ -858,23 +858,30 @@ def test_overlapping_columns_error_message(self):
858858

859859
def test_indicator(self):
860860

861-
df1 = pd.DataFrame({'col1':[0,1], 'col_left':['a','b']})
861+
df1 = pd.DataFrame({'col1':[0,1], 'col_left':['a','b'], 'col_conflict':[1,2]})
862862
df1_copy = df1.copy()
863863

864-
df2 = pd.DataFrame({'col1':[1,2,3,4,5],'col_right':[2,2,2,2,2]})
864+
df2 = pd.DataFrame({'col1':[1,2,3,4,5],'col_right':[2,2,2,2,2],
865+
'col_conflict':[1,2,3,4,5]})
865866
df2_copy = df2.copy()
866867

867868
df_result = pd.DataFrame({'col1':[0,1,2,3,4,5],
869+
'col_conflict_x':[1,2,np.nan,np.nan,np.nan,np.nan],
868870
'col_left':['a','b', np.nan,np.nan,np.nan,np.nan],
869-
'col_right':[np.nan, 2,2,2,2,2]},
871+
'col_conflict_y':[np.nan,1,2,3,4,5],
872+
'col_right':[np.nan, 2,2,2,2,2]},
870873
dtype='float64')
871874
df_result['_merge'] = pd.Categorical(['left_only','both','right_only',
872875
'right_only','right_only','right_only']
873876
, categories=['left_only', 'right_only', 'both'])
874877

878+
df_result = df_result[['col1', 'col_conflict_x', 'col_left',
879+
'col_conflict_y', 'col_right', '_merge' ]]
880+
875881
test = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
876882
assert_frame_equal(test, df_result)
877883

884+
878885
# No side effects
879886
assert_frame_equal(df1, df1_copy)
880887
assert_frame_equal(df2, df2_copy)

0 commit comments

Comments
 (0)