Skip to content

Commit d35c84b

Browse files
committed
Merge pull request #11154 from chris-b1/merge-indicator-method
ENH: add merge indicator to DataFrame.merge
2 parents 6d048d9 + 23802d4 commit d35c84b

File tree

2 files changed

+32
-14
lines changed

2 files changed

+32
-14
lines changed

pandas/core/frame.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -4265,12 +4265,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
42654265
@Appender(_merge_doc, indents=2)
42664266
def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
42674267
left_index=False, right_index=False, sort=False,
4268-
suffixes=('_x', '_y'), copy=True):
4268+
suffixes=('_x', '_y'), copy=True, indicator=False):
42694269
from pandas.tools.merge import merge
42704270
return merge(self, right, how=how, on=on,
42714271
left_on=left_on, right_on=right_on,
42724272
left_index=left_index, right_index=right_index, sort=sort,
4273-
suffixes=suffixes, copy=copy)
4273+
suffixes=suffixes, copy=copy, indicator=indicator)
42744274

42754275
def round(self, decimals=0, out=None):
42764276
"""

pandas/tools/tests/test_merge.py

+30 -12
Original file line number | Diff line number | Diff line change
@@ -951,25 +951,27 @@ def test_indicator(self):
951951
df1 = pd.DataFrame({'col1':[0,1], 'col_left':['a','b'], 'col_conflict':[1,2]})
952952
df1_copy = df1.copy()
953953

954-
df2 = pd.DataFrame({'col1':[1,2,3,4,5],'col_right':[2,2,2,2,2],
954+
df2 = pd.DataFrame({'col1':[1,2,3,4,5],'col_right':[2,2,2,2,2],
955955
'col_conflict':[1,2,3,4,5]})
956956
df2_copy = df2.copy()
957-
958-
df_result = pd.DataFrame({'col1':[0,1,2,3,4,5],
957+
958+
df_result = pd.DataFrame({'col1':[0,1,2,3,4,5],
959959
'col_conflict_x':[1,2,np.nan,np.nan,np.nan,np.nan],
960-
'col_left':['a','b', np.nan,np.nan,np.nan,np.nan],
961-
'col_conflict_y':[np.nan,1,2,3,4,5],
960+
'col_left':['a','b', np.nan,np.nan,np.nan,np.nan],
961+
'col_conflict_y':[np.nan,1,2,3,4,5],
962962
'col_right':[np.nan, 2,2,2,2,2]},
963963
dtype='float64')
964964
df_result['_merge'] = pd.Categorical(['left_only','both','right_only',
965965
'right_only','right_only','right_only']
966966
, categories=['left_only', 'right_only', 'both'])
967967

968-
df_result = df_result[['col1', 'col_conflict_x', 'col_left',
968+
df_result = df_result[['col1', 'col_conflict_x', 'col_left',
969969
'col_conflict_y', 'col_right', '_merge' ]]
970970

971971
test = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
972972
assert_frame_equal(test, df_result)
973+
test = df1.merge(df2, on='col1', how='outer', indicator=True)
974+
assert_frame_equal(test, df_result)
973975

974976
# No side effects
975977
assert_frame_equal(df1, df1_copy)
@@ -981,49 +983,65 @@ def test_indicator(self):
981983

982984
test_custom_name = pd.merge(df1, df2, on='col1', how='outer', indicator='custom_name')
983985
assert_frame_equal(test_custom_name, df_result_custom_name)
986+
test_custom_name = df1.merge(df2, on='col1', how='outer', indicator='custom_name')
987+
assert_frame_equal(test_custom_name, df_result_custom_name)
984988

985989
# Check only accepts strings and booleans
986990
with tm.assertRaises(ValueError):
987991
pd.merge(df1, df2, on='col1', how='outer', indicator=5)
992+
with tm.assertRaises(ValueError):
993+
df1.merge(df2, on='col1', how='outer', indicator=5)
988994

989995
# Check result integrity
990-
996+
991997
test2 = pd.merge(df1, df2, on='col1', how='left', indicator=True)
992998
self.assertTrue((test2._merge != 'right_only').all())
999+
test2 = df1.merge(df2, on='col1', how='left', indicator=True)
1000+
self.assertTrue((test2._merge != 'right_only').all())
9931001

9941002
test3 = pd.merge(df1, df2, on='col1', how='right', indicator=True)
9951003
self.assertTrue((test3._merge != 'left_only').all())
1004+
test3 = df1.merge(df2, on='col1', how='right', indicator=True)
1005+
self.assertTrue((test3._merge != 'left_only').all())
9961006

9971007
test4 = pd.merge(df1, df2, on='col1', how='inner', indicator=True)
9981008
self.assertTrue((test4._merge == 'both').all())
1009+
test4 = df1.merge(df2, on='col1', how='inner', indicator=True)
1010+
self.assertTrue((test4._merge == 'both').all())
9991011

10001012
# Check if working name in df
10011013
for i in ['_right_indicator', '_left_indicator', '_merge']:
10021014
df_badcolumn = pd.DataFrame({'col1':[1,2], i:[2,2]})
1003-
1015+
10041016
with tm.assertRaises(ValueError):
10051017
pd.merge(df1, df_badcolumn, on='col1', how='outer', indicator=True)
1018+
with tm.assertRaises(ValueError):
1019+
df1.merge(df_badcolumn, on='col1', how='outer', indicator=True)
10061020

10071021
# Check for name conflict with custom name
10081022
df_badcolumn = pd.DataFrame({'col1':[1,2], 'custom_column_name':[2,2]})
1009-
1023+
10101024
with tm.assertRaises(ValueError):
10111025
pd.merge(df1, df_badcolumn, on='col1', how='outer', indicator='custom_column_name')
1026+
with tm.assertRaises(ValueError):
1027+
df1.merge(df_badcolumn, on='col1', how='outer', indicator='custom_column_name')
10121028

10131029
# Merge on multiple columns
10141030
df3 = pd.DataFrame({'col1':[0,1], 'col2':['a','b']})
10151031

10161032
df4 = pd.DataFrame({'col1':[1,1,3], 'col2':['b','x','y']})
10171033

1018-
hand_coded_result = pd.DataFrame({'col1':[0,1,1,3.0],
1034+
hand_coded_result = pd.DataFrame({'col1':[0,1,1,3.0],
10191035
'col2':['a','b','x','y']})
10201036
hand_coded_result['_merge'] = pd.Categorical(
10211037
['left_only','both','right_only','right_only']
10221038
, categories=['left_only', 'right_only', 'both'])
1023-
1039+
10241040
test5 = pd.merge(df3, df4, on=['col1', 'col2'], how='outer', indicator=True)
10251041
assert_frame_equal(test5, hand_coded_result)
1026-
1042+
test5 = df3.merge(df4, on=['col1', 'col2'], how='outer', indicator=True)
1043+
assert_frame_equal(test5, hand_coded_result)
1044+
10271045

10281046
def _check_merge(x, y):
10291047
for how in ['inner', 'left', 'outer']:

0 commit comments

Comments
 (0)