Skip to content

Commit 7559522

Browse files
jdrudolph authored and jreback committed
BUG: allow duplicate column names if they are not merged upon, pandas-dev#10639
1 parent 1d295cd commit 7559522

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,8 @@ Other enhancements
108108
- ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`).
109109
- ``pd.read_stata`` will now read Stata 118 type files. (:issue:`9882`)
110110

111+
- ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`).
112+
111113
.. _whatsnew_0170.api:
112114

113115
.. _whatsnew_0170.api_breaking:

pandas/tools/merge.py

+3 -8
Original file line number | Diff line number | Diff line change
@@ -402,19 +402,14 @@ def _validate_specification(self):
402402
if self.left_on is None:
403403
raise MergeError('Must pass left_on or left_index=True')
404404
else:
405-
if not self.left.columns.is_unique:
406-
raise MergeError("Left data columns not unique: %s"
407-
% repr(self.left.columns))
408-
409-
if not self.right.columns.is_unique:
410-
raise MergeError("Right data columns not unique: %s"
411-
% repr(self.right.columns))
412-
413405
# use the common columns
414406
common_cols = self.left.columns.intersection(
415407
self.right.columns)
416408
if len(common_cols) == 0:
417409
raise MergeError('No common columns to perform merge on')
410+
if not common_cols.is_unique:
411+
raise MergeError("Data columns not unique: %s"
412+
% repr(common_cols))
418413
self.left_on = self.right_on = common_cols
419414
elif self.on is not None:
420415
if self.left_on is not None or self.right_on is not None:

pandas/tools/tests/test_merge.py

+10 -2
Original file line number | Diff line number | Diff line change
@@ -843,7 +843,6 @@ def test_join_append_timedeltas(self):
843843
assert_frame_equal(result, expected)
844844

845845
def test_overlapping_columns_error_message(self):
846-
# #2649
847846
df = DataFrame({'key': [1, 2, 3],
848847
'v1': [4, 5, 6],
849848
'v2': [7, 8, 9]})
@@ -853,7 +852,16 @@ def test_overlapping_columns_error_message(self):
853852

854853
df.columns = ['key', 'foo', 'foo']
855854
df2.columns = ['key', 'bar', 'bar']
856-
855+
expected = DataFrame({'key': [1, 2, 3],
856+
'v1': [4, 5, 6],
857+
'v2': [7, 8, 9],
858+
'v3': [4, 5, 6],
859+
'v4': [7, 8, 9]})
860+
expected.columns = ['key', 'foo', 'foo', 'bar', 'bar']
861+
assert_frame_equal(merge(df, df2), expected)
862+
863+
# #2649, #10639
864+
df2.columns = ['key1', 'foo', 'foo']
857865
self.assertRaises(ValueError, merge, df, df2)
858866

859867
def _check_merge(x, y):

0 commit comments

Comments
 (0)