Skip to content

Commit e11777e

Browse files
committed
BUG: implement DataFrame.join / merge of multiple frames on non-unique indexes via successive pairwise merges, close #1421
1 parent 490a80b commit e11777e

File tree

4 files changed

+51
-12
lines changed

4 files changed

+51
-12
lines changed

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ pandas 0.8.0
182182
- Respect column selection for DataFrame in GroupBy.transform (#1365)
183183
- Fix MultiIndex partial indexing bug (#1352)
184184
- Enable assignment of rows in mixed-type DataFrame via .ix (#1432)
185+
- Reset index mapping when grouping Series in Cython (#1423)
185186

186187
pandas 0.7.3
187188
============

pandas/core/frame.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3809,8 +3809,21 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
38093809
else:
38103810
join_axes = None
38113811

3812-
return concat([self] + list(other), axis=1, join=how,
3813-
join_axes=join_axes, verify_integrity=True)
3812+
frames = [self] + list(other)
3813+
3814+
can_concat = all(df.index.is_unique for df in frames)
3815+
3816+
if can_concat:
3817+
return concat(frames, axis=1, join=how, join_axes=join_axes,
3818+
verify_integrity=True)
3819+
3820+
joined = frames[0]
3821+
3822+
for frame in frames[1:]:
3823+
joined = merge(joined, frame, how=how,
3824+
left_index=True, right_index=True)
3825+
3826+
return joined
38143827

38153828
@Substitution('')
38163829
@Appender(_merge_doc, indents=2)

pandas/tools/merge.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -347,9 +347,6 @@ def _get_merge_keys(self):
347347
self.left.index.labels)]
348348
else:
349349
left_keys = [self.left.index.values]
350-
# else:
351-
# left_keys.append(self.left.index)
352-
# right_keys.append(self.right.index)
353350

354351
if right_drop:
355352
self.right = self.right.drop(right_drop, axis=1)
@@ -1039,13 +1036,6 @@ def _get_new_axes(self):
10391036
ndim = self._get_result_dim()
10401037
new_axes = [None] * ndim
10411038

1042-
# if self.ignore_index:
1043-
# concat_axis = None
1044-
# else:
1045-
# concat_axis = self._get_concat_axis()
1046-
1047-
# new_axes[self.axis] = concat_axis
1048-
10491039
if self.join_axes is None:
10501040
for i in range(ndim):
10511041
if i == self.axis:

pandas/tools/tests/test_merge.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,41 @@ def test_join_float64_float32(self):
414414
expected = a.join(b.astype('f8'))
415415
assert_frame_equal(joined, expected)
416416

417+
def test_join_many_non_unique_index(self):
418+
df1 = DataFrame({"a": [1,1], "b": [1,1], "c": [10,20]})
419+
df2 = DataFrame({"a": [1,1], "b": [1,2], "d": [100,200]})
420+
df3 = DataFrame({"a": [1,1], "b": [1,2], "e": [1000,2000]})
421+
idf1 = df1.set_index(["a", "b"])
422+
idf2 = df2.set_index(["a", "b"])
423+
idf3 = df3.set_index(["a", "b"])
424+
425+
result = idf1.join([idf2, idf3], how='outer')
426+
427+
df_partially_merged = merge(df1, df2, on=['a', 'b'], how='outer')
428+
expected = merge(df_partially_merged, df3, on=['a', 'b'], how='outer')
429+
430+
result = result.reset_index()
431+
432+
result['a'] = result['a'].astype(np.float64)
433+
result['b'] = result['b'].astype(np.float64)
434+
435+
assert_frame_equal(result, expected.ix[:, result.columns])
436+
437+
df1 = DataFrame({"a": [1, 1, 1], "b": [1,1, 1], "c": [10,20, 30]})
438+
df2 = DataFrame({"a": [1, 1, 1], "b": [1,1, 2], "d": [100,200, 300]})
439+
df3 = DataFrame({"a": [1, 1, 1], "b": [1,1, 2], "e": [1000,2000, 3000]})
440+
idf1 = df1.set_index(["a", "b"])
441+
idf2 = df2.set_index(["a", "b"])
442+
idf3 = df3.set_index(["a", "b"])
443+
result = idf1.join([idf2, idf3], how='inner')
444+
445+
df_partially_merged = merge(df1, df2, on=['a', 'b'], how='inner')
446+
expected = merge(df_partially_merged, df3, on=['a', 'b'], how='inner')
447+
448+
result = result.reset_index()
449+
450+
assert_frame_equal(result, expected.ix[:, result.columns])
451+
417452
def test_merge_index_singlekey_right_vs_left(self):
418453
left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
419454
'v1': np.random.randn(7)})

0 commit comments

Comments
 (0)