Skip to content

Commit e2ea151

Browse files
elrubio authored and jreback committed
Fix left join turning into outer join (#19624)
1 parent bae38fc commit e2ea151

File tree

3 files changed

+24
-8
lines changed

3 files changed

+24
-8
lines changed

doc/source/whatsnew/v0.23.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,7 @@ Reshaping
813813
- Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`)
814814
- Bug in :func:`concat` when concatting sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. (:issue:`18914`, :issue:`18686`, and :issue:`16874`)
815815
- Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`)
816-
-
816+
- Bug in :func:`DataFrame.join` which does an *outer* instead of a *left* join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`)
817817

818818
Other
819819
^^^^^

pandas/core/frame.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -5345,18 +5345,17 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
53455345
raise ValueError('Joining multiple DataFrames only supported'
53465346
' for joining on index')
53475347

5348-
# join indexes only using concat
5349-
if how == 'left':
5350-
how = 'outer'
5351-
join_axes = [self.index]
5352-
else:
5353-
join_axes = None
5354-
53555348
frames = [self] + list(other)
53565349

53575350
can_concat = all(df.index.is_unique for df in frames)
53585351

5352+
# join indexes only using concat
53595353
if can_concat:
5354+
if how == 'left':
5355+
how = 'outer'
5356+
join_axes = [self.index]
5357+
else:
5358+
join_axes = None
53605359
return concat(frames, axis=1, join=how, join_axes=join_axes,
53615360
verify_integrity=True)
53625361

pandas/tests/frame/test_join.py

+17
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,20 @@ def test_join_period_index(frame_with_period_index):
165165
index=frame_with_period_index.index)
166166

167167
tm.assert_frame_equal(joined, expected)
168+
169+
170+
def test_join_left_sequence_non_unique_index():
171+
# https://github.com/pandas-dev/pandas/issues/19607
172+
df1 = DataFrame({'a': [0, 10, 20]}, index=[1, 2, 3])
173+
df2 = DataFrame({'b': [100, 200, 300]}, index=[4, 3, 2])
174+
df3 = DataFrame({'c': [400, 500, 600]}, index=[2, 2, 4])
175+
176+
joined = df1.join([df2, df3], how='left')
177+
178+
expected = DataFrame({
179+
'a': [0, 10, 10, 20],
180+
'b': [np.nan, 300, 300, 200],
181+
'c': [np.nan, 400, 500, np.nan]
182+
}, index=[1, 2, 2, 3])
183+
184+
tm.assert_frame_equal(joined, expected)

0 commit comments

Comments
 (0)