diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 083242cd69b74..3b626ddced5bf 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -772,7 +772,7 @@ Reshaping
 - Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`)
 - Bug in :func:`concat` when concatting sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. (:issue:`18914`, :issue:`18686`, and :issue:`16874`)
 - Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`)
--
+- Bug in :func:`DataFrame.join` which does an *outer* instead of a *left* join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`)
 
 Other
 ^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6d8dcb8a1ca89..a6417f821a4e6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5328,18 +5328,17 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
                 raise ValueError('Joining multiple DataFrames only supported'
                                  ' for joining on index')
 
-            # join indexes only using concat
-            if how == 'left':
-                how = 'outer'
-                join_axes = [self.index]
-            else:
-                join_axes = None
-
             frames = [self] + list(other)
 
             can_concat = all(df.index.is_unique for df in frames)
 
+            # join indexes only using concat
             if can_concat:
+                if how == 'left':
+                    how = 'outer'
+                    join_axes = [self.index]
+                else:
+                    join_axes = None
                 return concat(frames, axis=1, join=how, join_axes=join_axes,
                               verify_integrity=True)
 
diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py
index afecba2026dd7..ccdba6df2521a 100644
--- a/pandas/tests/frame/test_join.py
+++ b/pandas/tests/frame/test_join.py
@@ -165,3 +165,20 @@ def test_join_period_index(frame_with_period_index):
         index=frame_with_period_index.index)
 
     tm.assert_frame_equal(joined, expected)
+
+
+def test_join_left_sequence_non_unique_index():
+    # https://github.com/pandas-dev/pandas/issues/19607
+    df1 = DataFrame({'a': [0, 10, 20]}, index=[1, 2, 3])
+    df2 = DataFrame({'b': [100, 200, 300]}, index=[4, 3, 2])
+    df3 = DataFrame({'c': [400, 500, 600]}, index=[2, 2, 4])
+
+    joined = df1.join([df2, df3], how='left')
+
+    expected = DataFrame({
+        'a': [0, 10, 10, 20],
+        'b': [np.nan, 300, 300, 200],
+        'c': [np.nan, 400, 500, np.nan]
+    }, index=[1, 2, 2, 3])
+
+    tm.assert_frame_equal(joined, expected)