From 1f56d060a38f54960c36cbfa3f0f6dc060103f00 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 10 Jan 2024 07:41:20 -0500 Subject: [PATCH 1/2] PERF: join non-unique --- pandas/core/indexes/base.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 74c1f165ac06c..85b8c708768eb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4806,11 +4806,18 @@ def _join_non_unique( left_idx, right_idx = get_join_indexers_non_unique( self._values, other._values, how=how, sort=sort ) - mask = left_idx == -1 - join_idx = self.take(left_idx) - right = other.take(right_idx) - join_index = join_idx.putmask(mask, right) + if how == "right": + join_index = other.take(right_idx) + else: + join_index = self.take(left_idx) + + if how == "outer": + mask = left_idx == -1 + if mask.any(): + right = other.take(right_idx) + join_index = join_index.putmask(mask, right) + if isinstance(join_index, ABCMultiIndex) and how == "outer": # test_join_index_levels join_index = join_index._sort_levels_monotonic() From a847d18def7150936b6fa1fa876c6e5fd74469f9 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 10 Jan 2024 17:12:18 -0500 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v2.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 7b53ddb3923f0..2dc05ac8057c8 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -101,6 +101,7 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) -