PERF: Index.join to maintain cached attributes in more cases (pandas-dev#57023)

lukemanley · pmhatre1 · commit 107bef80be45 · 2024-05-06T23:12:52.000-07:00
* Index.join result name

* whatsnew

* update test

* Index._wrap_join_result to maintain cached attributes if possible

* Index._wrap_join_result to maintain cached attributes if possible

* whatsnew

* allow indexers to be None

* gh ref

* rename variables for clarity
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -105,6 +105,7 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
 - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)
 - Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`)
+- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
 - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
 - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8012,19 +8012,17 @@ def _arith_method_with_reindex(self, right: DataFrame, op) -> DataFrame:
         left = self
 
         # GH#31623, only operate on shared columns
-        cols, lcols, rcols = left.columns.join(
-            right.columns, how="inner", level=None, return_indexers=True
+        cols, lcol_indexer, rcol_indexer = left.columns.join(
+            right.columns, how="inner", return_indexers=True
         )
 
-        new_left = left.iloc[:, lcols]
-        new_right = right.iloc[:, rcols]
+        new_left = left if lcol_indexer is None else left.iloc[:, lcol_indexer]
+        new_right = right if rcol_indexer is None else right.iloc[:, rcol_indexer]
         result = op(new_left, new_right)
 
         # Do the join on the columns instead of using left._align_for_op
         #  to avoid constructing two potentially large/sparse DataFrames
-        join_columns, _, _ = left.columns.join(
-            right.columns, how="outer", level=None, return_indexers=True
-        )
+        join_columns = left.columns.join(right.columns, how="outer")
 
         if result.columns.has_duplicates:
             # Avoid reindexing with a duplicate axis.
diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py
@@ -35,7 +35,10 @@ def test_join_level(idx, other, join_type):
 
         assert join_index.equals(join_index2)
         tm.assert_numpy_array_equal(lidx, lidx2)
-        tm.assert_numpy_array_equal(ridx, ridx2)
+        if ridx is None:
+            assert ridx == ridx2
+        else:
+            tm.assert_numpy_array_equal(ridx, ridx2)
         tm.assert_numpy_array_equal(join_index2.values, exp_values)