diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 814dbe999d5c1..37b4a182b6a9d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -915,6 +915,7 @@ Numeric - Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`) - Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`) - Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`) +- Bug in arithmetic operations between ``DataFrame`` objects with non-overlapping columns with duplicate labels causing an infinite loop (:issue:`35194`) - Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`) Conversion diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 5dd94a8af74ac..60f3d23aaed13 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -17,6 +17,7 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna +from pandas.core import algorithms from pandas.core.construction import extract_array from pandas.core.ops.array_ops import ( arithmetic_op, @@ -562,10 +563,12 @@ def _frame_arith_method_with_reindex( DataFrame """ # GH#31623, only operate on shared columns - cols = left.columns.intersection(right.columns) + cols, lcols, rcols = left.columns.join( + right.columns, how="inner", level=None, return_indexers=True + ) - new_left = left[cols] - new_right = right[cols] + new_left = left.iloc[:, lcols] + new_right = right.iloc[:, rcols] result = op(new_left, new_right) # Do the join on the columns instead of using _align_method_FRAME @@ -573,7 +576,19 @@ def _frame_arith_method_with_reindex( join_columns, _, _ = left.columns.join( right.columns, how="outer", level=None, return_indexers=True ) - return result.reindex(join_columns, axis=1) + + if result.columns.has_duplicates: + # Avoid reindexing with a duplicate axis. + # https://github.com/pandas-dev/pandas/issues/35194 + indexer, _ = result.columns.get_indexer_non_unique(join_columns) + indexer = algorithms.unique1d(indexer) + result = result._reindex_with_indexers( + {1: [join_columns, indexer]}, allow_dups=True + ) + else: + result = result.reindex(join_columns, axis=1) + + return result def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index a6b0ece58b095..e17357e9845b5 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1552,3 +1552,12 @@ def test_dataframe_operation_with_non_numeric_types(df, col_dtype): expected = expected.astype({"b": col_dtype}) result = df + pd.Series([-1.0], index=list("a")) tm.assert_frame_equal(result, expected) + + +def test_arith_reindex_with_duplicates(): + # https://github.com/pandas-dev/pandas/issues/35194 + df1 = pd.DataFrame(data=[[0]], columns=["second"]) + df2 = pd.DataFrame(data=[[0, 0, 0]], columns=["first", "second", "second"]) + result = df1 + df2 + expected = pd.DataFrame([[np.nan, 0, 0]], columns=["first", "second", "second"]) + tm.assert_frame_equal(result, expected)