Skip to content

Commit d396111

Browse files
Fixed reindexing arith with duplicates (#35303)
Closes #35194
1 parent 697a538 commit d396111

File tree

3 files changed

+29
-4
lines changed

3 files changed

+29
-4
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -953,6 +953,7 @@ Numeric
953953
- Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`)
954954
- Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`)
955955
- Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`)
956+
- Bug in arithmetic operations between ``DataFrame`` objects with non-overlapping columns with duplicate labels causing an infinite loop (:issue:`35194`)
956957
- Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`)
957958
- Bug in :meth:`Index.difference` incorrect results when comparing a :class:`Float64Index` and object :class:`Index` (:issue:`35217`)
958959
- Bug in :class:`DataFrame` reductions (e.g. ``df.min()``, ``df.max()``) with ``ExtensionArray`` dtypes (:issue:`34520`, :issue:`32651`)

pandas/core/ops/__init__.py

+19 -4
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
1818
from pandas.core.dtypes.missing import isna
1919

20+
from pandas.core import algorithms
2021
from pandas.core.construction import extract_array
2122
from pandas.core.ops.array_ops import (
2223
arithmetic_op,
@@ -562,18 +563,32 @@ def _frame_arith_method_with_reindex(
562563
DataFrame
563564
"""
564565
# GH#31623, only operate on shared columns
565-
cols = left.columns.intersection(right.columns)
566+
cols, lcols, rcols = left.columns.join(
567+
right.columns, how="inner", level=None, return_indexers=True
568+
)
566569

567-
new_left = left[cols]
568-
new_right = right[cols]
570+
new_left = left.iloc[:, lcols]
571+
new_right = right.iloc[:, rcols]
569572
result = op(new_left, new_right)
570573

571574
# Do the join on the columns instead of using _align_method_FRAME
572575
# to avoid constructing two potentially large/sparse DataFrames
573576
join_columns, _, _ = left.columns.join(
574577
right.columns, how="outer", level=None, return_indexers=True
575578
)
576-
return result.reindex(join_columns, axis=1)
579+
580+
if result.columns.has_duplicates:
581+
# Avoid reindexing with a duplicate axis.
582+
# https://github.com/pandas-dev/pandas/issues/35194
583+
indexer, _ = result.columns.get_indexer_non_unique(join_columns)
584+
indexer = algorithms.unique1d(indexer)
585+
result = result._reindex_with_indexers(
586+
{1: [join_columns, indexer]}, allow_dups=True
587+
)
588+
else:
589+
result = result.reindex(join_columns, axis=1)
590+
591+
return result
577592

578593

579594
def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int):

pandas/tests/frame/test_arithmetic.py

+9
Original file line number | Diff line number | Diff line change
@@ -1552,3 +1552,12 @@ def test_dataframe_operation_with_non_numeric_types(df, col_dtype):
15521552
expected = expected.astype({"b": col_dtype})
15531553
result = df + pd.Series([-1.0], index=list("a"))
15541554
tm.assert_frame_equal(result, expected)
1555+
1556+
1557+
def test_arith_reindex_with_duplicates():
1558+
# https://github.com/pandas-dev/pandas/issues/35194
1559+
df1 = pd.DataFrame(data=[[0]], columns=["second"])
1560+
df2 = pd.DataFrame(data=[[0, 0, 0]], columns=["first", "second", "second"])
1561+
result = df1 + df2
1562+
expected = pd.DataFrame([[np.nan, 0, 0]], columns=["first", "second", "second"])
1563+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)