Skip to content

Commit 99c14ed

Browse files
REGR: fix op(frame, frame2) with reindex (pandas-dev#31679) (pandas-dev#32088)
Co-authored-by: Simon Hawkins <[email protected]>
1 parent 37a4f4f commit 99c14ed

File tree

3 files changed

+79
-1
lines changed

3 files changed

+79
-1
lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
2020
- Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`)
2121
- Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.Rolling.corr>` when using a time offset (:issue:`31789`)
22+
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
2223
-
2324

2425
.. ---------------------------------------------------------------------------

pandas/core/ops/__init__.py

+59-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"""
66
import datetime
77
import operator
8-
from typing import Set, Tuple, Union
8+
from typing import TYPE_CHECKING, Set, Tuple, Union
99

1010
import numpy as np
1111

@@ -60,6 +60,9 @@
6060
rxor,
6161
)
6262

63+
if TYPE_CHECKING:
64+
from pandas import DataFrame # noqa:F401
65+
6366
# -----------------------------------------------------------------------------
6467
# constants
6568
ARITHMETIC_BINOPS: Set[str] = {
@@ -675,6 +678,58 @@ def to_series(right):
675678
return right
676679

677680

681+
def _should_reindex_frame_op(
682+
left: "DataFrame", right, axis, default_axis: int, fill_value, level
683+
) -> bool:
684+
"""
685+
Check if this is an operation between DataFrames that will need to reindex.
686+
"""
687+
assert isinstance(left, ABCDataFrame)
688+
689+
if not isinstance(right, ABCDataFrame):
690+
return False
691+
692+
if fill_value is None and level is None and axis is default_axis:
693+
# TODO: any other cases we should handle here?
694+
cols = left.columns.intersection(right.columns)
695+
if not (cols.equals(left.columns) and cols.equals(right.columns)):
696+
return True
697+
698+
return False
699+
700+
701+
def _frame_arith_method_with_reindex(
702+
left: "DataFrame", right: "DataFrame", op
703+
) -> "DataFrame":
704+
"""
705+
For DataFrame-with-DataFrame operations that require reindexing,
706+
operate only on shared columns, then reindex.
707+
708+
Parameters
709+
----------
710+
left : DataFrame
711+
right : DataFrame
712+
op : binary operator
713+
714+
Returns
715+
-------
716+
DataFrame
717+
"""
718+
# GH#31623, only operate on shared columns
719+
cols = left.columns.intersection(right.columns)
720+
721+
new_left = left[cols]
722+
new_right = right[cols]
723+
result = op(new_left, new_right)
724+
725+
# Do the join on the columns instead of using _align_method_FRAME
726+
# to avoid constructing two potentially large/sparse DataFrames
727+
join_columns, _, _ = left.columns.join(
728+
right.columns, how="outer", level=None, return_indexers=True
729+
)
730+
return result.reindex(join_columns, axis=1)
731+
732+
678733
def _arith_method_FRAME(cls, op, special):
679734
str_rep = _get_opstr(op)
680735
op_name = _get_op_name(op, special)
@@ -692,6 +747,9 @@ def _arith_method_FRAME(cls, op, special):
692747
@Appender(doc)
693748
def f(self, other, axis=default_axis, level=None, fill_value=None):
694749

750+
if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level):
751+
return _frame_arith_method_with_reindex(self, other, op)
752+
695753
other = _align_method_FRAME(self, other, axis)
696754

697755
if isinstance(other, ABCDataFrame):

pandas/tests/frame/test_arithmetic.py

+19
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,25 @@ def test_operations_with_interval_categories_index(self, all_arithmetic_operator
696696
expected = pd.DataFrame([[getattr(n, op)(num) for n in data]], columns=ind)
697697
tm.assert_frame_equal(result, expected)
698698

699+
def test_frame_with_frame_reindex(self):
700+
# GH#31623
701+
df = pd.DataFrame(
702+
{
703+
"foo": [pd.Timestamp("2019"), pd.Timestamp("2020")],
704+
"bar": [pd.Timestamp("2018"), pd.Timestamp("2021")],
705+
},
706+
columns=["foo", "bar"],
707+
)
708+
df2 = df[["foo"]]
709+
710+
result = df - df2
711+
712+
expected = pd.DataFrame(
713+
{"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]},
714+
columns=["bar", "foo"],
715+
)
716+
tm.assert_frame_equal(result, expected)
717+
699718

700719
def test_frame_with_zero_len_series_corner_cases():
701720
# GH#28600

0 commit comments

Comments
 (0)