Skip to content

Commit 8451b99

Browse files
jbrockmendel authored and roberthdevries committed
REGR: fix op(frame, frame2) with reindex (pandas-dev#31679)
1 parent fba4e46 commit 8451b99

File tree

3 files changed

+79
-1
lines changed

3 files changed

+79
-1
lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
2020
- Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`)
2121
- Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.Rolling.corr>` when using a time offset (:issue:`31789`)
22+
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
2223
-
2324

2425
.. ---------------------------------------------------------------------------

pandas/core/ops/__init__.py

+59-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"""
66
import datetime
77
import operator
8-
from typing import Optional, Set, Tuple, Union
8+
from typing import TYPE_CHECKING, Optional, Set, Tuple, Union
99

1010
import numpy as np
1111

@@ -61,6 +61,9 @@
6161
rxor,
6262
)
6363

64+
if TYPE_CHECKING:
65+
from pandas import DataFrame # noqa:F401
66+
6467
# -----------------------------------------------------------------------------
6568
# constants
6669
ARITHMETIC_BINOPS: Set[str] = {
@@ -703,6 +706,58 @@ def to_series(right):
703706
return left, right
704707

705708

709+
def _should_reindex_frame_op(
710+
left: "DataFrame", right, axis, default_axis: int, fill_value, level
711+
) -> bool:
712+
"""
713+
Check if this is an operation between DataFrames that will need to reindex.
714+
"""
715+
assert isinstance(left, ABCDataFrame)
716+
717+
if not isinstance(right, ABCDataFrame):
718+
return False
719+
720+
if fill_value is None and level is None and axis is default_axis:
721+
# TODO: any other cases we should handle here?
722+
cols = left.columns.intersection(right.columns)
723+
if not (cols.equals(left.columns) and cols.equals(right.columns)):
724+
return True
725+
726+
return False
727+
728+
729+
def _frame_arith_method_with_reindex(
730+
left: "DataFrame", right: "DataFrame", op
731+
) -> "DataFrame":
732+
"""
733+
For DataFrame-with-DataFrame operations that require reindexing,
734+
operate only on shared columns, then reindex.
735+
736+
Parameters
737+
----------
738+
left : DataFrame
739+
right : DataFrame
740+
op : binary operator
741+
742+
Returns
743+
-------
744+
DataFrame
745+
"""
746+
# GH#31623, only operate on shared columns
747+
cols = left.columns.intersection(right.columns)
748+
749+
new_left = left[cols]
750+
new_right = right[cols]
751+
result = op(new_left, new_right)
752+
753+
# Do the join on the columns instead of using _align_method_FRAME
754+
# to avoid constructing two potentially large/sparse DataFrames
755+
join_columns, _, _ = left.columns.join(
756+
right.columns, how="outer", level=None, return_indexers=True
757+
)
758+
return result.reindex(join_columns, axis=1)
759+
760+
706761
def _arith_method_FRAME(cls, op, special):
707762
str_rep = _get_opstr(op)
708763
op_name = _get_op_name(op, special)
@@ -720,6 +775,9 @@ def _arith_method_FRAME(cls, op, special):
720775
@Appender(doc)
721776
def f(self, other, axis=default_axis, level=None, fill_value=None):
722777

778+
if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level):
779+
return _frame_arith_method_with_reindex(self, other, op)
780+
723781
self, other = _align_method_FRAME(self, other, axis, flex=True, level=level)
724782

725783
if isinstance(other, ABCDataFrame):

pandas/tests/frame/test_arithmetic.py

+19
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,25 @@ def test_operations_with_interval_categories_index(self, all_arithmetic_operator
711711
expected = pd.DataFrame([[getattr(n, op)(num) for n in data]], columns=ind)
712712
tm.assert_frame_equal(result, expected)
713713

714+
def test_frame_with_frame_reindex(self):
    # GH#31623
    # Subtract a single-column subset from a two-column datetime frame: the
    # shared column yields zero timedeltas and the unshared column is NaN.
    stamps = {
        "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")],
        "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")],
    }
    full = pd.DataFrame(stamps, columns=["foo", "bar"])
    foo_only = full[["foo"]]

    result = full - foo_only

    zero = pd.Timedelta(0)
    expected = pd.DataFrame(
        {"foo": [zero, zero], "bar": [np.nan, np.nan]},
        columns=["bar", "foo"],
    )
    tm.assert_frame_equal(result, expected)
732+
714733

715734
def test_frame_with_zero_len_series_corner_cases():
716735
# GH#28600

0 commit comments

Comments
 (0)