Skip to content

"Backport PR #31679 on branch 1.0.x" #32088

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ Fixed regressions

- Fixed regression in :meth:`DataFrame.to_excel` when ``columns`` kwarg is passed (:issue:`31677`)
- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
- Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`)
- Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.Rolling.corr>` when using a time offset (:issue:`31789`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have some "merge conflicts" here; these entries come from other PRs that have not been backported yet. (We should follow up on backports more diligently to avoid this.)

- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
-

.. ---------------------------------------------------------------------------
Expand Down
60 changes: 59 additions & 1 deletion pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""
import datetime
import operator
from typing import Set, Tuple, Union
from typing import TYPE_CHECKING, Set, Tuple, Union

import numpy as np

Expand Down Expand Up @@ -60,6 +60,9 @@
rxor,
)

if TYPE_CHECKING:
from pandas import DataFrame # noqa:F401

# -----------------------------------------------------------------------------
# constants
ARITHMETIC_BINOPS: Set[str] = {
Expand Down Expand Up @@ -675,6 +678,58 @@ def to_series(right):
return right


def _should_reindex_frame_op(
left: "DataFrame", right, axis, default_axis: int, fill_value, level
) -> bool:
"""
Check if this is an operation between DataFrames that will need to reindex.
"""
assert isinstance(left, ABCDataFrame)

if not isinstance(right, ABCDataFrame):
return False

if fill_value is None and level is None and axis is default_axis:
# TODO: any other cases we should handle here?
cols = left.columns.intersection(right.columns)
if not (cols.equals(left.columns) and cols.equals(right.columns)):
return True

return False


def _frame_arith_method_with_reindex(
left: "DataFrame", right: "DataFrame", op
) -> "DataFrame":
"""
For DataFrame-with-DataFrame operations that require reindexing,
operate only on shared columns, then reindex.

Parameters
----------
left : DataFrame
right : DataFrame
op : binary operator

Returns
-------
DataFrame
"""
# GH#31623, only operate on shared columns
cols = left.columns.intersection(right.columns)

new_left = left[cols]
new_right = right[cols]
result = op(new_left, new_right)

# Do the join on the columns instead of using _align_method_FRAME
# to avoid constructing two potentially large/sparse DataFrames
join_columns, _, _ = left.columns.join(
right.columns, how="outer", level=None, return_indexers=True
)
return result.reindex(join_columns, axis=1)


def _arith_method_FRAME(cls, op, special):
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)
Expand All @@ -692,6 +747,9 @@ def _arith_method_FRAME(cls, op, special):
@Appender(doc)
def f(self, other, axis=default_axis, level=None, fill_value=None):

if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level):
return _frame_arith_method_with_reindex(self, other, op)

other = _align_method_FRAME(self, other, axis)

if isinstance(other, ABCDataFrame):
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,25 @@ def test_operations_with_interval_categories_index(self, all_arithmetic_operator
expected = pd.DataFrame([[getattr(n, op)(num) for n in data]], columns=ind)
tm.assert_frame_equal(result, expected)

def test_frame_with_frame_reindex(self):
# GH#31623
df = pd.DataFrame(
{
"foo": [pd.Timestamp("2019"), pd.Timestamp("2020")],
"bar": [pd.Timestamp("2018"), pd.Timestamp("2021")],
},
columns=["foo", "bar"],
)
df2 = df[["foo"]]

result = df - df2

expected = pd.DataFrame(
{"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]},
columns=["bar", "foo"],
)
tm.assert_frame_equal(result, expected)


def test_frame_with_zero_len_series_corner_cases():
# GH#28600
Expand Down