Skip to content

DEPR: enforce not-automatically aligning in DataFrame comparisons #49946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,7 @@ Removal of prior version deprecations/changes
- Enforced deprecation of calling numpy "ufunc"s on :class:`DataFrame` with ``method="outer"``; this now raises ``NotImplementedError`` (:issue:`36955`)
- Enforced deprecation disallowing passing ``numeric_only=True`` to :class:`Series` reductions (``rank``, ``any``, ``all``, ...) with non-numeric dtype (:issue:`47500`)
- Changed behavior of :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`)
- Comparisons between a :class:`DataFrame` and a :class:`Series` where the frame's columns do not match the series's index now raise ``ValueError`` instead of automatically aligning; do ``left, right = left.align(right, axis=1, copy=False)`` before comparing (:issue:`36795`)
- Enforced deprecation ``numeric_only=None`` (the default) in DataFrame reductions that would silently drop columns that raised; ``numeric_only`` now defaults to ``False`` (:issue:`41480`)
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
Expand Down
13 changes: 4 additions & 9 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import operator
from typing import TYPE_CHECKING
import warnings

import numpy as np

Expand All @@ -18,7 +17,6 @@
Level,
)
from pandas.util._decorators import Appender
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_array_like,
Expand Down Expand Up @@ -299,13 +297,10 @@ def to_series(right):

if not flex:
if not left.axes[axis].equals(right.index):
warnings.warn(
"Automatic reindexing on DataFrame vs Series comparisons "
"is deprecated and will raise ValueError in a future version. "
"Do `left, right = left.align(right, axis=1, copy=False)` "
"before e.g. `left == right`",
FutureWarning,
stacklevel=find_stack_level(),
raise ValueError(
"Operands are not aligned. Do "
"`left, right = left.align(right, axis=1, copy=False)` "
"before operating."
)

left, right = left.align(
Expand Down
45 changes: 31 additions & 14 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,43 +307,60 @@ def test_timestamp_compare_series(self, left, right):

def test_dt64arr_timestamp_equality(self, box_with_array):
# GH#11034
box = box_with_array

ser = Series([Timestamp("2000-01-29 01:59:00"), Timestamp("2000-01-30"), NaT])
ser = tm.box_expected(ser, box_with_array)
ser = tm.box_expected(ser, box)
Comment on lines +310 to +313
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just for my understanding, is this purely cosmetic? (no objections anyway)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it’s a pattern we use elsewhere

xbox = get_upcast_box(ser, ser, True)

result = ser != ser
expected = tm.box_expected([False, False, True], xbox)
tm.assert_equal(result, expected)

warn = FutureWarning if box_with_array is pd.DataFrame else None
with tm.assert_produces_warning(warn):
if box is pd.DataFrame:
# alignment for frame vs series comparisons deprecated
# in GH#36795 enforced 2.0
with pytest.raises(ValueError, match="not aligned"):
ser != ser[0]

else:
result = ser != ser[0]
expected = tm.box_expected([False, True, True], xbox)
tm.assert_equal(result, expected)
expected = tm.box_expected([False, True, True], xbox)
tm.assert_equal(result, expected)

with tm.assert_produces_warning(warn):
if box is pd.DataFrame:
# alignment for frame vs series comparisons deprecated
# in GH#36795 enforced 2.0
with pytest.raises(ValueError, match="not aligned"):
ser != ser[2]
else:
result = ser != ser[2]
expected = tm.box_expected([True, True, True], xbox)
tm.assert_equal(result, expected)
expected = tm.box_expected([True, True, True], xbox)
tm.assert_equal(result, expected)

result = ser == ser
expected = tm.box_expected([True, True, False], xbox)
tm.assert_equal(result, expected)

with tm.assert_produces_warning(warn):
if box is pd.DataFrame:
# alignment for frame vs series comparisons deprecated
# in GH#36795 enforced 2.0
with pytest.raises(ValueError, match="not aligned"):
ser == ser[0]
else:
result = ser == ser[0]
expected = tm.box_expected([True, False, False], xbox)
tm.assert_equal(result, expected)
expected = tm.box_expected([True, False, False], xbox)
tm.assert_equal(result, expected)

with tm.assert_produces_warning(warn):
if box is pd.DataFrame:
# alignment for frame vs series comparisons deprecated
# in GH#36795 enforced 2.0
with pytest.raises(ValueError, match="not aligned"):
ser == ser[2]
else:
result = ser == ser[2]
expected = tm.box_expected([False, False, False], xbox)
tm.assert_equal(result, expected)
expected = tm.box_expected([False, False, False], xbox)
tm.assert_equal(result, expected)

@pytest.mark.parametrize(
"datetimelike",
Expand Down
16 changes: 6 additions & 10 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1164,19 +1164,15 @@ def test_frame_with_zero_len_series_corner_cases():
expected = DataFrame(df.values * np.nan, columns=df.columns)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
# Automatic alignment for comparisons deprecated
result = df == ser
expected = DataFrame(False, index=df.index, columns=df.columns)
tm.assert_frame_equal(result, expected)
with pytest.raises(ValueError, match="not aligned"):
# Automatic alignment for comparisons deprecated GH#36795, enforced 2.0
df == ser

# non-float case should not raise on comparison
# non-float case should not raise TypeError on comparison
df2 = DataFrame(df.values.view("M8[ns]"), columns=df.columns)
with tm.assert_produces_warning(FutureWarning):
with pytest.raises(ValueError, match="not aligned"):
# Automatic alignment for comparisons deprecated GH#36795, enforced 2.0
result = df2 == ser
expected = DataFrame(False, index=df.index, columns=df.columns)
tm.assert_frame_equal(result, expected)
df2 == ser


def test_zero_len_frame_with_series_corner_cases():
Expand Down
17 changes: 14 additions & 3 deletions pandas/tests/generic/test_finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,9 +476,6 @@ def test_finalize_called_eval_numexpr():
# Binary operations


@pytest.mark.filterwarnings(
"ignore:Automatic reindexing on DataFrame vs Series:FutureWarning"
)
@pytest.mark.parametrize("annotate", ["left", "right", "both"])
@pytest.mark.parametrize(
"args",
Expand All @@ -504,6 +501,20 @@ def test_binops(request, args, annotate, all_binary_operators):
if annotate in {"left", "both"} and not isinstance(right, int):
right.attrs = {"a": 1}

is_cmp = all_binary_operators in [
operator.eq,
operator.ne,
operator.gt,
operator.ge,
operator.lt,
operator.le,
]
if is_cmp and isinstance(left, pd.DataFrame) and isinstance(right, pd.Series):
# in 2.0 silent alignment on comparisons was removed xref GH#28759
left, right = left.align(right, axis=1, copy=False)
elif is_cmp and isinstance(left, pd.Series) and isinstance(right, pd.DataFrame):
right, left = right.align(left, axis=1, copy=False)

result = all_binary_operators(left, right)
assert result.attrs == {"a": 1}

Expand Down