Skip to content

Commit fc91090

Browse files
authored
DEPR: enforce not-automatically aligning in DataFrame comparisons (#49946)
* DEPR: enforce not-automatically aligning in DataFrame comparisons * fix GH ref * troubleshoot asv
1 parent 176621a commit fc91090

File tree

6 files changed

+60
-36
lines changed

6 files changed

+60
-36
lines changed

asv_bench/benchmarks/arithmetic.py

+4
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ def time_frame_op_with_series_axis0(self, opname):
106106
def time_frame_op_with_series_axis1(self, opname):
107107
getattr(operator, opname)(self.df, self.ser)
108108

109+
# exclude comparisons from the params for time_frame_op_with_series_axis1
110+
# since they no longer align automatically and would raise
111+
time_frame_op_with_series_axis1.params = [params[0][6:]]
112+
109113

110114
class FrameWithFrameWide:
111115
# Many-columns, mixed dtypes

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ Removal of prior version deprecations/changes
570570
- Enforced deprecation of calling numpy "ufunc"s on :class:`DataFrame` with ``method="outer"``; this now raises ``NotImplementedError`` (:issue:`36955`)
571571
- Enforced deprecation disallowing passing ``numeric_only=True`` to :class:`Series` reductions (``rank``, ``any``, ``all``, ...) with non-numeric dtype (:issue:`47500`)
572572
- Changed behavior of :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`)
573+
- Comparisons between a :class:`DataFrame` and a :class:`Series` where the frame's columns do not match the series's index now raise ``ValueError`` instead of automatically aligning; do ``left, right = left.align(right, axis=1, copy=False)`` before comparing (:issue:`36795`)
573574
- Enforced deprecation ``numeric_only=None`` (the default) in DataFrame reductions that would silently drop columns that raised; ``numeric_only`` now defaults to ``False`` (:issue:`41480`)
574575
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
575576
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)

pandas/core/ops/__init__.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import operator
99
from typing import TYPE_CHECKING
10-
import warnings
1110

1211
import numpy as np
1312

@@ -18,7 +17,6 @@
1817
Level,
1918
)
2019
from pandas.util._decorators import Appender
21-
from pandas.util._exceptions import find_stack_level
2220

2321
from pandas.core.dtypes.common import (
2422
is_array_like,
@@ -299,13 +297,10 @@ def to_series(right):
299297

300298
if not flex:
301299
if not left.axes[axis].equals(right.index):
302-
warnings.warn(
303-
"Automatic reindexing on DataFrame vs Series comparisons "
304-
"is deprecated and will raise ValueError in a future version. "
305-
"Do `left, right = left.align(right, axis=1, copy=False)` "
306-
"before e.g. `left == right`",
307-
FutureWarning,
308-
stacklevel=find_stack_level(),
300+
raise ValueError(
301+
"Operands are not aligned. Do "
302+
"`left, right = left.align(right, axis=1, copy=False)` "
303+
"before operating."
309304
)
310305

311306
left, right = left.align(

pandas/tests/arithmetic/test_datetime64.py

+31-14
Original file line numberDiff line numberDiff line change
@@ -307,43 +307,60 @@ def test_timestamp_compare_series(self, left, right):
307307

308308
def test_dt64arr_timestamp_equality(self, box_with_array):
309309
# GH#11034
310+
box = box_with_array
310311

311312
ser = Series([Timestamp("2000-01-29 01:59:00"), Timestamp("2000-01-30"), NaT])
312-
ser = tm.box_expected(ser, box_with_array)
313+
ser = tm.box_expected(ser, box)
313314
xbox = get_upcast_box(ser, ser, True)
314315

315316
result = ser != ser
316317
expected = tm.box_expected([False, False, True], xbox)
317318
tm.assert_equal(result, expected)
318319

319-
warn = FutureWarning if box_with_array is pd.DataFrame else None
320-
with tm.assert_produces_warning(warn):
320+
if box is pd.DataFrame:
321321
# alignment for frame vs series comparisons deprecated
322+
# in GH#36795, enforced in 2.0
323+
with pytest.raises(ValueError, match="not aligned"):
324+
ser != ser[0]
325+
326+
else:
322327
result = ser != ser[0]
323-
expected = tm.box_expected([False, True, True], xbox)
324-
tm.assert_equal(result, expected)
328+
expected = tm.box_expected([False, True, True], xbox)
329+
tm.assert_equal(result, expected)
325330

326-
with tm.assert_produces_warning(warn):
331+
if box is pd.DataFrame:
327332
# alignment for frame vs series comparisons deprecated
333+
# in GH#46795 enforced 2.0
334+
with pytest.raises(ValueError, match="not aligned"):
335+
ser != ser[2]
336+
else:
328337
result = ser != ser[2]
329-
expected = tm.box_expected([True, True, True], xbox)
330-
tm.assert_equal(result, expected)
338+
expected = tm.box_expected([True, True, True], xbox)
339+
tm.assert_equal(result, expected)
331340

332341
result = ser == ser
333342
expected = tm.box_expected([True, True, False], xbox)
334343
tm.assert_equal(result, expected)
335344

336-
with tm.assert_produces_warning(warn):
345+
if box is pd.DataFrame:
337346
# alignment for frame vs series comparisons deprecated
347+
# in GH#46795 enforced 2.0
348+
with pytest.raises(ValueError, match="not aligned"):
349+
ser == ser[0]
350+
else:
338351
result = ser == ser[0]
339-
expected = tm.box_expected([True, False, False], xbox)
340-
tm.assert_equal(result, expected)
352+
expected = tm.box_expected([True, False, False], xbox)
353+
tm.assert_equal(result, expected)
341354

342-
with tm.assert_produces_warning(warn):
355+
if box is pd.DataFrame:
343356
# alignment for frame vs series comparisons deprecated
357+
# in GH#46795 enforced 2.0
358+
with pytest.raises(ValueError, match="not aligned"):
359+
ser == ser[2]
360+
else:
344361
result = ser == ser[2]
345-
expected = tm.box_expected([False, False, False], xbox)
346-
tm.assert_equal(result, expected)
362+
expected = tm.box_expected([False, False, False], xbox)
363+
tm.assert_equal(result, expected)
347364

348365
@pytest.mark.parametrize(
349366
"datetimelike",

pandas/tests/frame/test_arithmetic.py

+6-10
Original file line numberDiff line numberDiff line change
@@ -1164,19 +1164,15 @@ def test_frame_with_zero_len_series_corner_cases():
11641164
expected = DataFrame(df.values * np.nan, columns=df.columns)
11651165
tm.assert_frame_equal(result, expected)
11661166

1167-
with tm.assert_produces_warning(FutureWarning):
1168-
# Automatic alignment for comparisons deprecated
1169-
result = df == ser
1170-
expected = DataFrame(False, index=df.index, columns=df.columns)
1171-
tm.assert_frame_equal(result, expected)
1167+
with pytest.raises(ValueError, match="not aligned"):
1168+
# Automatic alignment for comparisons deprecated GH#36795, enforced 2.0
1169+
df == ser
11721170

1173-
# non-float case should not raise on comparison
1171+
# non-float case should not raise TypeError on comparison
11741172
df2 = DataFrame(df.values.view("M8[ns]"), columns=df.columns)
1175-
with tm.assert_produces_warning(FutureWarning):
1173+
with pytest.raises(ValueError, match="not aligned"):
11761174
# Automatic alignment for comparisons deprecated
1177-
result = df2 == ser
1178-
expected = DataFrame(False, index=df.index, columns=df.columns)
1179-
tm.assert_frame_equal(result, expected)
1175+
df2 == ser
11801176

11811177

11821178
def test_zero_len_frame_with_series_corner_cases():

pandas/tests/generic/test_finalize.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -476,9 +476,6 @@ def test_finalize_called_eval_numexpr():
476476
# Binary operations
477477

478478

479-
@pytest.mark.filterwarnings(
480-
"ignore:Automatic reindexing on DataFrame vs Series:FutureWarning"
481-
)
482479
@pytest.mark.parametrize("annotate", ["left", "right", "both"])
483480
@pytest.mark.parametrize(
484481
"args",
@@ -504,6 +501,20 @@ def test_binops(request, args, annotate, all_binary_operators):
504501
if annotate in {"left", "both"} and not isinstance(right, int):
505502
right.attrs = {"a": 1}
506503

504+
is_cmp = all_binary_operators in [
505+
operator.eq,
506+
operator.ne,
507+
operator.gt,
508+
operator.ge,
509+
operator.lt,
510+
operator.le,
511+
]
512+
if is_cmp and isinstance(left, pd.DataFrame) and isinstance(right, pd.Series):
513+
# in 2.0 silent alignment on comparisons was removed xref GH#28759
514+
left, right = left.align(right, axis=1, copy=False)
515+
elif is_cmp and isinstance(left, pd.Series) and isinstance(right, pd.DataFrame):
516+
right, left = right.align(left, axis=1, copy=False)
517+
507518
result = all_binary_operators(left, right)
508519
assert result.attrs == {"a": 1}
509520

0 commit comments

Comments
 (0)