From 45cc092d9c2a487a1bfbe112907575053f88c8fa Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 27 Dec 2022 18:19:18 -0800 Subject: [PATCH 1/2] DEPR: Enforce alignment with numpy ufuncs --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/arraylike.py | 82 --------------------------- pandas/tests/frame/test_ufunc.py | 97 ++++++++++++++++---------------- 3 files changed, 48 insertions(+), 132 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9957ccb4fde50..f043b0c7c4751 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -525,6 +525,7 @@ Removal of prior version deprecations/changes - Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`) - Removed deprecated :meth:`Index.asi8` (:issue:`37877`) - Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`) +- Enforced deprecation changing behavior when applying a numpy ufunc on multiple non-aligned (on the index or columns) :class:`DataFrame` that will now align the inputs first (:issue:`39239`) - Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`) - Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`) - Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 15bb2f59fcccf..4bed021cabea1 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -8,13 +8,11 @@ import operator from typing import Any -import warnings import numpy as np from pandas._libs import lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.generic import ABCNDFrame @@ -166,81 +164,6 @@ def __rpow__(self, other): # Helpers to implement __array_ufunc__ -def _is_aligned(frame, other): - """ - Helper to check if a DataFrame is aligned with another DataFrame or Series. - """ - from pandas import DataFrame - - if isinstance(other, DataFrame): - return frame._indexed_same(other) - else: - # Series -> match index - return frame.columns.equals(other.index) - - -def _maybe_fallback(ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any): - """ - In the future DataFrame, inputs to ufuncs will be aligned before applying - the ufunc, but for now we ignore the index but raise a warning if behaviour - would change in the future. - This helper detects the case where a warning is needed and then fallbacks - to applying the ufunc on arrays to avoid alignment. - - See https://github.com/pandas-dev/pandas/pull/39239 - """ - from pandas import DataFrame - from pandas.core.generic import NDFrame - - n_alignable = sum(isinstance(x, NDFrame) for x in inputs) - n_frames = sum(isinstance(x, DataFrame) for x in inputs) - - if n_alignable >= 2 and n_frames >= 1: - # if there are 2 alignable inputs (Series or DataFrame), of which at least 1 - # is a DataFrame -> we would have had no alignment before -> warn that this - # will align in the future - - # the first frame is what determines the output index/columns in pandas < 1.2 - first_frame = next(x for x in inputs if isinstance(x, DataFrame)) - - # check if the objects are aligned or not - non_aligned = sum( - not _is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame) - ) - - # if at least one is not aligned -> warn and fallback to array behaviour - if non_aligned: - warnings.warn( - "Calling a ufunc on non-aligned DataFrames (or DataFrame/Series " - "combination). Currently, the indices are ignored and the result " - "takes the index/columns of the first DataFrame. In the future , " - "the DataFrames/Series will be aligned before applying the ufunc.\n" - "Convert one of the arguments to a NumPy array " - "(eg 'ufunc(df1, np.asarray(df2)') to keep the current behaviour, " - "or align manually (eg 'df1, df2 = df1.align(df2)') before passing to " - "the ufunc to obtain the future behaviour and silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - # keep the first dataframe of the inputs, other DataFrame/Series is - # converted to array for fallback behaviour - new_inputs = [] - for x in inputs: - if x is first_frame: - new_inputs.append(x) - elif isinstance(x, NDFrame): - new_inputs.append(np.asarray(x)) - else: - new_inputs.append(x) - - # call the ufunc on those transformed inputs - return getattr(ufunc, method)(*new_inputs, **kwargs) - - # signal that we didn't fallback / execute the ufunc yet - return NotImplemented - - def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any): """ Compatibility with numpy ufuncs. @@ -260,11 +183,6 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any) kwargs = _standardize_out_kwarg(**kwargs) - # for backwards compatibility check and potentially fallback for non-aligned frames - result = _maybe_fallback(ufunc, method, *inputs, **kwargs) - if result is not NotImplemented: - return result - # for binary ops, use our custom dunder methods result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs) if result is not NotImplemented: diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py index 611e0eeb3e5f0..9223c4364579e 100644 --- a/pandas/tests/frame/test_ufunc.py +++ b/pandas/tests/frame/test_ufunc.py @@ -118,21 +118,18 @@ def test_binary_input_aligns_columns(request, dtype_a, dtype_b): if isinstance(dtype_a, dict) and isinstance(dtype_b, dict): dtype_b["C"] = dtype_b.pop("B") - df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b) - with tm.assert_produces_warning(FutureWarning): - result = np.heaviside(df1, df2) - # Expected future behaviour: - # expected = np.heaviside( - # np.array([[1, 3, np.nan], [2, 4, np.nan]]), - # np.array([[1, np.nan, 3], [2, np.nan, 4]]), - # ) - # expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"]) - expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"]) + # As of 2.0, align first before applying the ufunc + result = np.heaviside(df1, df2) + expected = np.heaviside( + np.array([[1, 3, np.nan], [2, 4, np.nan]]), + np.array([[1, np.nan, 3], [2, np.nan, 4]]), + ) + expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"]) tm.assert_frame_equal(result, expected) - # ensure the expected is the same when applying with numpy array result = np.heaviside(df1, df2.values) + expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"]) tm.assert_frame_equal(result, expected) @@ -146,35 +143,29 @@ def test_binary_input_aligns_index(request, dtype): ) df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype) df2 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype) - with tm.assert_produces_warning(FutureWarning): - result = np.heaviside(df1, df2) - # Expected future behaviour: - # expected = np.heaviside( - # np.array([[1, 3], [3, 4], [np.nan, np.nan]]), - # np.array([[1, 3], [np.nan, np.nan], [3, 4]]), - # ) - # # TODO(FloatArray): this will be Float64Dtype. - # expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"]) - expected = pd.DataFrame( - [[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"] + result = np.heaviside(df1, df2) + expected = np.heaviside( + np.array([[1, 3], [3, 4], [np.nan, np.nan]]), + np.array([[1, 3], [np.nan, np.nan], [3, 4]]), ) + # TODO(FloatArray): this will be Float64Dtype. + expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"]) tm.assert_frame_equal(result, expected) - # ensure the expected is the same when applying with numpy array result = np.heaviside(df1, df2.values) + expected = pd.DataFrame( + [[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"] + ) tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Calling a ufunc on non-aligned:FutureWarning") def test_binary_frame_series_raises(): # We don't currently implement df = pd.DataFrame({"A": [1, 2]}) - # with pytest.raises(NotImplementedError, match="logaddexp"): - with pytest.raises(ValueError, match=""): + with pytest.raises(NotImplementedError, match="logaddexp"): np.logaddexp(df, df["A"]) - # with pytest.raises(NotImplementedError, match="logaddexp"): - with pytest.raises(ValueError, match=""): + with pytest.raises(NotImplementedError, match="logaddexp"): np.logaddexp(df["A"], df) @@ -206,7 +197,8 @@ def test_frame_outer_disallowed(): np.subtract.outer(df, df) -def test_alignment_deprecation(): +def test_alignment_deprecation_enforced(): + # Enforced in 2.0 # https://github.com/pandas-dev/pandas/issues/39184 df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) @@ -221,12 +213,11 @@ def test_alignment_deprecation(): result = np.add(df1, df1) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - # non-aligned -> warns - result = np.add(df1, df2) + result = np.add(df1, df2.values) tm.assert_frame_equal(result, expected) - result = np.add(df1, df2.values) + result = np.add(df1, df2) + expected = pd.DataFrame({"a": [np.nan] * 3, "b": [5, 7, 9], "c": [np.nan] * 3}) tm.assert_frame_equal(result, expected) result = np.add(df1.values, df2) @@ -241,20 +232,23 @@ def test_alignment_deprecation(): result = np.add(df1, s1) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - result = np.add(df1, s2) + result = np.add(df1, s2.values) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - result = np.add(s2, df1) + expected = pd.DataFrame( + {"a": [np.nan] * 3, "b": [5.0, 6.0, 7.0], "c": [np.nan] * 3} + ) + result = np.add(df1, s2) tm.assert_frame_equal(result, expected) - result = np.add(df1, s2.values) - tm.assert_frame_equal(result, expected) + msg = "Cannot apply ufunc to mixed DataFrame and Series inputs." + with pytest.raises(NotImplementedError, match=msg): + np.add(s2, df1) @td.skip_if_no("numba") -def test_alignment_deprecation_many_inputs(request): +def test_alignment_deprecation_many_inputs_enforced(): + # Enforced in 2.0 # https://github.com/pandas-dev/pandas/issues/39184 # test that the deprecation also works with > 2 inputs -> using a numba # written ufunc for this because numpy itself doesn't have such ufuncs @@ -271,20 +265,22 @@ def my_ufunc(x, y, z): df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) df3 = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]}) - with tm.assert_produces_warning(FutureWarning): - result = my_ufunc(df1, df2, df3) - expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"]) + result = my_ufunc(df1, df2, df3) + expected = pd.DataFrame(np.full((3, 3), np.nan), columns=["a", "b", "c"]) tm.assert_frame_equal(result, expected) # all aligned -> no warning with tm.assert_produces_warning(None): result = my_ufunc(df1, df1, df1) + expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) # mixed frame / arrays - with tm.assert_produces_warning(FutureWarning): - result = my_ufunc(df1, df2, df3.values) - tm.assert_frame_equal(result, expected) + msg = ( + r"operands could not be broadcast together with shapes \(3,3\) \(3,3\) \(3,2\)" + ) + with pytest.raises(ValueError, match=msg): + my_ufunc(df1, df2, df3.values) # single frame -> no warning with tm.assert_produces_warning(None): @@ -292,10 +288,11 @@ def my_ufunc(x, y, z): tm.assert_frame_equal(result, expected) # takes indices of first frame - with tm.assert_produces_warning(FutureWarning): - result = my_ufunc(df1.values, df2, df3) - expected = expected.set_axis(["b", "c"], axis=1) - tm.assert_frame_equal(result, expected) + msg = ( + r"operands could not be broadcast together with shapes \(3,2\) \(3,3\) \(3,3\)" + ) + with pytest.raises(ValueError, match=msg): + my_ufunc(df1.values, df2, df3) def test_array_ufuncs_for_many_arguments(): From 8245bfe81a17fca388bd450df56f2509dcb547d6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 28 Dec 2022 11:02:14 -0800 Subject: [PATCH 2/2] fix other test --- pandas/tests/series/test_ufunc.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index dae06a58f0e49..b3f1a1be903e5 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -426,14 +426,10 @@ def test_np_matmul(): # GH26650 df1 = pd.DataFrame(data=[[-1, 1, 10]]) df2 = pd.DataFrame(data=[-1, 1, 10]) - expected_result = pd.DataFrame(data=[102]) + expected = pd.DataFrame(data=[102]) - with tm.assert_produces_warning(FutureWarning, match="on non-aligned"): - result = np.matmul(df1, df2) - tm.assert_frame_equal( - expected_result, - result, - ) + result = np.matmul(df1, df2) + tm.assert_frame_equal(expected, result) def test_array_ufuncs_for_many_arguments():