Skip to content

Commit 23c3676

Browse files
authored
DEPR: Enforce alignment with numpy ufuncs (pandas-dev#50455)
* DEPR: Enforce alignment with numpy ufuncs * fix other test
1 parent ddde1dd commit 23c3676

File tree

4 files changed

+51
-139
lines changed

4 files changed

+51
-139
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ Removal of prior version deprecations/changes
527527
- Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`)
528528
- Removed deprecated :meth:`Index.asi8` (:issue:`37877`)
529529
- Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`)
530+
- Enforced deprecation changing behavior when applying a numpy ufunc to multiple non-aligned (on the index or columns) :class:`DataFrame` objects; the inputs are now aligned before the ufunc is applied (:issue:`39239`)
530531
- Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`)
531532
- Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`)
532533
- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`)

pandas/core/arraylike.py

-82
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,11 @@
88

99
import operator
1010
from typing import Any
11-
import warnings
1211

1312
import numpy as np
1413

1514
from pandas._libs import lib
1615
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
17-
from pandas.util._exceptions import find_stack_level
1816

1917
from pandas.core.dtypes.generic import ABCNDFrame
2018

@@ -166,81 +164,6 @@ def __rpow__(self, other):
166164
# Helpers to implement __array_ufunc__
167165

168166

169-
def _is_aligned(frame, other):
170-
"""
171-
Helper to check if a DataFrame is aligned with another DataFrame or Series.
172-
"""
173-
from pandas import DataFrame
174-
175-
if isinstance(other, DataFrame):
176-
return frame._indexed_same(other)
177-
else:
178-
# Series -> match index
179-
return frame.columns.equals(other.index)
180-
181-
182-
def _maybe_fallback(ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
183-
"""
184-
In the future DataFrame, inputs to ufuncs will be aligned before applying
185-
the ufunc, but for now we ignore the index but raise a warning if behaviour
186-
would change in the future.
187-
This helper detects the case where a warning is needed and then fallbacks
188-
to applying the ufunc on arrays to avoid alignment.
189-
190-
See https://github.com/pandas-dev/pandas/pull/39239
191-
"""
192-
from pandas import DataFrame
193-
from pandas.core.generic import NDFrame
194-
195-
n_alignable = sum(isinstance(x, NDFrame) for x in inputs)
196-
n_frames = sum(isinstance(x, DataFrame) for x in inputs)
197-
198-
if n_alignable >= 2 and n_frames >= 1:
199-
# if there are 2 alignable inputs (Series or DataFrame), of which at least 1
200-
# is a DataFrame -> we would have had no alignment before -> warn that this
201-
# will align in the future
202-
203-
# the first frame is what determines the output index/columns in pandas < 1.2
204-
first_frame = next(x for x in inputs if isinstance(x, DataFrame))
205-
206-
# check if the objects are aligned or not
207-
non_aligned = sum(
208-
not _is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame)
209-
)
210-
211-
# if at least one is not aligned -> warn and fallback to array behaviour
212-
if non_aligned:
213-
warnings.warn(
214-
"Calling a ufunc on non-aligned DataFrames (or DataFrame/Series "
215-
"combination). Currently, the indices are ignored and the result "
216-
"takes the index/columns of the first DataFrame. In the future , "
217-
"the DataFrames/Series will be aligned before applying the ufunc.\n"
218-
"Convert one of the arguments to a NumPy array "
219-
"(eg 'ufunc(df1, np.asarray(df2)') to keep the current behaviour, "
220-
"or align manually (eg 'df1, df2 = df1.align(df2)') before passing to "
221-
"the ufunc to obtain the future behaviour and silence this warning.",
222-
FutureWarning,
223-
stacklevel=find_stack_level(),
224-
)
225-
226-
# keep the first dataframe of the inputs, other DataFrame/Series is
227-
# converted to array for fallback behaviour
228-
new_inputs = []
229-
for x in inputs:
230-
if x is first_frame:
231-
new_inputs.append(x)
232-
elif isinstance(x, NDFrame):
233-
new_inputs.append(np.asarray(x))
234-
else:
235-
new_inputs.append(x)
236-
237-
# call the ufunc on those transformed inputs
238-
return getattr(ufunc, method)(*new_inputs, **kwargs)
239-
240-
# signal that we didn't fallback / execute the ufunc yet
241-
return NotImplemented
242-
243-
244167
def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
245168
"""
246169
Compatibility with numpy ufuncs.
@@ -260,11 +183,6 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
260183

261184
kwargs = _standardize_out_kwarg(**kwargs)
262185

263-
# for backwards compatibility check and potentially fallback for non-aligned frames
264-
result = _maybe_fallback(ufunc, method, *inputs, **kwargs)
265-
if result is not NotImplemented:
266-
return result
267-
268186
# for binary ops, use our custom dunder methods
269187
result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
270188
if result is not NotImplemented:

pandas/tests/frame/test_ufunc.py

+47-50
Original file line numberDiff line numberDiff line change
@@ -118,21 +118,18 @@ def test_binary_input_aligns_columns(request, dtype_a, dtype_b):
118118

119119
if isinstance(dtype_a, dict) and isinstance(dtype_b, dict):
120120
dtype_b["C"] = dtype_b.pop("B")
121-
122121
df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b)
123-
with tm.assert_produces_warning(FutureWarning):
124-
result = np.heaviside(df1, df2)
125-
# Expected future behaviour:
126-
# expected = np.heaviside(
127-
# np.array([[1, 3, np.nan], [2, 4, np.nan]]),
128-
# np.array([[1, np.nan, 3], [2, np.nan, 4]]),
129-
# )
130-
# expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
131-
expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"])
122+
# As of 2.0, align first before applying the ufunc
123+
result = np.heaviside(df1, df2)
124+
expected = np.heaviside(
125+
np.array([[1, 3, np.nan], [2, 4, np.nan]]),
126+
np.array([[1, np.nan, 3], [2, np.nan, 4]]),
127+
)
128+
expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
132129
tm.assert_frame_equal(result, expected)
133130

134-
# ensure the expected is the same when applying with numpy array
135131
result = np.heaviside(df1, df2.values)
132+
expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"])
136133
tm.assert_frame_equal(result, expected)
137134

138135

@@ -146,35 +143,29 @@ def test_binary_input_aligns_index(request, dtype):
146143
)
147144
df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype)
148145
df2 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype)
149-
with tm.assert_produces_warning(FutureWarning):
150-
result = np.heaviside(df1, df2)
151-
# Expected future behaviour:
152-
# expected = np.heaviside(
153-
# np.array([[1, 3], [3, 4], [np.nan, np.nan]]),
154-
# np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
155-
# )
156-
# # TODO(FloatArray): this will be Float64Dtype.
157-
# expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
158-
expected = pd.DataFrame(
159-
[[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"]
146+
result = np.heaviside(df1, df2)
147+
expected = np.heaviside(
148+
np.array([[1, 3], [3, 4], [np.nan, np.nan]]),
149+
np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
160150
)
151+
# TODO(FloatArray): this will be Float64Dtype.
152+
expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
161153
tm.assert_frame_equal(result, expected)
162154

163-
# ensure the expected is the same when applying with numpy array
164155
result = np.heaviside(df1, df2.values)
156+
expected = pd.DataFrame(
157+
[[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"]
158+
)
165159
tm.assert_frame_equal(result, expected)
166160

167161

168-
@pytest.mark.filterwarnings("ignore:Calling a ufunc on non-aligned:FutureWarning")
169162
def test_binary_frame_series_raises():
170163
# We don't currently implement ufuncs on mixed DataFrame and Series inputs
171164
df = pd.DataFrame({"A": [1, 2]})
172-
# with pytest.raises(NotImplementedError, match="logaddexp"):
173-
with pytest.raises(ValueError, match=""):
165+
with pytest.raises(NotImplementedError, match="logaddexp"):
174166
np.logaddexp(df, df["A"])
175167

176-
# with pytest.raises(NotImplementedError, match="logaddexp"):
177-
with pytest.raises(ValueError, match=""):
168+
with pytest.raises(NotImplementedError, match="logaddexp"):
178169
np.logaddexp(df["A"], df)
179170

180171

@@ -206,7 +197,8 @@ def test_frame_outer_disallowed():
206197
np.subtract.outer(df, df)
207198

208199

209-
def test_alignment_deprecation():
200+
def test_alignment_deprecation_enforced():
201+
# Enforced in 2.0
210202
# https://github.com/pandas-dev/pandas/issues/39184
211203
df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
212204
df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
@@ -221,12 +213,11 @@ def test_alignment_deprecation():
221213
result = np.add(df1, df1)
222214
tm.assert_frame_equal(result, expected)
223215

224-
with tm.assert_produces_warning(FutureWarning):
225-
# non-aligned -> warns
226-
result = np.add(df1, df2)
216+
result = np.add(df1, df2.values)
227217
tm.assert_frame_equal(result, expected)
228218

229-
result = np.add(df1, df2.values)
219+
result = np.add(df1, df2)
220+
expected = pd.DataFrame({"a": [np.nan] * 3, "b": [5, 7, 9], "c": [np.nan] * 3})
230221
tm.assert_frame_equal(result, expected)
231222

232223
result = np.add(df1.values, df2)
@@ -241,20 +232,23 @@ def test_alignment_deprecation():
241232
result = np.add(df1, s1)
242233
tm.assert_frame_equal(result, expected)
243234

244-
with tm.assert_produces_warning(FutureWarning):
245-
result = np.add(df1, s2)
235+
result = np.add(df1, s2.values)
246236
tm.assert_frame_equal(result, expected)
247237

248-
with tm.assert_produces_warning(FutureWarning):
249-
result = np.add(s2, df1)
238+
expected = pd.DataFrame(
239+
{"a": [np.nan] * 3, "b": [5.0, 6.0, 7.0], "c": [np.nan] * 3}
240+
)
241+
result = np.add(df1, s2)
250242
tm.assert_frame_equal(result, expected)
251243

252-
result = np.add(df1, s2.values)
253-
tm.assert_frame_equal(result, expected)
244+
msg = "Cannot apply ufunc <ufunc 'add'> to mixed DataFrame and Series inputs."
245+
with pytest.raises(NotImplementedError, match=msg):
246+
np.add(s2, df1)
254247

255248

256249
@td.skip_if_no("numba")
257-
def test_alignment_deprecation_many_inputs(request):
250+
def test_alignment_deprecation_many_inputs_enforced():
251+
# Enforced in 2.0
258252
# https://github.com/pandas-dev/pandas/issues/39184
259253
# test that the deprecation also works with > 2 inputs -> using a numba
260254
# written ufunc for this because numpy itself doesn't have such ufuncs
@@ -271,31 +265,34 @@ def my_ufunc(x, y, z):
271265
df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
272266
df3 = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]})
273267

274-
with tm.assert_produces_warning(FutureWarning):
275-
result = my_ufunc(df1, df2, df3)
276-
expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"])
268+
result = my_ufunc(df1, df2, df3)
269+
expected = pd.DataFrame(np.full((3, 3), np.nan), columns=["a", "b", "c"])
277270
tm.assert_frame_equal(result, expected)
278271

279272
# all aligned -> no warning
280273
with tm.assert_produces_warning(None):
281274
result = my_ufunc(df1, df1, df1)
275+
expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"])
282276
tm.assert_frame_equal(result, expected)
283277

284278
# mixed frame / arrays
285-
with tm.assert_produces_warning(FutureWarning):
286-
result = my_ufunc(df1, df2, df3.values)
287-
tm.assert_frame_equal(result, expected)
279+
msg = (
280+
r"operands could not be broadcast together with shapes \(3,3\) \(3,3\) \(3,2\)"
281+
)
282+
with pytest.raises(ValueError, match=msg):
283+
my_ufunc(df1, df2, df3.values)
288284

289285
# single frame -> no warning
290286
with tm.assert_produces_warning(None):
291287
result = my_ufunc(df1, df2.values, df3.values)
292288
tm.assert_frame_equal(result, expected)
293289

294290
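# array first, then non-aligned frames -> the frames are aligned with each
# other and no longer broadcast against the unaligned array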
295-
with tm.assert_produces_warning(FutureWarning):
296-
result = my_ufunc(df1.values, df2, df3)
297-
expected = expected.set_axis(["b", "c"], axis=1)
298-
tm.assert_frame_equal(result, expected)
291+
msg = (
292+
r"operands could not be broadcast together with shapes \(3,2\) \(3,3\) \(3,3\)"
293+
)
294+
with pytest.raises(ValueError, match=msg):
295+
my_ufunc(df1.values, df2, df3)
299296

300297

301298
def test_array_ufuncs_for_many_arguments():

pandas/tests/series/test_ufunc.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -426,14 +426,10 @@ def test_np_matmul():
426426
# GH26650
427427
df1 = pd.DataFrame(data=[[-1, 1, 10]])
428428
df2 = pd.DataFrame(data=[-1, 1, 10])
429-
expected_result = pd.DataFrame(data=[102])
429+
expected = pd.DataFrame(data=[102])
430430

431-
with tm.assert_produces_warning(FutureWarning, match="on non-aligned"):
432-
result = np.matmul(df1, df2)
433-
tm.assert_frame_equal(
434-
expected_result,
435-
result,
436-
)
431+
result = np.matmul(df1, df2)
432+
tm.assert_frame_equal(expected, result)
437433

438434

439435
def test_array_ufuncs_for_many_arguments():

0 commit comments

Comments
 (0)