Skip to content

Commit ce123cd

Browse files
authored
REGR: NumPy func warning when dropping nuisance in agg, apply, transform (#50627)
REGR: Warnings for NumPy funcs when dropping nuisance in agg, apply, transform
1 parent 54b4037 commit ce123cd

File tree

10 files changed

+163
-13
lines changed

10 files changed

+163
-13
lines changed

doc/source/whatsnew/v1.5.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`)
2020
- Fixed regression in :meth:`SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`)
2121
- Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`)
22+
- Fixed regression in the methods ``apply``, ``agg``, and ``transform`` when used with NumPy functions, where the warning emitted when the operation failed on non-numeric dtypes incorrectly informed users to supply ``numeric_only=True``; these methods do not accept that argument, so such columns must instead be dropped prior to using these methods (:issue:`50538`)
2223
-
2324

2425
.. ---------------------------------------------------------------------------

pandas/core/apply.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@
3939
SpecificationError,
4040
)
4141
from pandas.util._decorators import cache_readonly
42-
from pandas.util._exceptions import find_stack_level
42+
from pandas.util._exceptions import (
43+
find_stack_level,
44+
rewrite_warning,
45+
)
4346

4447
from pandas.core.dtypes.cast import is_nested_object
4548
from pandas.core.dtypes.common import (
@@ -174,7 +177,15 @@ def agg(self) -> DataFrame | Series | None:
174177
if callable(arg):
175178
f = com.get_cython_func(arg)
176179
if f and not args and not kwargs:
177-
return getattr(obj, f)()
180+
# GH#50538
181+
old_msg = "The default value of numeric_only"
182+
new_msg = (
183+
f"The operation {arg} failed on a column. If any error is "
184+
f"raised, this will raise an exception in a future version "
185+
f"of pandas. Drop these columns to avoid this warning."
186+
)
187+
with rewrite_warning(old_msg, FutureWarning, new_msg):
188+
return getattr(obj, f)()
178189

179190
# caller can react
180191
return None
@@ -309,7 +320,14 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
309320
if not args and not kwargs:
310321
f = com.get_cython_func(func)
311322
if f:
312-
return getattr(obj, f)()
323+
old_msg = "The default value of numeric_only"
324+
new_msg = (
325+
f"The operation {func} failed on a column. If any error is "
326+
f"raised, this will raise an exception in a future version "
327+
f"of pandas. Drop these columns to avoid this warning."
328+
)
329+
with rewrite_warning(old_msg, FutureWarning, new_msg):
330+
return getattr(obj, f)()
313331

314332
# Two possible ways to use a UDF - apply or call directly
315333
try:

pandas/core/groupby/groupby.py

+33-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ class providing the base-class of operations.
88
"""
99
from __future__ import annotations
1010

11-
from contextlib import contextmanager
11+
from contextlib import (
12+
contextmanager,
13+
nullcontext,
14+
)
1215
import datetime
1316
from functools import (
1417
partial,
@@ -64,7 +67,10 @@ class providing the base-class of operations.
6467
cache_readonly,
6568
doc,
6669
)
67-
from pandas.util._exceptions import find_stack_level
70+
from pandas.util._exceptions import (
71+
find_stack_level,
72+
rewrite_warning,
73+
)
6874

6975
from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
7076
from pandas.core.dtypes.common import (
@@ -1508,7 +1514,9 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)
15081514
)
15091515
)
15101516
def apply(self, func, *args, **kwargs) -> NDFrameT:
1511-
1517+
# GH#50538
1518+
is_np_func = func in com._cython_table and func not in com._builtin_table
1519+
orig_func = func
15121520
func = com.is_builtin_func(func)
15131521

15141522
if isinstance(func, str):
@@ -1546,7 +1554,17 @@ def f(g):
15461554
# ignore SettingWithCopy here in case the user mutates
15471555
with option_context("mode.chained_assignment", None):
15481556
try:
1549-
result = self._python_apply_general(f, self._selected_obj)
1557+
# GH#50538
1558+
old_msg = "The default value of numeric_only"
1559+
new_msg = (
1560+
f"The operation {orig_func} failed on a column. If any error is "
1561+
f"raised, this will raise an exception in a future version "
1562+
f"of pandas. Drop these columns to avoid this warning."
1563+
)
1564+
with rewrite_warning(
1565+
old_msg, FutureWarning, new_msg
1566+
) if is_np_func else nullcontext():
1567+
result = self._python_apply_general(f, self._selected_obj)
15501568
except TypeError:
15511569
# gh-20949
15521570
# try again, with .apply acting as a filtering
@@ -1557,7 +1575,17 @@ def f(g):
15571575
# on a string grouper column
15581576

15591577
with self._group_selection_context():
1560-
return self._python_apply_general(f, self._selected_obj)
1578+
# GH#50538
1579+
old_msg = "The default value of numeric_only"
1580+
new_msg = (
1581+
f"The operation {orig_func} failed on a column. If any error "
1582+
f"is raised, this will raise an exception in a future version "
1583+
f"of pandas. Drop these columns to avoid this warning."
1584+
)
1585+
with rewrite_warning(
1586+
old_msg, FutureWarning, new_msg
1587+
) if is_np_func else nullcontext():
1588+
return self._python_apply_general(f, self._selected_obj)
15611589

15621590
return result
15631591

pandas/tests/apply/test_frame_apply.py

+21
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,27 @@ def test_nuiscance_columns():
12871287
tm.assert_frame_equal(result, expected)
12881288

12891289

1290+
@pytest.mark.parametrize("method", ["agg", "apply", "transform"])
1291+
def test_numeric_only_warning_numpy(method):
1292+
# GH#50538
1293+
df = DataFrame({"a": [1, 1, 2], "b": list("xyz")})
1294+
if method == "agg":
1295+
msg = "The operation <function mean.*failed"
1296+
with tm.assert_produces_warning(FutureWarning, match=msg):
1297+
getattr(df, method)(np.mean)
1298+
# Ensure users can't pass numeric_only
1299+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
1300+
getattr(df, method)(np.mean, numeric_only=True)
1301+
elif method == "apply":
1302+
with pytest.raises(TypeError, match="Could not convert"):
1303+
getattr(df, method)(np.mean)
1304+
else:
1305+
with pytest.raises(ValueError, match="Function did not transform"):
1306+
msg = "The operation <function mean.*failed"
1307+
with tm.assert_produces_warning(FutureWarning, match=msg):
1308+
getattr(df, method)(np.mean)
1309+
1310+
12901311
@pytest.mark.parametrize("how", ["agg", "apply"])
12911312
def test_non_callable_aggregates(how):
12921313

pandas/tests/groupby/aggregate/test_aggregate.py

+12
Original file line numberDiff line numberDiff line change
@@ -1454,3 +1454,15 @@ def test_agg_of_mode_list(test, constant):
14541454
expected = expected.set_index(0)
14551455

14561456
tm.assert_frame_equal(result, expected)
1457+
1458+
1459+
def test_numeric_only_warning_numpy():
1460+
# GH#50538
1461+
df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
1462+
gb = df.groupby("a")
1463+
msg = "The operation <function mean.*failed"
1464+
with tm.assert_produces_warning(FutureWarning, match=msg):
1465+
gb.agg(np.mean)
1466+
# Ensure users can't pass numeric_only
1467+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
1468+
gb.agg(np.mean, numeric_only=True)

pandas/tests/groupby/test_apply.py

+13
Original file line numberDiff line numberDiff line change
@@ -1357,3 +1357,16 @@ def test_empty_df(method, op):
13571357
)
13581358

13591359
tm.assert_series_equal(result, expected)
1360+
1361+
1362+
def test_numeric_only_warning_numpy():
1363+
# GH#50538
1364+
df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
1365+
gb = df.groupby("a")
1366+
msg = "The operation <function mean.*failed"
1367+
# Warning is raised from within NumPy
1368+
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
1369+
gb.apply(np.mean)
1370+
# Ensure users can't pass numeric_only
1371+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
1372+
gb.apply(np.mean, numeric_only=True)

pandas/tests/groupby/test_groupby.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -486,9 +486,14 @@ def test_frame_set_name_single(df):
486486
result = df.groupby("A", as_index=False).mean()
487487
assert result.index.name != "A"
488488

489+
# GH#50538
490+
msg = "The operation <function mean.*failed"
489491
with tm.assert_produces_warning(FutureWarning, match=msg):
490492
result = grouped.agg(np.mean)
491493
assert result.index.name == "A"
494+
# Ensure users can't pass numeric_only
495+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
496+
grouped.agg(np.mean, numeric_only=True)
492497

493498
result = grouped.agg({"C": np.mean, "D": np.std})
494499
assert result.index.name == "A"
@@ -766,19 +771,24 @@ def test_as_index_series_return_frame(df):
766771
grouped = df.groupby("A", as_index=False)
767772
grouped2 = df.groupby(["A", "B"], as_index=False)
768773

769-
msg = "The default value of numeric_only"
774+
# GH#50538
775+
msg = "The operation <function sum.*failed"
770776
with tm.assert_produces_warning(FutureWarning, match=msg):
771777
result = grouped["C"].agg(np.sum)
772778
expected = grouped.agg(np.sum).loc[:, ["A", "C"]]
773779
assert isinstance(result, DataFrame)
774780
tm.assert_frame_equal(result, expected)
781+
# Ensure users can't pass numeric_only
782+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
783+
grouped.agg(np.mean, numeric_only=True)
775784

776785
result2 = grouped2["C"].agg(np.sum)
777786
expected2 = grouped2.agg(np.sum).loc[:, ["A", "B", "C"]]
778787
assert isinstance(result2, DataFrame)
779788
tm.assert_frame_equal(result2, expected2)
780789

781790
result = grouped["C"].sum()
791+
msg = "The default value of numeric_only"
782792
with tm.assert_produces_warning(FutureWarning, match=msg):
783793
expected = grouped.sum().loc[:, ["A", "C"]]
784794
assert isinstance(result, DataFrame)
@@ -1021,10 +1031,14 @@ def test_wrap_aggregated_output_multindex(mframe):
10211031
df["baz", "two"] = "peekaboo"
10221032

10231033
keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
1024-
msg = "The default value of numeric_only"
1034+
# GH#50538
1035+
msg = "The operation <function mean.*failed"
10251036
with tm.assert_produces_warning(FutureWarning, match=msg):
10261037
agged = df.groupby(keys).agg(np.mean)
10271038
assert isinstance(agged.columns, MultiIndex)
1039+
# Ensure users can't pass numeric_only
1040+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
1041+
df.groupby(keys).agg(np.mean, numeric_only=True)
10281042

10291043
def aggfun(ser):
10301044
if ser.name == ("foo", "one"):

pandas/tests/groupby/transform/test_transform.py

+15
Original file line numberDiff line numberDiff line change
@@ -1563,3 +1563,18 @@ def test_as_index_no_change(keys, df, groupby_func):
15631563
result = gb_as_index_true.transform(groupby_func, *args)
15641564
expected = gb_as_index_false.transform(groupby_func, *args)
15651565
tm.assert_equal(result, expected)
1566+
1567+
1568+
@pytest.mark.parametrize("func", [np.mean, np.cumprod])
1569+
def test_numeric_only_warning_numpy(func):
1570+
# GH#50538
1571+
df = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
1572+
gb = df.groupby("a")
1573+
msg = "The default value of numeric_only"
1574+
with tm.assert_produces_warning(FutureWarning, match=msg):
1575+
gb.transform(func)
1576+
# Ensure users can pass numeric_only
1577+
result = gb.transform(func, numeric_only=True)
1578+
values = [3.5, 3.5, 5.0] if func == np.mean else [3, 12, 5]
1579+
expected = DataFrame({"c": values})
1580+
tm.assert_frame_equal(result, expected)

pandas/tests/resample/test_resample_api.py

+21
Original file line numberDiff line numberDiff line change
@@ -938,3 +938,24 @@ def test_series_downsample_method(method, numeric_only, expected_data):
938938
result = func(numeric_only=numeric_only)
939939
expected = Series(expected_data, index=expected_index)
940940
tm.assert_series_equal(result, expected)
941+
942+
943+
@pytest.mark.parametrize("method", ["agg", "apply", "transform"])
944+
def test_numeric_only_warning_numpy(method):
945+
# GH#50538
946+
resampled = _test_frame.assign(D="x").resample("H")
947+
if method == "transform":
948+
msg = "The default value of numeric_only"
949+
with tm.assert_produces_warning(FutureWarning, match=msg):
950+
getattr(resampled, method)(np.mean)
951+
# Ensure users can pass numeric_only
952+
result = getattr(resampled, method)(np.mean, numeric_only=True)
953+
expected = resampled.transform("mean", numeric_only=True)
954+
tm.assert_frame_equal(result, expected)
955+
else:
956+
msg = "The operation <function mean.*failed"
957+
with tm.assert_produces_warning(FutureWarning, match=msg):
958+
getattr(resampled, method)(np.mean)
959+
# Ensure users can't pass numeric_only
960+
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
961+
getattr(resampled, method)(np.mean, numeric_only=True)

pandas/tests/reshape/test_pivot.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ def test_pivot_table_nocols(self):
146146
df = DataFrame(
147147
{"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
148148
)
149-
msg = "pivot_table dropped a column because it failed to aggregate"
149+
# GH#50538
150+
msg = "The operation <function sum.*failed"
150151
with tm.assert_produces_warning(FutureWarning, match=msg):
151152
rs = df.pivot_table(columns="cols", aggfunc=np.sum)
152153
xp = df.pivot_table(index="cols", aggfunc=np.sum).T
@@ -907,7 +908,8 @@ def test_no_col(self):
907908

908909
# to help with a buglet
909910
self.data.columns = [k * 2 for k in self.data.columns]
910-
msg = "pivot_table dropped a column because it failed to aggregate"
911+
# GH#50538
912+
msg = "The operation <function mean.*failed"
911913
with tm.assert_produces_warning(FutureWarning, match=msg):
912914
table = self.data.pivot_table(
913915
index=["AA", "BB"], margins=True, aggfunc=np.mean
@@ -916,6 +918,7 @@ def test_no_col(self):
916918
totals = table.loc[("All", ""), value_col]
917919
assert totals == self.data[value_col].mean()
918920

921+
msg = "pivot_table dropped a column because it failed to aggregate"
919922
with tm.assert_produces_warning(FutureWarning, match=msg):
920923
table = self.data.pivot_table(
921924
index=["AA", "BB"], margins=True, aggfunc="mean"
@@ -975,7 +978,11 @@ def test_margin_with_only_columns_defined(
975978
}
976979
)
977980

978-
msg = "pivot_table dropped a column because it failed to aggregate"
981+
if aggfunc == "sum":
982+
msg = "pivot_table dropped a column because it failed to aggregate"
983+
else:
984+
# GH#50538
985+
msg = "The operation <function mean.*failed"
979986
with tm.assert_produces_warning(FutureWarning, match=msg):
980987
result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
981988
expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns)

0 commit comments

Comments
 (0)