Skip to content

Commit 4b54c53

Browse files
authored
DEPR: Silent dropping of nuisance columns in agg_list_like (#43741)
1 parent e27ef39 commit 4b54c53

File tree

10 files changed

+98
-20
lines changed

10 files changed

+98
-20
lines changed

doc/source/user_guide/basics.rst

+4
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,9 @@ not noted for a particular column will be ``NaN``:
10451045
Mixed dtypes
10461046
++++++++++++
10471047

1048+
.. deprecated:: 1.4.0
1049+
Attempting to determine which columns cannot be aggregated and silently dropping them from the results is deprecated and will be removed in a future version. If any portion of the columns or operations provided fails, the call to ``.agg`` will raise.
1050+
10481051
When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
10491052
aggregations. This is similar to how ``.groupby.agg`` works.
10501053

@@ -1061,6 +1064,7 @@ aggregations. This is similar to how ``.groupby.agg`` works.
10611064
mdf.dtypes
10621065
10631066
.. ipython:: python
1067+
:okwarning:
10641068
10651069
mdf.agg(["min", "sum"])
10661070

doc/source/user_guide/groupby.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ column, which produces an aggregated result with a hierarchical index:
578578

579579
.. ipython:: python
580580
581-
grouped.agg([np.sum, np.mean, np.std])
581+
grouped[["C", "D"]].agg([np.sum, np.mean, np.std])
582582
583583
584584
The resulting aggregations are named for the functions themselves. If you
@@ -597,7 +597,7 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
597597
.. ipython:: python
598598
599599
(
600-
grouped.agg([np.sum, np.mean, np.std]).rename(
600+
grouped[["C", "D"]].agg([np.sum, np.mean, np.std]).rename(
601601
columns={"sum": "foo", "mean": "bar", "std": "baz"}
602602
)
603603
)

doc/source/whatsnew/v0.20.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`)
105105
df.dtypes
106106
107107
.. ipython:: python
108+
:okwarning:
108109
109110
df.agg(['min', 'sum'])
110111

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ Other Deprecations
338338
- Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
339339
- Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
340340
- Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`)
341+
- Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
341342

342343
.. ---------------------------------------------------------------------------
343344

pandas/core/apply.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from collections import defaultdict
55
from functools import partial
66
import inspect
7+
import re
78
from typing import (
89
TYPE_CHECKING,
910
Any,
@@ -336,6 +337,13 @@ def agg_list_like(self) -> DataFrame | Series:
336337

337338
results = []
338339
keys = []
340+
failed_names = []
341+
342+
depr_nuisance_columns_msg = (
343+
"{} did not aggregate successfully. If any error is "
344+
"raised this will raise in a future version of pandas. "
345+
"Drop these columns/ops to avoid this warning."
346+
)
339347

340348
# degenerate case
341349
if selected_obj.ndim == 1:
@@ -345,7 +353,7 @@ def agg_list_like(self) -> DataFrame | Series:
345353
new_res = colg.aggregate(a)
346354

347355
except TypeError:
348-
pass
356+
failed_names.append(com.get_callable_name(a) or a)
349357
else:
350358
results.append(new_res)
351359

@@ -359,20 +367,37 @@ def agg_list_like(self) -> DataFrame | Series:
359367
for index, col in enumerate(selected_obj):
360368
colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
361369
try:
362-
new_res = colg.aggregate(arg)
370+
# Capture and suppress any warnings emitted by us in the call
371+
# to agg below, but pass through any warnings that were
372+
# generated otherwise.
373+
with warnings.catch_warnings(record=True) as record:
374+
new_res = colg.aggregate(arg)
375+
if len(record) > 0:
376+
match = re.compile(depr_nuisance_columns_msg.format(".*"))
377+
for warning in record:
378+
if re.match(match, str(warning.message)):
379+
failed_names.append(col)
380+
else:
381+
warnings.warn_explicit(
382+
message=warning.message,
383+
category=warning.category,
384+
filename=warning.filename,
385+
lineno=warning.lineno,
386+
)
387+
363388
except (TypeError, DataError):
364-
pass
389+
failed_names.append(col)
365390
except ValueError as err:
366391
# cannot aggregate
367392
if "Must produce aggregated value" in str(err):
368393
# raised directly in _aggregate_named
369-
pass
394+
failed_names.append(col)
370395
elif "no results" in str(err):
371396
# reached in test_frame_apply.test_nuiscance_columns
372397
# where the colg.aggregate(arg) ends up going through
373398
# the selected_obj.ndim == 1 branch above with arg == ["sum"]
374399
# on a datetime64[ns] column
375-
pass
400+
failed_names.append(col)
376401
else:
377402
raise
378403
else:
@@ -385,6 +410,13 @@ def agg_list_like(self) -> DataFrame | Series:
385410
if not len(results):
386411
raise ValueError("no results")
387412

413+
if len(failed_names) > 0:
414+
warnings.warn(
415+
depr_nuisance_columns_msg.format(failed_names),
416+
FutureWarning,
417+
stacklevel=find_stack_level(),
418+
)
419+
388420
try:
389421
concatenated = concat(results, keys=keys, axis=1, sort=False)
390422
except TypeError as err:

pandas/tests/apply/test_frame_apply.py

+31-4
Original file line numberDiff line numberDiff line change
@@ -1087,12 +1087,16 @@ def test_agg_multiple_mixed_no_warning():
10871087
index=["min", "sum"],
10881088
)
10891089
# sorted index
1090-
with tm.assert_produces_warning(None):
1090+
with tm.assert_produces_warning(
1091+
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1092+
):
10911093
result = mdf.agg(["min", "sum"])
10921094

10931095
tm.assert_frame_equal(result, expected)
10941096

1095-
with tm.assert_produces_warning(None):
1097+
with tm.assert_produces_warning(
1098+
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1099+
):
10961100
result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])
10971101

10981102
# GH40420: the result of .agg should have an index that is sorted
@@ -1201,7 +1205,10 @@ def test_nuiscance_columns():
12011205
expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
12021206
tm.assert_series_equal(result, expected)
12031207

1204-
result = df.agg(["sum"])
1208+
with tm.assert_produces_warning(
1209+
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1210+
):
1211+
result = df.agg(["sum"])
12051212
expected = DataFrame(
12061213
[[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"]
12071214
)
@@ -1433,7 +1440,10 @@ def foo(s):
14331440
return s.sum() / 2
14341441

14351442
aggs = ["sum", foo, "count", "min"]
1436-
result = df.agg(aggs)
1443+
with tm.assert_produces_warning(
1444+
FutureWarning, match=r"\['item'\] did not aggregate successfully"
1445+
):
1446+
result = df.agg(aggs)
14371447
expected = DataFrame(
14381448
{
14391449
"item": ["123456", np.nan, 6, "1"],
@@ -1452,3 +1462,20 @@ def test_apply_getitem_axis_1():
14521462
result = df[["a", "a"]].apply(lambda x: x[0] + x[1], axis=1)
14531463
expected = Series([0, 2, 4])
14541464
tm.assert_series_equal(result, expected)
1465+
1466+
1467+
def test_nuisance_depr_passes_through_warnings():
1468+
# GH 43740
1469+
# DataFrame.agg with list-likes may emit warnings for both individual
1470+
# args and for entire columns, but we only want to emit once. We
1471+
# catch and suppress the warnings for individual args, but need to make
1472+
# sure if some other warnings were raised, they get passed through to
1473+
# the user.
1474+
1475+
def foo(x):
1476+
warnings.warn("Hello, World!")
1477+
return x.sum()
1478+
1479+
df = DataFrame({"a": [1, 2, 3]})
1480+
with tm.assert_produces_warning(UserWarning, match="Hello, World!"):
1481+
df.agg([foo])

pandas/tests/groupby/aggregate/test_aggregate.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,14 @@ def test_multiple_functions_tuples_and_non_tuples(df):
339339
expected = df.groupby("A")["C"].agg(ex_funcs)
340340
tm.assert_frame_equal(result, expected)
341341

342-
result = df.groupby("A").agg(funcs)
343-
expected = df.groupby("A").agg(ex_funcs)
342+
with tm.assert_produces_warning(
343+
FutureWarning, match=r"\['B'\] did not aggregate successfully"
344+
):
345+
result = df.groupby("A").agg(funcs)
346+
with tm.assert_produces_warning(
347+
FutureWarning, match=r"\['B'\] did not aggregate successfully"
348+
):
349+
expected = df.groupby("A").agg(ex_funcs)
344350
tm.assert_frame_equal(result, expected)
345351

346352

pandas/tests/groupby/aggregate/test_other.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,15 @@ def peak_to_peak(arr):
4545
return arr.max() - arr.min()
4646

4747
with tm.assert_produces_warning(
48-
FutureWarning, match="Dropping invalid", check_stacklevel=False
48+
FutureWarning,
49+
match=r"\['key2'\] did not aggregate successfully",
4950
):
5051
expected = grouped.agg([peak_to_peak])
5152
expected.columns = ["data1", "data2"]
5253

5354
with tm.assert_produces_warning(
54-
FutureWarning, match="Dropping invalid", check_stacklevel=False
55+
FutureWarning,
56+
match=r"\['key2'\] did not aggregate successfully",
5557
):
5658
result = grouped.agg(peak_to_peak)
5759
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_groupby.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,10 @@ def test_frame_multi_key_function_list():
583583

584584
grouped = data.groupby(["A", "B"])
585585
funcs = [np.mean, np.std]
586-
agged = grouped.agg(funcs)
586+
with tm.assert_produces_warning(
587+
FutureWarning, match=r"\['C'\] did not aggregate successfully"
588+
):
589+
agged = grouped.agg(funcs)
587590
expected = pd.concat(
588591
[grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)],
589592
keys=["D", "E", "F"],

pandas/tests/resample/test_resample_api.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,12 @@ def test_agg():
350350
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
351351
expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
352352
for t in cases:
353-
with tm.assert_produces_warning(None):
354-
# .var on dt64 column raises and is dropped, but the path in core.apply
355-
# that it goes through will still suppress a TypeError even
356-
# once the deprecations in the groupby code are enforced
353+
warn = FutureWarning if t in cases[1:3] else None
354+
with tm.assert_produces_warning(
355+
warn,
356+
match=r"\['date'\] did not aggregate successfully",
357+
):
358+
# .var on dt64 column raises and is dropped
357359
result = t.aggregate([np.mean, np.std])
358360
tm.assert_frame_equal(result, expected)
359361

0 commit comments

Comments
 (0)