Skip to content

Commit bc67672

Browse files
committed
DEPR: Dropping of silent columns in NDFrame.agg with list-like func
1 parent cd13e3a commit bc67672

File tree

7 files changed

+54
-13
lines changed

7 files changed

+54
-13
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ Other Deprecations
337337
- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
338338
- Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
339339
- Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
340+
- Deprecated silent dropping of columns that raised a ``TypeError`` or ``DataError`` in :meth:`Series.aggregate` and :meth:`DataFrame.aggregate` when used with a list (:issue:``)
340341
341342
.. ---------------------------------------------------------------------------
342343

pandas/core/apply.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
FrameOrSeries,
3434
)
3535
from pandas.util._decorators import cache_readonly
36+
from pandas.util._exceptions import find_stack_level
3637

3738
from pandas.core.dtypes.cast import is_nested_object
3839
from pandas.core.dtypes.common import (
@@ -335,6 +336,7 @@ def agg_list_like(self) -> DataFrame | Series:
335336

336337
results = []
337338
keys = []
339+
failed_names = []
338340

339341
# degenerate case
340342
if selected_obj.ndim == 1:
@@ -344,7 +346,7 @@ def agg_list_like(self) -> DataFrame | Series:
344346
new_res = colg.aggregate(a)
345347

346348
except TypeError:
347-
pass
349+
failed_names.append(com.get_callable_name(a) or a)
348350
else:
349351
results.append(new_res)
350352

@@ -358,10 +360,14 @@ def agg_list_like(self) -> DataFrame | Series:
358360
for index, col in enumerate(selected_obj):
359361
colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
360362
try:
361-
new_res = colg.aggregate(arg)
363+
with warnings.catch_warnings(record=True) as w:
364+
new_res = colg.aggregate(arg)
365+
if len(w) > 0:
366+
failed_names.append(col)
362367
except (TypeError, DataError):
363-
pass
368+
failed_names.append(col)
364369
except ValueError as err:
370+
failed_names.append(col)
365371
# cannot aggregate
366372
if "Must produce aggregated value" in str(err):
367373
# raised directly in _aggregate_named
@@ -384,6 +390,15 @@ def agg_list_like(self) -> DataFrame | Series:
384390
if not len(results):
385391
raise ValueError("no results")
386392

393+
if len(failed_names) > 0:
394+
warnings.warn(
395+
f"{failed_names} did not aggregate successfully. If any error is "
396+
f"raised this will raise in a future version of pandas. "
397+
f"Drop these columns/ops to avoid this warning.",
398+
FutureWarning,
399+
stacklevel=find_stack_level(),
400+
)
401+
387402
try:
388403
concatenated = concat(results, keys=keys, axis=1, sort=False)
389404
except TypeError as err:

pandas/tests/apply/test_frame_apply.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -1087,12 +1087,16 @@ def test_agg_multiple_mixed_no_warning():
10871087
index=["min", "sum"],
10881088
)
10891089
# sorted index
1090-
with tm.assert_produces_warning(None):
1090+
with tm.assert_produces_warning(
1091+
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1092+
):
10911093
result = mdf.agg(["min", "sum"])
10921094

10931095
tm.assert_frame_equal(result, expected)
10941096

1095-
with tm.assert_produces_warning(None):
1097+
with tm.assert_produces_warning(
1098+
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1099+
):
10961100
result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])
10971101

10981102
# GH40420: the result of .agg should have an index that is sorted
@@ -1201,7 +1205,10 @@ def test_nuiscance_columns():
12011205
expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
12021206
tm.assert_series_equal(result, expected)
12031207

1204-
result = df.agg(["sum"])
1208+
with tm.assert_produces_warning(
1209+
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1210+
):
1211+
result = df.agg(["sum"])
12051212
expected = DataFrame(
12061213
[[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"]
12071214
)
@@ -1433,7 +1440,10 @@ def foo(s):
14331440
return s.sum() / 2
14341441

14351442
aggs = ["sum", foo, "count", "min"]
1436-
result = df.agg(aggs)
1443+
with tm.assert_produces_warning(
1444+
FutureWarning, match=r"\['item'\] did not aggregate successfully"
1445+
):
1446+
result = df.agg(aggs)
14371447
expected = DataFrame(
14381448
{
14391449
"item": ["123456", np.nan, 6, "1"],

pandas/tests/groupby/aggregate/test_aggregate.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,14 @@ def test_multiple_functions_tuples_and_non_tuples(df):
339339
expected = df.groupby("A")["C"].agg(ex_funcs)
340340
tm.assert_frame_equal(result, expected)
341341

342-
result = df.groupby("A").agg(funcs)
343-
expected = df.groupby("A").agg(ex_funcs)
342+
with tm.assert_produces_warning(
343+
FutureWarning, match=r"\['B'\] did not aggregate successfully"
344+
):
345+
result = df.groupby("A").agg(funcs)
346+
with tm.assert_produces_warning(
347+
FutureWarning, match=r"\['B'\] did not aggregate successfully"
348+
):
349+
expected = df.groupby("A").agg(ex_funcs)
344350
tm.assert_frame_equal(result, expected)
345351

346352

pandas/tests/groupby/aggregate/test_other.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,17 @@ def peak_to_peak(arr):
4545
return arr.max() - arr.min()
4646

4747
with tm.assert_produces_warning(
48-
FutureWarning, match="Dropping invalid", check_stacklevel=False
48+
FutureWarning,
49+
match=r"\['key2'\] did not aggregate successfully",
50+
check_stacklevel=False,
4951
):
5052
expected = grouped.agg([peak_to_peak])
5153
expected.columns = ["data1", "data2"]
5254

5355
with tm.assert_produces_warning(
54-
FutureWarning, match="Dropping invalid", check_stacklevel=False
56+
FutureWarning,
57+
match=r"\['key2'\] did not aggregate successfully",
58+
check_stacklevel=False,
5559
):
5660
result = grouped.agg(peak_to_peak)
5761
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_groupby.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,10 @@ def test_frame_multi_key_function_list():
583583

584584
grouped = data.groupby(["A", "B"])
585585
funcs = [np.mean, np.std]
586-
agged = grouped.agg(funcs)
586+
with tm.assert_produces_warning(
587+
FutureWarning, match=r"\['C'\] did not aggregate successfully"
588+
):
589+
agged = grouped.agg(funcs)
587590
expected = pd.concat(
588591
[grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)],
589592
keys=["D", "E", "F"],

pandas/tests/resample/test_resample_api.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,9 @@ def test_agg():
352352
for t in cases:
353353
warn = FutureWarning if t in cases[1:3] else None
354354
with tm.assert_produces_warning(
355-
warn, match="Dropping invalid columns", check_stacklevel=False
355+
warn,
356+
match=r"\['date'\] did not aggregate successfully",
357+
check_stacklevel=False,
356358
):
357359
# .var on dt64 column raises and is dropped
358360
result = t.aggregate([np.mean, np.std])

0 commit comments

Comments
 (0)