Skip to content

Commit c62954f

Browse files
rhshadrach authored and noatamir committed
DEPR: Enforce deprecation of silent dropping of nuisance columns in agg_list_like (pandas-dev#49401)
* DEPR: Enforce deprecation of silent dropping of nuisance columns in agg_list_like
* Remove type-ignore
* Fixups
* Remove outdated comment
1 parent 3171c91 commit c62954f

File tree

12 files changed

+127
-193
lines changed

12 files changed

+127
-193
lines changed

asv_bench/benchmarks/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def time_different_python_functions_multicol(self, df):
310310
df.groupby(["key1", "key2"]).agg([sum, min, max])
311311

312312
def time_different_python_functions_singlecol(self, df):
313-
df.groupby("key1").agg([sum, min, max])
313+
df.groupby("key1")[["value1", "value2", "value3"]].agg([sum, min, max])
314314

315315

316316
class GroupStrings:

doc/source/user_guide/basics.rst

-28
Original file line numberDiff line numberDiff line change
@@ -1039,34 +1039,6 @@ not noted for a particular column will be ``NaN``:
10391039
10401040
tsdf.agg({"A": ["mean", "min"], "B": "sum"})
10411041
1042-
.. _basics.aggregation.mixed_string:
1043-
1044-
Mixed dtypes
1045-
++++++++++++
1046-
1047-
.. deprecated:: 1.4.0
1048-
Attempting to determine which columns cannot be aggregated and silently dropping them from the results is deprecated and will be removed in a future version. If any porition of the columns or operations provided fail, the call to ``.agg`` will raise.
1049-
1050-
When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
1051-
aggregations. This is similar to how ``.groupby.agg`` works.
1052-
1053-
.. ipython:: python
1054-
1055-
mdf = pd.DataFrame(
1056-
{
1057-
"A": [1, 2, 3],
1058-
"B": [1.0, 2.0, 3.0],
1059-
"C": ["foo", "bar", "baz"],
1060-
"D": pd.date_range("20130101", periods=3),
1061-
}
1062-
)
1063-
mdf.dtypes
1064-
1065-
.. ipython:: python
1066-
:okwarning:
1067-
1068-
mdf.agg(["min", "sum"])
1069-
10701042
.. _basics.aggregation.custom_describe:
10711043

10721044
Custom describe

doc/source/user_guide/groupby.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ functions:
10071007
.. ipython:: python
10081008
:okwarning:
10091009
1010-
grouped = df.groupby("A")
1010+
grouped = df.groupby("A")[["C", "D"]]
10111011
grouped.agg(lambda x: x.std())
10121012
10131013
But, it's rather verbose and can be untidy if you need to pass additional

doc/source/whatsnew/v0.20.0.rst

+6-3
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,13 @@ aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`)
104104
'D': pd.date_range('20130101', periods=3)})
105105
df.dtypes
106106
107-
.. ipython:: python
108-
:okwarning:
107+
.. code-block:: python
109108
110-
df.agg(['min', 'sum'])
109+
In [10]: df.agg(['min', 'sum'])
110+
Out[10]:
111+
A B C D
112+
min 1 1.0 bar 2013-01-01
113+
sum 6 6.0 foobarbaz NaT
111114
112115
.. _whatsnew_0200.enhancements.dataio_dtype:
113116

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ Removal of prior version deprecations/changes
302302
- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`)
303303
- Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`)
304304
- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)
305-
-
305+
- Change behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
306306

307307
.. ---------------------------------------------------------------------------
308308
.. _whatsnew_200.performance:

pandas/core/apply.py

+9-77
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from collections import defaultdict
55
from functools import partial
66
import inspect
7-
import re
87
from typing import (
98
TYPE_CHECKING,
109
Any,
@@ -18,7 +17,6 @@
1817
Sequence,
1918
cast,
2019
)
21-
import warnings
2220

2321
import numpy as np
2422

@@ -35,12 +33,8 @@
3533
NDFrameT,
3634
npt,
3735
)
38-
from pandas.errors import (
39-
DataError,
40-
SpecificationError,
41-
)
36+
from pandas.errors import SpecificationError
4237
from pandas.util._decorators import cache_readonly
43-
from pandas.util._exceptions import find_stack_level
4438

4539
from pandas.core.dtypes.cast import is_nested_object
4640
from pandas.core.dtypes.common import (
@@ -317,88 +311,28 @@ def agg_list_like(self) -> DataFrame | Series:
317311

318312
results = []
319313
keys = []
320-
failed_names = []
321-
322-
depr_nuisance_columns_msg = (
323-
"{} did not aggregate successfully. If any error is "
324-
"raised this will raise in a future version of pandas. "
325-
"Drop these columns/ops to avoid this warning."
326-
)
327314

328315
# degenerate case
329316
if selected_obj.ndim == 1:
330317
for a in arg:
331318
colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
332-
try:
333-
new_res = colg.aggregate(a)
334-
335-
except TypeError:
336-
failed_names.append(com.get_callable_name(a) or a)
337-
else:
338-
results.append(new_res)
319+
new_res = colg.aggregate(a)
320+
results.append(new_res)
339321

340-
# make sure we find a good name
341-
name = com.get_callable_name(a) or a
342-
keys.append(name)
322+
# make sure we find a good name
323+
name = com.get_callable_name(a) or a
324+
keys.append(name)
343325

344326
# multiples
345327
else:
346328
indices = []
347329
for index, col in enumerate(selected_obj):
348330
colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
349-
try:
350-
# Capture and suppress any warnings emitted by us in the call
351-
# to agg below, but pass through any warnings that were
352-
# generated otherwise.
353-
# This is necessary because of https://bugs.python.org/issue29672
354-
# See GH #43741 for more details
355-
with warnings.catch_warnings(record=True) as record:
356-
new_res = colg.aggregate(arg)
357-
if len(record) > 0:
358-
match = re.compile(depr_nuisance_columns_msg.format(".*"))
359-
for warning in record:
360-
if re.match(match, str(warning.message)):
361-
failed_names.append(col)
362-
else:
363-
warnings.warn_explicit(
364-
message=warning.message,
365-
category=warning.category,
366-
filename=warning.filename,
367-
lineno=warning.lineno,
368-
)
369-
370-
except (TypeError, DataError):
371-
failed_names.append(col)
372-
except ValueError as err:
373-
# cannot aggregate
374-
if "Must produce aggregated value" in str(err):
375-
# raised directly in _aggregate_named
376-
failed_names.append(col)
377-
elif "no results" in str(err):
378-
# reached in test_frame_apply.test_nuiscance_columns
379-
# where the colg.aggregate(arg) ends up going through
380-
# the selected_obj.ndim == 1 branch above with arg == ["sum"]
381-
# on a datetime64[ns] column
382-
failed_names.append(col)
383-
else:
384-
raise
385-
else:
386-
results.append(new_res)
387-
indices.append(index)
388-
331+
new_res = colg.aggregate(arg)
332+
results.append(new_res)
333+
indices.append(index)
389334
keys = selected_obj.columns.take(indices)
390335

391-
# if we are empty
392-
if not len(results):
393-
raise ValueError("no results")
394-
395-
if len(failed_names) > 0:
396-
warnings.warn(
397-
depr_nuisance_columns_msg.format(failed_names),
398-
FutureWarning,
399-
stacklevel=find_stack_level(),
400-
)
401-
402336
try:
403337
concatenated = concat(results, keys=keys, axis=1, sort=False)
404338
except TypeError as err:
@@ -479,8 +413,6 @@ def agg_dict_like(self) -> DataFrame | Series:
479413
keys_to_use = ktu
480414

481415
axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1
482-
# error: Key expression in dictionary comprehension has incompatible type
483-
# "Hashable"; expected type "NDFrame" [misc]
484416
result = concat(
485417
{k: results[k] for k in keys_to_use}, # type: ignore[misc]
486418
axis=axis,

pandas/core/groupby/generic.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1138,8 +1138,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
11381138
result = gba.agg()
11391139

11401140
except ValueError as err:
1141-
if "no results" not in str(err):
1142-
# raised directly by _aggregate_multiple_funcs
1141+
if "No objects to concatenate" not in str(err):
11431142
raise
11441143
result = self._aggregate_frame(func)
11451144

pandas/tests/apply/test_frame_apply.py

+37-34
Original file line numberDiff line numberDiff line change
@@ -1141,44 +1141,55 @@ def test_agg_with_name_as_column_name():
11411141
tm.assert_series_equal(result, expected)
11421142

11431143

1144-
def test_agg_multiple_mixed_no_warning():
1144+
def test_agg_multiple_mixed():
11451145
# GH 20909
11461146
mdf = DataFrame(
11471147
{
11481148
"A": [1, 2, 3],
11491149
"B": [1.0, 2.0, 3.0],
11501150
"C": ["foo", "bar", "baz"],
1151-
"D": date_range("20130101", periods=3),
11521151
}
11531152
)
11541153
expected = DataFrame(
11551154
{
11561155
"A": [1, 6],
11571156
"B": [1.0, 6.0],
11581157
"C": ["bar", "foobarbaz"],
1159-
"D": [Timestamp("2013-01-01"), pd.NaT],
11601158
},
11611159
index=["min", "sum"],
11621160
)
11631161
# sorted index
1164-
with tm.assert_produces_warning(
1165-
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1166-
):
1167-
result = mdf.agg(["min", "sum"])
1168-
1162+
result = mdf.agg(["min", "sum"])
11691163
tm.assert_frame_equal(result, expected)
11701164

1171-
with tm.assert_produces_warning(
1172-
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1173-
):
1174-
result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])
1175-
1165+
result = mdf[["C", "B", "A"]].agg(["sum", "min"])
11761166
# GH40420: the result of .agg should have an index that is sorted
11771167
# according to the arguments provided to agg.
1178-
expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"])
1168+
expected = expected[["C", "B", "A"]].reindex(["sum", "min"])
11791169
tm.assert_frame_equal(result, expected)
11801170

11811171

1172+
def test_agg_multiple_mixed_raises():
1173+
# GH 20909
1174+
mdf = DataFrame(
1175+
{
1176+
"A": [1, 2, 3],
1177+
"B": [1.0, 2.0, 3.0],
1178+
"C": ["foo", "bar", "baz"],
1179+
"D": date_range("20130101", periods=3),
1180+
}
1181+
)
1182+
1183+
# sorted index
1184+
# TODO: GH#49399 will fix error message
1185+
msg = "DataFrame constructor called with"
1186+
with pytest.raises(TypeError, match=msg):
1187+
mdf.agg(["min", "sum"])
1188+
1189+
with pytest.raises(TypeError, match=msg):
1190+
mdf[["D", "C", "B", "A"]].agg(["sum", "min"])
1191+
1192+
11821193
def test_agg_reduce(axis, float_frame):
11831194
other_axis = 1 if axis in {0, "index"} else 0
11841195
name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()
@@ -1277,14 +1288,10 @@ def test_nuiscance_columns():
12771288
expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
12781289
tm.assert_series_equal(result, expected)
12791290

1280-
with tm.assert_produces_warning(
1281-
FutureWarning, match=r"\['D'\] did not aggregate successfully"
1282-
):
1283-
result = df.agg(["sum"])
1284-
expected = DataFrame(
1285-
[[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"]
1286-
)
1287-
tm.assert_frame_equal(result, expected)
1291+
# TODO: GH#49399 will fix error message
1292+
msg = "DataFrame constructor called with"
1293+
with pytest.raises(TypeError, match=msg):
1294+
df.agg(["sum"])
12881295

12891296

12901297
@pytest.mark.parametrize("how", ["agg", "apply"])
@@ -1499,27 +1506,23 @@ def test_aggregation_func_column_order():
14991506
# according to the arguments provided to agg.
15001507
df = DataFrame(
15011508
[
1502-
("1", 1, 0, 0),
1503-
("2", 2, 0, 0),
1504-
("3", 3, 0, 0),
1505-
("4", 4, 5, 4),
1506-
("5", 5, 6, 6),
1507-
("6", 6, 7, 7),
1509+
(1, 0, 0),
1510+
(2, 0, 0),
1511+
(3, 0, 0),
1512+
(4, 5, 4),
1513+
(5, 6, 6),
1514+
(6, 7, 7),
15081515
],
1509-
columns=("item", "att1", "att2", "att3"),
1516+
columns=("att1", "att2", "att3"),
15101517
)
15111518

15121519
def foo(s):
15131520
return s.sum() / 2
15141521

15151522
aggs = ["sum", foo, "count", "min"]
1516-
with tm.assert_produces_warning(
1517-
FutureWarning, match=r"\['item'\] did not aggregate successfully"
1518-
):
1519-
result = df.agg(aggs)
1523+
result = df.agg(aggs)
15201524
expected = DataFrame(
15211525
{
1522-
"item": ["123456", np.nan, 6, "1"],
15231526
"att1": [21.0, 10.5, 6.0, 1.0],
15241527
"att2": [18.0, 9.0, 6.0, 0.0],
15251528
"att3": [17.0, 8.5, 6.0, 0.0],

pandas/tests/groupby/aggregate/test_aggregate.py

+7-10
Original file line numberDiff line numberDiff line change
@@ -383,21 +383,18 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
383383

384384
def test_multiple_functions_tuples_and_non_tuples(df):
385385
# #1359
386+
# Columns B and C would cause partial failure
387+
df = df.drop(columns=["B", "C"])
388+
386389
funcs = [("foo", "mean"), "std"]
387390
ex_funcs = [("foo", "mean"), ("std", "std")]
388391

389-
result = df.groupby("A")["C"].agg(funcs)
390-
expected = df.groupby("A")["C"].agg(ex_funcs)
392+
result = df.groupby("A")["D"].agg(funcs)
393+
expected = df.groupby("A")["D"].agg(ex_funcs)
391394
tm.assert_frame_equal(result, expected)
392395

393-
with tm.assert_produces_warning(
394-
FutureWarning, match=r"\['B'\] did not aggregate successfully"
395-
):
396-
result = df.groupby("A").agg(funcs)
397-
with tm.assert_produces_warning(
398-
FutureWarning, match=r"\['B'\] did not aggregate successfully"
399-
):
400-
expected = df.groupby("A").agg(ex_funcs)
396+
result = df.groupby("A").agg(funcs)
397+
expected = df.groupby("A").agg(ex_funcs)
401398
tm.assert_frame_equal(result, expected)
402399

403400

0 commit comments

Comments
 (0)