Skip to content

Commit 5c1303a

Browse files
authored
CLN: Enforce deprecation of groupby.idxmin/idxmax with skipna=False not raising (#57746)
* CLN: Enforce deprecation of groupby.idxmin/idxmax with skipna=False not raising
* Test fixup
1 parent a0784d2 commit 5c1303a

File tree

5 files changed

+27
-39
lines changed

5 files changed

+27
-39
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ Other Deprecations
189189

190190
Removal of prior version deprecations/changes
191191
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
192+
- :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, and :meth:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
192193
- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
193194
- :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`)
194195
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)

pandas/core/groupby/generic.py

+8-16
Original file line numberDiff line numberDiff line change
@@ -1179,8 +1179,7 @@ def idxmin(self, skipna: bool = True) -> Series:
11791179
Parameters
11801180
----------
11811181
skipna : bool, default True
1182-
Exclude NA/null values. If the entire Series is NA, the result
1183-
will be NA.
1182+
Exclude NA values.
11841183
11851184
Returns
11861185
-------
@@ -1190,7 +1189,7 @@ def idxmin(self, skipna: bool = True) -> Series:
11901189
Raises
11911190
------
11921191
ValueError
1193-
If the Series is empty.
1192+
If the Series is empty, or if ``skipna=False`` and any value is NA.
11941193
11951194
See Also
11961195
--------
@@ -1233,8 +1232,7 @@ def idxmax(self, skipna: bool = True) -> Series:
12331232
Parameters
12341233
----------
12351234
skipna : bool, default True
1236-
Exclude NA/null values. If the entire Series is NA, the result
1237-
will be NA.
1235+
Exclude NA values.
12381236
12391237
Returns
12401238
-------
@@ -1244,7 +1242,7 @@ def idxmax(self, skipna: bool = True) -> Series:
12441242
Raises
12451243
------
12461244
ValueError
1247-
If the Series is empty.
1245+
If the Series is empty, or if ``skipna=False`` and any value is NA.
12481246
12491247
See Also
12501248
--------
@@ -2165,13 +2163,10 @@ def idxmax(
21652163
"""
21662164
Return index of first occurrence of maximum in each group.
21672165
2168-
NA/null values are excluded.
2169-
21702166
Parameters
21712167
----------
21722168
skipna : bool, default True
2173-
Exclude NA/null values. If an entire row/column is NA, the result
2174-
will be NA.
2169+
Exclude NA values.
21752170
numeric_only : bool, default False
21762171
Include only `float`, `int` or `boolean` data.
21772172
@@ -2185,7 +2180,7 @@ def idxmax(
21852180
Raises
21862181
------
21872182
ValueError
2188-
* If the row/column is empty
2183+
* If a column is empty, or if ``skipna=False`` and any value is NA.
21892184
21902185
See Also
21912186
--------
@@ -2230,13 +2225,10 @@ def idxmin(
22302225
"""
22312226
Return index of first occurrence of minimum in each group.
22322227
2233-
NA/null values are excluded.
2234-
22352228
Parameters
22362229
----------
22372230
skipna : bool, default True
2238-
Exclude NA/null values. If an entire row/column is NA, the result
2239-
will be NA.
2231+
Exclude NA values.
22402232
numeric_only : bool, default False
22412233
Include only `float`, `int` or `boolean` data.
22422234
@@ -2250,7 +2242,7 @@ def idxmin(
22502242
Raises
22512243
------
22522244
ValueError
2253-
* If the row/column is empty
2245+
* If a column is empty, or if ``skipna=False`` and any value is NA.
22542246
22552247
See Also
22562248
--------

pandas/core/groupby/groupby.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -5553,15 +5553,11 @@ def _idxmax_idxmin(
55535553
f"Can't get {how} of an empty group due to unobserved categories. "
55545554
"Specify observed=True in groupby instead."
55555555
)
5556-
elif not skipna:
5557-
if self._obj_with_exclusions.isna().any(axis=None):
5558-
warnings.warn(
5559-
f"The behavior of {type(self).__name__}.{how} with all-NA "
5560-
"values, or any-NA and skipna=False, is deprecated. In a future "
5561-
"version this will raise ValueError",
5562-
FutureWarning,
5563-
stacklevel=find_stack_level(),
5564-
)
5556+
elif not skipna and self._obj_with_exclusions.isna().any(axis=None):
5557+
raise ValueError(
5558+
f"{type(self).__name__}.{how} with skipna=False encountered an NA "
5559+
f"value."
5560+
)
55655561

55665562
result = self._agg_general(
55675563
numeric_only=numeric_only,

pandas/tests/groupby/test_reductions.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -291,16 +291,14 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
291291
)
292292
gb = df.groupby("a")
293293

294-
warn = None if skipna else FutureWarning
295-
msg = f"The behavior of DataFrameGroupBy.{how} with all-NA values"
296-
with tm.assert_produces_warning(warn, match=msg):
297-
result = getattr(gb, how)(skipna=skipna)
298-
if skipna:
299-
values = [1, 3, 4, 6, np.nan]
300-
else:
301-
values = np.nan
294+
if not skipna:
295+
msg = f"DataFrameGroupBy.{how} with skipna=False"
296+
with pytest.raises(ValueError, match=msg):
297+
getattr(gb, how)(skipna=skipna)
298+
return
299+
result = getattr(gb, how)(skipna=skipna)
302300
expected = DataFrame(
303-
{"b": values}, index=pd.Index(range(1, 6), name="a", dtype="intp")
301+
{"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp")
304302
)
305303
tm.assert_frame_equal(result, expected)
306304

pandas/tests/groupby/transform/test_transform.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1525,10 +1525,11 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only):
15251525
# GH#55268 - ensure *args are passed through when calling transform
15261526
df = DataFrame({"a": [1, 1, 1, 2], "b": [3.0, 4.0, np.nan, 6.0], "c": list("abcd")})
15271527
gb = df.groupby("a")
1528-
warn = None if skipna else FutureWarning
1529-
msg = f"The behavior of DataFrameGroupBy.{how} with .* any-NA and skipna=False"
1530-
with tm.assert_produces_warning(warn, match=msg):
1528+
if skipna:
15311529
result = gb.transform(how, skipna, numeric_only)
1532-
with tm.assert_produces_warning(warn, match=msg):
15331530
expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only)
1534-
tm.assert_frame_equal(result, expected)
1531+
tm.assert_frame_equal(result, expected)
1532+
else:
1533+
msg = f"DataFrameGroupBy.{how} with skipna=False encountered an NA value"
1534+
with pytest.raises(ValueError, match=msg):
1535+
gb.transform(how, skipna, numeric_only)

0 commit comments

Comments
 (0)