Skip to content

Commit aa3bfc4

Browse files
authored
DEPR: dropping nuisance columns in DataFrame reductions (#41480)
1 parent 751d500 commit aa3bfc4

14 files changed

+199
-48
lines changed

doc/source/whatsnew/v1.2.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ this pathological behavior (:issue:`37827`):
381381
*New behavior*:
382382

383383
.. ipython:: python
384+
:okwarning:
384385
385386
df.mean()
386387
@@ -394,6 +395,7 @@ instead of casting to a NumPy array which may have different semantics (:issue:`
394395
:issue:`28949`, :issue:`21020`).
395396

396397
.. ipython:: python
398+
:okwarning:
397399
398400
ser = pd.Series([0, 1], dtype="category", name="A")
399401
df = ser.to_frame()
@@ -411,6 +413,7 @@ instead of casting to a NumPy array which may have different semantics (:issue:`
411413
*New behavior*:
412414

413415
.. ipython:: python
416+
:okwarning:
414417
415418
df.any()
416419

doc/source/whatsnew/v1.3.0.rst

+41
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,47 @@ Deprecations
679679
- Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
680680
- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)
681681

682+
.. _whatsnew_130.deprecations.nuisance_columns:
683+
684+
Deprecated Dropping Nuisance Columns in DataFrame Reductions and DataFrameGroupBy Operations
685+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
686+
When calling a reduction (.min, .max, .sum, ...) on a :class:`DataFrame` with
687+
``numeric_only=None`` (the default), columns on which the reduction raises ``TypeError``
688+
are silently ignored and dropped from the result.
689+
690+
This behavior is deprecated. In a future version, the ``TypeError`` will be raised,
691+
and users will need to select only valid columns before calling the function.
692+
693+
For example:
694+
695+
.. ipython:: python
696+
697+
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)})
698+
df
699+
700+
*Old behavior*:
701+
702+
.. code-block:: ipython
703+
704+
In [3]: df.prod()
705+
Out[3]:
707+
A 24
708+
dtype: int64
709+
710+
*Future behavior*:
711+
712+
.. code-block:: ipython
713+
714+
In [4]: df.prod()
715+
...
716+
TypeError: 'DatetimeArray' does not implement reduction 'prod'
717+
718+
In [5]: df[["A"]].prod()
719+
Out[5]:
720+
A 24
721+
dtype: int64
722+
682723
.. ---------------------------------------------------------------------------
683724
684725

pandas/core/frame.py

+28
Original file line numberDiff line numberDiff line change
@@ -9854,6 +9854,21 @@ def _get_data() -> DataFrame:
98549854
# Even if we are object dtype, follow numpy and return
98559855
# float64, see test_apply_funcs_over_empty
98569856
out = out.astype(np.float64)
9857+
9858+
if numeric_only is None and out.shape[0] != df.shape[1]:
9859+
# columns have been dropped GH#41480
9860+
arg_name = "numeric_only"
9861+
if name in ["all", "any"]:
9862+
arg_name = "bool_only"
9863+
warnings.warn(
9864+
"Dropping of nuisance columns in DataFrame reductions "
9865+
f"(with '{arg_name}=None') is deprecated; in a future "
9866+
"version this will raise TypeError. Select only valid "
9867+
"columns before calling the reduction.",
9868+
FutureWarning,
9869+
stacklevel=5,
9870+
)
9871+
98579872
return out
98589873

98599874
assert numeric_only is None
@@ -9874,6 +9889,19 @@ def _get_data() -> DataFrame:
98749889
with np.errstate(all="ignore"):
98759890
result = func(values)
98769891

9892+
# columns have been dropped GH#41480
9893+
arg_name = "numeric_only"
9894+
if name in ["all", "any"]:
9895+
arg_name = "bool_only"
9896+
warnings.warn(
9897+
"Dropping of nuisance columns in DataFrame reductions "
9898+
f"(with '{arg_name}=None') is deprecated; in a future "
9899+
"version this will raise TypeError. Select only valid "
9900+
"columns before calling the reduction.",
9901+
FutureWarning,
9902+
stacklevel=5,
9903+
)
9904+
98779905
if hasattr(result, "dtype"):
98789906
if filter_type == "bool" and notna(result).all():
98799907
result = result.astype(np.bool_)

pandas/tests/apply/test_frame_apply.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -1209,7 +1209,10 @@ def test_nuiscance_columns():
12091209
)
12101210
tm.assert_frame_equal(result, expected)
12111211

1212-
result = df.agg("sum")
1212+
with tm.assert_produces_warning(
1213+
FutureWarning, match="Select only valid", check_stacklevel=False
1214+
):
1215+
result = df.agg("sum")
12131216
expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
12141217
tm.assert_series_equal(result, expected)
12151218

@@ -1426,8 +1429,9 @@ def test_apply_datetime_tz_issue():
14261429
@pytest.mark.parametrize("method", ["min", "max", "sum"])
14271430
def test_consistency_of_aggregates_of_columns_with_missing_values(df, method):
14281431
# GH 16832
1429-
none_in_first_column_result = getattr(df[["A", "B"]], method)()
1430-
none_in_second_column_result = getattr(df[["B", "A"]], method)()
1432+
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
1433+
none_in_first_column_result = getattr(df[["A", "B"]], method)()
1434+
none_in_second_column_result = getattr(df[["B", "A"]], method)()
14311435

14321436
tm.assert_series_equal(none_in_first_column_result, none_in_second_column_result)
14331437

pandas/tests/apply/test_invalid_arg.py

+1
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ def test_transform_wont_agg_series(string_series, func):
342342
@pytest.mark.parametrize(
343343
"op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
344344
)
345+
@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
345346
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
346347
# GH 35964
347348
op = op_wrapper(all_reductions)

pandas/tests/frame/methods/test_quantile.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ def test_quantile(self, datetime_frame):
5656
# non-numeric exclusion
5757
df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]})
5858
rs = df.quantile(0.5)
59-
xp = df.median().rename(0.5)
59+
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
60+
xp = df.median().rename(0.5)
6061
tm.assert_series_equal(rs, xp)
6162

6263
# axis

pandas/tests/frame/methods/test_rank.py

+1
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ def test_rank_methods_frame(self):
248248

249249
@td.skip_array_manager_not_yet_implemented
250250
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
251+
@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
251252
def test_rank_descending(self, method, dtype):
252253

253254
if "i" in dtype:

pandas/tests/frame/test_arithmetic.py

+1
Original file line numberDiff line numberDiff line change
@@ -1021,6 +1021,7 @@ def test_zero_len_frame_with_series_corner_cases():
10211021
tm.assert_frame_equal(result, expected)
10221022

10231023

1024+
@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
10241025
def test_frame_single_columns_object_sum_axis_1():
10251026
# GH 13758
10261027
data = {

0 commit comments

Comments
 (0)