Skip to content

Commit 9176ff1

Browse files
rhshadrach and mliu08
authored and committed
DEPR: Enforce deprecation of numeric_only=None in DataFrame aggregations (pandas-dev#49551)
* WIP * DEPR: Enforce deprecation of numeric_only=None in DataFrame aggregations * Partial reverts * numeric_only in generic/series, fixup * cleanup * Remove docs warning * fixups * Fixups
1 parent 8ba0edb commit 9176ff1

File tree

11 files changed

+221
-307
lines changed

11 files changed

+221
-307
lines changed

doc/source/whatsnew/v1.2.0.rst

+12-7
Original file line numberDiff line numberDiff line change
@@ -383,12 +383,17 @@ this pathological behavior (:issue:`37827`):
383383
384384
*New behavior*:
385385

386-
.. ipython:: python
387-
:okwarning:
386+
.. code-block:: ipython
388387
389-
df.mean()
388+
In [3]: df.mean()
389+
Out[3]:
390+
A 1.0
391+
dtype: float64
390392
391-
df[["A"]].mean()
393+
In [4]: df[["A"]].mean()
394+
Out[4]:
395+
A 1.0
396+
dtype: float64
392397
393398
Moreover, DataFrame reductions with ``numeric_only=None`` will now be
394399
consistent with their Series counterparts. In particular, for
@@ -415,10 +420,10 @@ instead of casting to a NumPy array which may have different semantics (:issue:`
415420
416421
*New behavior*:
417422

418-
.. ipython:: python
419-
:okwarning:
423+
.. code-block:: ipython
420424
421-
df.any()
425+
In [5]: df.any()
426+
Out[5]: Series([], dtype: bool)
422427
423428
424429
.. _whatsnew_120.api_breaking.python:

doc/source/whatsnew/v2.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ Removal of prior version deprecations/changes
501501
- Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it as a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__` behavior (:issue:`33469`)
502502
- Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`)
503503
- Changed behavior of :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`)
504+
- Enforced deprecation of ``numeric_only=None`` (the default) in DataFrame reductions that would silently drop columns that raised; ``numeric_only`` now defaults to ``False`` (:issue:`41480`)
504505
-
505506

506507
.. ---------------------------------------------------------------------------
@@ -570,6 +571,7 @@ Timezones
570571
Numeric
571572
^^^^^^^
572573
- Bug in :meth:`DataFrame.add` cannot apply ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`)
574+
- Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` where the result would not be coerced to float (:issue:`49551`)
573575
-
574576

575577
Conversion

pandas/core/frame.py

+9-38
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,8 @@
266266
you to specify a location to update with some value.""",
267267
}
268268

269-
_numeric_only_doc = """numeric_only : bool or None, default None
270-
Include only float, int, boolean data. If None, will attempt to use
271-
everything, then use only numeric data
269+
_numeric_only_doc = """numeric_only : bool, default False
270+
Include only float, int, boolean data.
272271
"""
273272

274273
_merge_doc = """
@@ -10489,7 +10488,7 @@ def _reduce(
1048910488
*,
1049010489
axis: Axis = 0,
1049110490
skipna: bool = True,
10492-
numeric_only: bool | None = None,
10491+
numeric_only: bool = False,
1049310492
filter_type=None,
1049410493
**kwds,
1049510494
):
@@ -10498,7 +10497,6 @@ def _reduce(
1049810497

1049910498
# TODO: Make other agg func handle axis=None properly GH#21597
1050010499
axis = self._get_axis_number(axis)
10501-
labels = self._get_agg_axis(axis)
1050210500
assert axis in [0, 1]
1050310501

1050410502
def func(values: np.ndarray):
@@ -10524,25 +10522,22 @@ def _get_data() -> DataFrame:
1052410522
data = self._get_bool_data()
1052510523
return data
1052610524

10527-
numeric_only_bool = com.resolve_numeric_only(numeric_only)
10528-
if numeric_only is not None or axis == 0:
10525+
if numeric_only or axis == 0:
1052910526
# For numeric_only non-None and axis non-None, we know
1053010527
# which blocks to use and no try/except is needed.
1053110528
# For numeric_only=None only the case with axis==0 and no object
1053210529
# dtypes are unambiguous can be handled with BlockManager.reduce
1053310530
# Case with EAs see GH#35881
1053410531
df = self
10535-
if numeric_only_bool:
10532+
if numeric_only:
1053610533
df = _get_data()
1053710534
if axis == 1:
1053810535
df = df.T
1053910536
axis = 0
1054010537

10541-
ignore_failures = numeric_only is None
10542-
1054310538
# After possibly _get_data and transposing, we are now in the
1054410539
# simple case where we can use BlockManager.reduce
10545-
res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
10540+
res, _ = df._mgr.reduce(blk_func, ignore_failures=False)
1054610541
out = df._constructor(res).iloc[0]
1054710542
if out_dtype is not None:
1054810543
out = out.astype(out_dtype)
@@ -10559,36 +10554,11 @@ def _get_data() -> DataFrame:
1055910554

1056010555
return out
1056110556

10562-
assert numeric_only is None
10557+
assert not numeric_only and axis == 1
1056310558

1056410559
data = self
1056510560
values = data.values
10566-
10567-
try:
10568-
result = func(values)
10569-
10570-
except TypeError:
10571-
# e.g. in nanops trying to convert strs to float
10572-
10573-
data = _get_data()
10574-
labels = data._get_agg_axis(axis)
10575-
10576-
values = data.values
10577-
with np.errstate(all="ignore"):
10578-
result = func(values)
10579-
10580-
# columns have been dropped GH#41480
10581-
arg_name = "numeric_only"
10582-
if name in ["all", "any"]:
10583-
arg_name = "bool_only"
10584-
warnings.warn(
10585-
"Dropping of nuisance columns in DataFrame reductions "
10586-
f"(with '{arg_name}=None') is deprecated; in a future "
10587-
"version this will raise TypeError. Select only valid "
10588-
"columns before calling the reduction.",
10589-
FutureWarning,
10590-
stacklevel=find_stack_level(),
10591-
)
10561+
result = func(values)
1059210562

1059310563
if hasattr(result, "dtype"):
1059410564
if filter_type == "bool" and notna(result).all():
@@ -10600,6 +10570,7 @@ def _get_data() -> DataFrame:
1060010570
# try to coerce to the original dtypes item by item if we can
1060110571
pass
1060210572

10573+
labels = self._get_agg_axis(axis)
1060310574
result = self._constructor_sliced(result, index=labels)
1060410575
return result
1060510576

0 commit comments

Comments
 (0)