Skip to content

Commit dbb2adc

Browse files
authored
DEPR: Remove df.reduction(level) (#49611)
* DEPR: Remove df.reduction(level)
* test_*_consistency
* Fix asv
* Add issue ref
1 parent a23eb83 commit dbb2adc

20 files changed: +70 / -778 lines changed

asv_bench/benchmarks/frame_methods.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -454,10 +454,10 @@ def setup(self, axis):
454454
)
455455

456456
def time_count_level_multi(self, axis):
457-
self.df.count(axis=axis, level=1)
457+
self.df.count(axis=axis)
458458

459459
def time_count_level_mixed_dtypes_multi(self, axis):
460-
self.df_mixed.count(axis=axis, level=1)
460+
self.df_mixed.count(axis=axis)
461461

462462

463463
class Apply:

asv_bench/benchmarks/stat_ops.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ def time_op(self, op, dtype, axis):
2323

2424
class FrameMultiIndexOps:
2525

26-
params = ([0, 1, [0, 1]], ops)
27-
param_names = ["level", "op"]
26+
params = [ops]
27+
param_names = ["op"]
2828

29-
def setup(self, level, op):
29+
def setup(self, op):
3030
levels = [np.arange(10), np.arange(100), np.arange(100)]
3131
codes = [
3232
np.arange(10).repeat(10000),
@@ -37,8 +37,8 @@ def setup(self, level, op):
3737
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
3838
self.df_func = getattr(df, op)
3939

40-
def time_op(self, level, op):
41-
self.df_func(level=level)
40+
def time_op(self, op):
41+
self.df_func()
4242

4343

4444
class SeriesOps:
@@ -56,10 +56,10 @@ def time_op(self, op, dtype):
5656

5757
class SeriesMultiIndexOps:
5858

59-
params = ([0, 1, [0, 1]], ops)
60-
param_names = ["level", "op"]
59+
params = [ops]
60+
param_names = ["op"]
6161

62-
def setup(self, level, op):
62+
def setup(self, op):
6363
levels = [np.arange(10), np.arange(100), np.arange(100)]
6464
codes = [
6565
np.arange(10).repeat(10000),
@@ -70,8 +70,8 @@ def setup(self, level, op):
7070
s = pd.Series(np.random.randn(len(index)), index=index)
7171
self.s_func = getattr(s, op)
7272

73-
def time_op(self, level, op):
74-
self.s_func(level=level)
73+
def time_op(self, op):
74+
self.s_func()
7575

7676

7777
class Rank:

doc/source/whatsnew/v0.15.2.rst

+6-4
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,13 @@ Other enhancements:
154154
155155
- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`):
156156

157-
.. ipython:: python
158-
:okwarning:
157+
.. code-block:: python
159158
160-
s = pd.Series([False, True, False], index=[0, 0, 1])
161-
s.any(level=0)
159+
>>> s = pd.Series([False, True, False], index=[0, 0, 1])
160+
>>> s.any(level=0)
161+
0 True
162+
1 False
163+
dtype: bool
162164
163165
- ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`):
164166

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ Removal of prior version deprecations/changes
469469
- Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
470470
- Removed :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`)
471471
- Removed :attr:`Rolling.is_datetimelike` (:issue:`38963`)
472+
- Removed the ``level`` keyword in :class:`DataFrame` and :class:`Series` aggregations; use ``groupby`` instead (:issue:`39983`)
472473
- Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`)
473474
- Removed deprecated :attr:`NaT.freq` (:issue:`45071`)
474475
- Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)

pandas/core/frame.py

+1-63
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@
118118
maybe_downcast_to_dtype,
119119
)
120120
from pandas.core.dtypes.common import (
121-
ensure_platform_int,
122121
infer_dtype_from_object,
123122
is_1d_only_ea_dtype,
124123
is_bool_dtype,
@@ -10331,7 +10330,7 @@ def c(x):
1033110330
# ----------------------------------------------------------------------
1033210331
# ndarray-like stats methods
1033310332

10334-
def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False):
10333+
def count(self, axis: Axis = 0, numeric_only: bool = False):
1033510334
"""
1033610335
Count non-NA cells for each column or row.
1033710336
@@ -10343,10 +10342,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
1034310342
axis : {0 or 'index', 1 or 'columns'}, default 0
1034410343
If 0 or 'index' counts are generated for each column.
1034510344
If 1 or 'columns' counts are generated for each row.
10346-
level : int or str, optional
10347-
If the axis is a `MultiIndex` (hierarchical), count along a
10348-
particular `level`, collapsing into a `DataFrame`.
10349-
A `str` specifies the level name.
1035010345
numeric_only : bool, default False
1035110346
Include only `float`, `int` or `boolean` data.
1035210347
@@ -10400,16 +10395,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
1040010395
dtype: int64
1040110396
"""
1040210397
axis = self._get_axis_number(axis)
10403-
if level is not None:
10404-
warnings.warn(
10405-
"Using the level keyword in DataFrame and Series aggregations is "
10406-
"deprecated and will be removed in a future version. Use groupby "
10407-
"instead. df.count(level=1) should use df.groupby(level=1).count().",
10408-
FutureWarning,
10409-
stacklevel=find_stack_level(),
10410-
)
10411-
res = self._count_level(level, axis=axis, numeric_only=numeric_only)
10412-
return res.__finalize__(self, method="count")
1041310398

1041410399
if numeric_only:
1041510400
frame = self._get_numeric_data()
@@ -10434,53 +10419,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
1043410419

1043510420
return result.astype("int64").__finalize__(self, method="count")
1043610421

10437-
def _count_level(self, level: Level, axis: AxisInt = 0, numeric_only: bool = False):
10438-
if numeric_only:
10439-
frame = self._get_numeric_data()
10440-
else:
10441-
frame = self
10442-
10443-
count_axis = frame._get_axis(axis)
10444-
agg_axis = frame._get_agg_axis(axis)
10445-
10446-
if not isinstance(count_axis, MultiIndex):
10447-
raise TypeError(
10448-
f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
10449-
)
10450-
10451-
# Mask NaNs: Mask rows or columns where the index level is NaN, and all
10452-
# values in the DataFrame that are NaN
10453-
if frame._is_mixed_type:
10454-
# Since we have mixed types, calling notna(frame.values) might
10455-
# upcast everything to object
10456-
values_mask = notna(frame).values
10457-
else:
10458-
# But use the speedup when we have homogeneous dtypes
10459-
values_mask = notna(frame.values)
10460-
10461-
index_mask = notna(count_axis.get_level_values(level=level))
10462-
if axis == 1:
10463-
mask = index_mask & values_mask
10464-
else:
10465-
mask = index_mask.reshape(-1, 1) & values_mask
10466-
10467-
if isinstance(level, int):
10468-
level_number = level
10469-
else:
10470-
level_number = count_axis._get_level_number(level)
10471-
10472-
level_name = count_axis._names[level_number]
10473-
level_index = count_axis.levels[level_number]._rename(name=level_name)
10474-
level_codes = ensure_platform_int(count_axis.codes[level_number])
10475-
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis)
10476-
10477-
if axis == 1:
10478-
result = self._constructor(counts, index=agg_axis, columns=level_index)
10479-
else:
10480-
result = self._constructor(counts, index=level_index, columns=agg_axis)
10481-
10482-
return result
10483-
1048410422
def _reduce(
1048510423
self,
1048610424
op,

0 commit comments

Comments
 (0)