Skip to content

Commit 815053e

Browse files
authored
Deprecate level keyword for dataframe and series aggregations (#40869)
1 parent 835b280 commit 815053e

20 files changed

+283
-155
lines changed

doc/source/user_guide/advanced.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ values across a level. For instance:
498498
)
499499
df = pd.DataFrame(np.random.randn(4, 2), index=midx)
500500
df
501-
df2 = df.mean(level=0)
501+
df2 = df.groupby(level=0).mean()
502502
df2
503503
df2.reindex(df.index, level=0)
504504

doc/source/user_guide/categorical.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ even if some categories are not present in the data:
633633
data=[[1, 2, 3], [4, 5, 6]],
634634
columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
635635
)
636-
df.sum(axis=1, level=1)
636+
df.groupby(axis=1, level=1).sum()
637637
638638
Groupby will also show "unused" categories:
639639

doc/source/user_guide/groupby.rst

-8
Original file line numberDiff line numberDiff line change
@@ -320,14 +320,6 @@ number:
320320
321321
s.groupby(level="second").sum()
322322
323-
The aggregation functions such as ``sum`` will take the level parameter
324-
directly. Additionally, the resulting index will be named according to the
325-
chosen level:
326-
327-
.. ipython:: python
328-
329-
s.sum(level="second")
330-
331323
Grouping with multiple levels is supported.
332324

333325
.. ipython:: python

doc/source/whatsnew/v0.15.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ Other enhancements:
154154
- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`):
155155

156156
.. ipython:: python
157+
:okwarning:
157158
158159
s = pd.Series([False, True, False], index=[0, 0, 1])
159160
s.any(level=0)

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ Deprecations
563563
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`)
564564
- Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`)
565565
- Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
566+
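- Deprecated the ``level`` keyword for :class:`DataFrame` and :class:`Series` aggregations; use :meth:`DataFrame.groupby` or :meth:`Series.groupby` with ``level`` instead, e.g. replace ``df.sum(level=1)`` with ``df.groupby(level=1).sum()`` (:issue:`39983`)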
566567

567568
.. ---------------------------------------------------------------------------
568569

pandas/core/frame.py

+7
Original file line numberDiff line numberDiff line change
@@ -9479,6 +9479,13 @@ def count(
94799479
"""
94809480
axis = self._get_axis_number(axis)
94819481
if level is not None:
9482+
warnings.warn(
9483+
"Using the level keyword in DataFrame and Series aggregations is "
9484+
"deprecated and will be removed in a future version. Use groupby "
9485+
"instead. df.count(level=1) should use df.groupby(level=1).count().",
9486+
FutureWarning,
9487+
stacklevel=2,
9488+
)
94829489
return self._count_level(level, axis=axis, numeric_only=numeric_only)
94839490

94849491
if numeric_only:

pandas/core/generic.py

+35
Original file line numberDiff line numberDiff line change
@@ -10368,6 +10368,13 @@ def _logical_func(
1036810368
):
1036910369
nv.validate_logical_func((), kwargs, fname=name)
1037010370
if level is not None:
10371+
warnings.warn(
10372+
"Using the level keyword in DataFrame and Series aggregations is "
10373+
"deprecated and will be removed in a future version. Use groupby "
10374+
"instead. df.any(level=1) should use df.groupby(level=1).any()",
10375+
FutureWarning,
10376+
stacklevel=4,
10377+
)
1037110378
if bool_only is not None:
1037210379
raise NotImplementedError(
1037310380
"Option bool_only is not implemented with option level."
@@ -10459,6 +10466,13 @@ def _stat_function_ddof(
1045910466
if axis is None:
1046010467
axis = self._stat_axis_number
1046110468
if level is not None:
10469+
warnings.warn(
10470+
"Using the level keyword in DataFrame and Series aggregations is "
10471+
"deprecated and will be removed in a future version. Use groupby "
10472+
"instead. df.var(level=1) should use df.groupby(level=1).var().",
10473+
FutureWarning,
10474+
stacklevel=4,
10475+
)
1046210476
return self._agg_by_level(
1046310477
name, axis=axis, level=level, skipna=skipna, ddof=ddof
1046410478
)
@@ -10507,6 +10521,13 @@ def _stat_function(
1050710521
if axis is None:
1050810522
axis = self._stat_axis_number
1050910523
if level is not None:
10524+
warnings.warn(
10525+
"Using the level keyword in DataFrame and Series aggregations is "
10526+
"deprecated and will be removed in a future version. Use groupby "
10527+
"instead. df.median(level=1) should use df.groupby(level=1).median().",
10528+
FutureWarning,
10529+
stacklevel=4,
10530+
)
1051010531
return self._agg_by_level(
1051110532
name, axis=axis, level=level, skipna=skipna, numeric_only=numeric_only
1051210533
)
@@ -10569,6 +10590,13 @@ def _min_count_stat_function(
1056910590
if axis is None:
1057010591
axis = self._stat_axis_number
1057110592
if level is not None:
10593+
warnings.warn(
10594+
"Using the level keyword in DataFrame and Series aggregations is "
10595+
"deprecated and will be removed in a future version. Use groupby "
10596+
"instead. df.sum(level=1) should use df.groupby(level=1).sum().",
10597+
FutureWarning,
10598+
stacklevel=4,
10599+
)
1057210600
return self._agg_by_level(
1057310601
name,
1057410602
axis=axis,
@@ -10646,6 +10674,13 @@ def mad(self, axis=None, skipna=None, level=None):
1064610674
if axis is None:
1064710675
axis = self._stat_axis_number
1064810676
if level is not None:
10677+
warnings.warn(
10678+
"Using the level keyword in DataFrame and Series aggregations is "
10679+
"deprecated and will be removed in a future version. Use groupby "
10680+
"instead. df.mad(level=1) should use df.groupby(level=1).mad()",
10681+
FutureWarning,
10682+
stacklevel=3,
10683+
)
1064910684
return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna)
1065010685

1065110686
data = self._get_numeric_data()

pandas/core/series.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -1894,8 +1894,16 @@ def count(self, level=None):
18941894
"""
18951895
if level is None:
18961896
return notna(self._values).sum()
1897-
elif not isinstance(self.index, MultiIndex):
1898-
raise ValueError("Series.count level is only valid with a MultiIndex")
1897+
else:
1898+
warnings.warn(
1899+
"Using the level keyword in DataFrame and Series aggregations is "
1900+
"deprecated and will be removed in a future version. Use groupby "
1901+
"instead. ser.count(level=1) should use ser.groupby(level=1).count().",
1902+
FutureWarning,
1903+
stacklevel=2,
1904+
)
1905+
if not isinstance(self.index, MultiIndex):
1906+
raise ValueError("Series.count level is only valid with a MultiIndex")
18991907

19001908
index = self.index
19011909
assert isinstance(index, MultiIndex) # for mypy
-104
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,11 @@
1-
import numpy as np
2-
import pytest
3-
41
from pandas import (
52
DataFrame,
6-
Index,
73
Series,
84
)
95
import pandas._testing as tm
106

117

128
class TestDataFrameCount:
13-
def test_count_multiindex(self, multiindex_dataframe_random_data):
14-
frame = multiindex_dataframe_random_data
15-
16-
frame = frame.copy()
17-
frame.index.names = ["a", "b"]
18-
19-
result = frame.count(level="b")
20-
expected = frame.count(level=1)
21-
tm.assert_frame_equal(result, expected, check_names=False)
22-
23-
result = frame.count(level="a")
24-
expected = frame.count(level=0)
25-
tm.assert_frame_equal(result, expected, check_names=False)
26-
27-
msg = "Level x not found"
28-
with pytest.raises(KeyError, match=msg):
29-
frame.count(level="x")
30-
319
def test_count(self):
3210
# corner case
3311
frame = DataFrame()
@@ -59,85 +37,3 @@ def test_count_objects(self, float_string_frame):
5937

6038
tm.assert_series_equal(dm.count(), df.count())
6139
tm.assert_series_equal(dm.count(1), df.count(1))
62-
63-
def test_count_level_corner(self, multiindex_dataframe_random_data):
64-
frame = multiindex_dataframe_random_data
65-
66-
ser = frame["A"][:0]
67-
result = ser.count(level=0)
68-
expected = Series(0, index=ser.index.levels[0], name="A")
69-
tm.assert_series_equal(result, expected)
70-
71-
df = frame[:0]
72-
result = df.count(level=0)
73-
expected = (
74-
DataFrame(
75-
index=ser.index.levels[0].set_names(["first"]), columns=df.columns
76-
)
77-
.fillna(0)
78-
.astype(np.int64)
79-
)
80-
tm.assert_frame_equal(result, expected)
81-
82-
def test_count_index_with_nan(self):
83-
# https://github.com/pandas-dev/pandas/issues/21824
84-
df = DataFrame(
85-
{
86-
"Person": ["John", "Myla", None, "John", "Myla"],
87-
"Age": [24.0, 5, 21.0, 33, 26],
88-
"Single": [False, True, True, True, False],
89-
}
90-
)
91-
92-
# count on row labels
93-
res = df.set_index(["Person", "Single"]).count(level="Person")
94-
expected = DataFrame(
95-
index=Index(["John", "Myla"], name="Person"),
96-
columns=Index(["Age"]),
97-
data=[2, 2],
98-
)
99-
tm.assert_frame_equal(res, expected)
100-
101-
# count on column labels
102-
res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1)
103-
expected = DataFrame(
104-
columns=Index(["John", "Myla"], name="Person"),
105-
index=Index(["Age"]),
106-
data=[[2, 2]],
107-
)
108-
tm.assert_frame_equal(res, expected)
109-
110-
def test_count_level(
111-
self,
112-
multiindex_year_month_day_dataframe_random_data,
113-
multiindex_dataframe_random_data,
114-
):
115-
ymd = multiindex_year_month_day_dataframe_random_data
116-
frame = multiindex_dataframe_random_data
117-
118-
def _check_counts(frame, axis=0):
119-
index = frame._get_axis(axis)
120-
for i in range(index.nlevels):
121-
result = frame.count(axis=axis, level=i)
122-
expected = frame.groupby(axis=axis, level=i).count()
123-
expected = expected.reindex_like(result).astype("i8")
124-
tm.assert_frame_equal(result, expected)
125-
126-
frame.iloc[1, [1, 2]] = np.nan
127-
frame.iloc[7, [0, 1]] = np.nan
128-
ymd.iloc[1, [1, 2]] = np.nan
129-
ymd.iloc[7, [0, 1]] = np.nan
130-
131-
_check_counts(frame)
132-
_check_counts(ymd)
133-
_check_counts(frame.T, axis=1)
134-
_check_counts(ymd.T, axis=1)
135-
136-
# can't call with level on regular DataFrame
137-
df = tm.makeTimeDataFrame()
138-
with pytest.raises(TypeError, match="hierarchical"):
139-
df.count(level=0)
140-
141-
frame["D"] = "foo"
142-
result = frame.count(level=0, numeric_only=True)
143-
tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp"))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import (
5+
DataFrame,
6+
Index,
7+
Series,
8+
)
9+
import pandas._testing as tm
10+
11+
12+
class TestDataFrameCount:
13+
def test_count_multiindex(self, multiindex_dataframe_random_data):
14+
frame = multiindex_dataframe_random_data
15+
16+
frame = frame.copy()
17+
frame.index.names = ["a", "b"]
18+
19+
with tm.assert_produces_warning(FutureWarning):
20+
result = frame.count(level="b")
21+
with tm.assert_produces_warning(FutureWarning):
22+
expected = frame.count(level=1)
23+
tm.assert_frame_equal(result, expected, check_names=False)
24+
25+
with tm.assert_produces_warning(FutureWarning):
26+
result = frame.count(level="a")
27+
with tm.assert_produces_warning(FutureWarning):
28+
expected = frame.count(level=0)
29+
tm.assert_frame_equal(result, expected, check_names=False)
30+
31+
msg = "Level x not found"
32+
with pytest.raises(KeyError, match=msg):
33+
with tm.assert_produces_warning(FutureWarning):
34+
frame.count(level="x")
35+
36+
def test_count_level_corner(self, multiindex_dataframe_random_data):
37+
frame = multiindex_dataframe_random_data
38+
39+
ser = frame["A"][:0]
40+
with tm.assert_produces_warning(FutureWarning):
41+
result = ser.count(level=0)
42+
expected = Series(0, index=ser.index.levels[0], name="A")
43+
tm.assert_series_equal(result, expected)
44+
45+
df = frame[:0]
46+
with tm.assert_produces_warning(FutureWarning):
47+
result = df.count(level=0)
48+
expected = (
49+
DataFrame(
50+
index=ser.index.levels[0].set_names(["first"]), columns=df.columns
51+
)
52+
.fillna(0)
53+
.astype(np.int64)
54+
)
55+
tm.assert_frame_equal(result, expected)
56+
57+
def test_count_index_with_nan(self):
58+
# https://github.com/pandas-dev/pandas/issues/21824
59+
df = DataFrame(
60+
{
61+
"Person": ["John", "Myla", None, "John", "Myla"],
62+
"Age": [24.0, 5, 21.0, 33, 26],
63+
"Single": [False, True, True, True, False],
64+
}
65+
)
66+
67+
# count on row labels
68+
with tm.assert_produces_warning(FutureWarning):
69+
res = df.set_index(["Person", "Single"]).count(level="Person")
70+
expected = DataFrame(
71+
index=Index(["John", "Myla"], name="Person"),
72+
columns=Index(["Age"]),
73+
data=[2, 2],
74+
)
75+
tm.assert_frame_equal(res, expected)
76+
77+
# count on column labels
78+
with tm.assert_produces_warning(FutureWarning):
79+
res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1)
80+
expected = DataFrame(
81+
columns=Index(["John", "Myla"], name="Person"),
82+
index=Index(["Age"]),
83+
data=[[2, 2]],
84+
)
85+
tm.assert_frame_equal(res, expected)
86+
87+
def test_count_level(
88+
self,
89+
multiindex_year_month_day_dataframe_random_data,
90+
multiindex_dataframe_random_data,
91+
):
92+
ymd = multiindex_year_month_day_dataframe_random_data
93+
frame = multiindex_dataframe_random_data
94+
95+
def _check_counts(frame, axis=0):
96+
index = frame._get_axis(axis)
97+
for i in range(index.nlevels):
98+
with tm.assert_produces_warning(FutureWarning):
99+
result = frame.count(axis=axis, level=i)
100+
expected = frame.groupby(axis=axis, level=i).count()
101+
expected = expected.reindex_like(result).astype("i8")
102+
tm.assert_frame_equal(result, expected)
103+
104+
frame.iloc[1, [1, 2]] = np.nan
105+
frame.iloc[7, [0, 1]] = np.nan
106+
ymd.iloc[1, [1, 2]] = np.nan
107+
ymd.iloc[7, [0, 1]] = np.nan
108+
109+
_check_counts(frame)
110+
_check_counts(ymd)
111+
_check_counts(frame.T, axis=1)
112+
_check_counts(ymd.T, axis=1)
113+
114+
# can't call with level on regular DataFrame
115+
df = tm.makeTimeDataFrame()
116+
with pytest.raises(TypeError, match="hierarchical"):
117+
with tm.assert_produces_warning(FutureWarning):
118+
df.count(level=0)
119+
120+
frame["D"] = "foo"
121+
with tm.assert_produces_warning(FutureWarning):
122+
result = frame.count(level=0, numeric_only=True)
123+
tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp"))

0 commit comments

Comments
 (0)