Skip to content

Commit b391397

Browse files
authored
DEPR: DataFrame.groupby(axis=1) (#51395)
1 parent ff187c0 commit b391397

33 files changed

+303
-102
lines changed

doc/source/user_guide/categorical.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -617,8 +617,8 @@ even if some categories are not present in the data:
617617
df = pd.DataFrame(
618618
data=[[1, 2, 3], [4, 5, 6]],
619619
columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]),
620-
)
621-
df.groupby(axis=1, level=1).sum()
620+
).T
621+
df.groupby(level=1).sum()
622622
623623
Groupby will also show "unused" categories:
624624

doc/source/user_guide/groupby.rst

+8-9
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,13 @@ object (more on what the GroupBy object is later), you may do the following:
9494
)
9595
speeds
9696
97-
# default is axis=0
9897
grouped = speeds.groupby("class")
99-
grouped = speeds.groupby("order", axis="columns")
10098
grouped = speeds.groupby(["class", "order"])
10199
102100
The mapping can be specified many different ways:
103101

104102
* A Python function, to be called on each of the axis labels.
105-
* A list or NumPy array of the same length as the selected axis.
103+
* A list or NumPy array of the same length as the index.
106104
* A dict or ``Series``, providing a ``label -> group name`` mapping.
107105
* For ``DataFrame`` objects, a string indicating either a column name or
108106
an index level name to be used to group.
@@ -147,8 +145,8 @@ but the specified columns
147145
grouped = df2.groupby(level=df2.index.names.difference(["B"]))
148146
grouped.sum()
149147
150-
These will split the DataFrame on its index (rows). We could also split by the
151-
columns:
148+
These will split the DataFrame on its index (rows). To split by columns, first do
149+
a transpose:
152150

153151
.. ipython::
154152

@@ -159,7 +157,7 @@ columns:
159157
...: return 'consonant'
160158
...:
161159

162-
In [5]: grouped = df.groupby(get_letter_type, axis=1)
160+
In [5]: grouped = df.T.groupby(get_letter_type)
163161

164162
pandas :class:`~pandas.Index` objects support duplicate values. If a
165163
non-unique index is used as the group key in a groupby operation, all values
@@ -254,7 +252,7 @@ above example we have:
254252
.. ipython:: python
255253
256254
df.groupby("A").groups
257-
df.groupby(get_letter_type, axis=1).groups
255+
df.T.groupby(get_letter_type).groups
258256
259257
Calling the standard Python ``len`` function on the GroupBy object just returns
260258
the length of the ``groups`` dict, so it is largely just a convenience:
@@ -496,7 +494,7 @@ An obvious one is aggregation via the
496494
grouped.aggregate(np.sum)
497495
498496
As you can see, the result of the aggregation will have the group names as the
499-
new index along the grouped axis. In the case of multiple keys, the result is a
497+
new index. In the case of multiple keys, the result is a
500498
:ref:`MultiIndex <advanced.hierarchical>` by default, though this can be
501499
changed by using the ``as_index`` option:
502500

@@ -1556,7 +1554,8 @@ Regroup columns of a DataFrame according to their sum, and sum the aggregated on
15561554
15571555
df = pd.DataFrame({"a": [1, 0, 0], "b": [0, 1, 0], "c": [1, 0, 0], "d": [2, 3, 4]})
15581556
df
1559-
df.groupby(df.sum(), axis=1).sum()
1557+
dft = df.T
1558+
dft.groupby(dft.sum()).sum()
15601559
15611560
.. _groupby.multicolumn_factorization:
15621561

doc/source/user_guide/reshaping.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ some very expressive and fast data manipulations.
350350
df.stack().mean(1).unstack()
351351
352352
# same result, another way
353-
df.groupby(level=1, axis=1).mean()
353+
df.T.groupby(level=1).mean()
354354
355355
df.stack().groupby(level=1).mean()
356356

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ Other API changes
9393
Deprecations
9494
~~~~~~~~~~~~
9595
- Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
96+
- Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
9697
- Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
9798
- Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
9899
-

pandas/core/frame.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -8247,19 +8247,37 @@ def update(
82478247
def groupby(
82488248
self,
82498249
by=None,
8250-
axis: Axis = 0,
8250+
axis: Axis | lib.NoDefault = no_default,
82518251
level: IndexLabel | None = None,
82528252
as_index: bool = True,
82538253
sort: bool = True,
82548254
group_keys: bool = True,
82558255
observed: bool = False,
82568256
dropna: bool = True,
82578257
) -> DataFrameGroupBy:
8258+
if axis is not lib.no_default:
8259+
axis = self._get_axis_number(axis)
8260+
if axis == 1:
8261+
warnings.warn(
8262+
"DataFrame.groupby with axis=1 is deprecated. Do "
8263+
"`frame.T.groupby(...)` without axis instead.",
8264+
FutureWarning,
8265+
stacklevel=find_stack_level(),
8266+
)
8267+
else:
8268+
warnings.warn(
8269+
"The 'axis' keyword in DataFrame.groupby is deprecated and "
8270+
"will be removed in a future version.",
8271+
FutureWarning,
8272+
stacklevel=find_stack_level(),
8273+
)
8274+
else:
8275+
axis = 0
8276+
82588277
from pandas.core.groupby.generic import DataFrameGroupBy
82598278

82608279
if level is None and by is None:
82618280
raise TypeError("You have to supply one of 'by' and 'level'")
8262-
axis = self._get_axis_number(axis)
82638281

82648282
return DataFrameGroupBy(
82658283
obj=self,

pandas/core/groupby/generic.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2238,6 +2238,10 @@ def fillna(
22382238
the same results as :meth:`.DataFrame.fillna`. When the
22392239
:class:`DataFrameGroupBy` ``axis`` argument is ``1``, using ``axis=0``
22402240
or ``axis=1`` here will produce the same results.
2241+
2242+
.. deprecated:: 2.1.0
2243+
Use frame.T.groupby(...) instead.
2244+
22412245
inplace : bool, default False
22422246
Broken. Do not set to True.
22432247
limit : int, default None
@@ -2300,15 +2304,15 @@ def fillna(
23002304
23012305
Propagate non-null values forward or backward within each group along rows.
23022306
2303-
>>> df.groupby([0, 0, 1, 1], axis=1).fillna(method="ffill")
2307+
>>> df.T.groupby(np.array([0, 0, 1, 1])).fillna(method="ffill").T
23042308
key A B C
23052309
0 0.0 0.0 2.0 2.0
23062310
1 0.0 2.0 3.0 3.0
23072311
2 1.0 1.0 NaN 2.0
23082312
3 1.0 3.0 NaN NaN
23092313
4 1.0 1.0 NaN NaN
23102314
2311-
>>> df.groupby([0, 0, 1, 1], axis=1).fillna(method="bfill")
2315+
>>> df.T.groupby(np.array([0, 0, 1, 1])).fillna(method="bfill").T
23122316
key A B C
23132317
0 0.0 NaN 2.0 NaN
23142318
1 0.0 2.0 3.0 NaN

pandas/core/groupby/groupby.py

+11-7
Original file line numberDiff line numberDiff line change
@@ -3071,9 +3071,10 @@ def _nth(
30713071
sort=self.sort,
30723072
)
30733073

3074-
grb = dropped.groupby(
3075-
grouper, as_index=self.as_index, sort=self.sort, axis=self.axis
3076-
)
3074+
if self.axis == 1:
3075+
grb = dropped.T.groupby(grouper, as_index=self.as_index, sort=self.sort)
3076+
else:
3077+
grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort)
30773078
return grb.nth(n)
30783079

30793080
@final
@@ -3882,10 +3883,13 @@ def pct_change(
38823883
fill_method = "ffill"
38833884
limit = 0
38843885
filled = getattr(self, fill_method)(limit=limit)
3885-
fill_grp = filled.groupby(
3886-
self.grouper.codes, axis=self.axis, group_keys=self.group_keys
3887-
)
3888-
shifted = fill_grp.shift(periods=periods, freq=freq, axis=self.axis)
3886+
if self.axis == 0:
3887+
fill_grp = filled.groupby(self.grouper.codes, group_keys=self.group_keys)
3888+
else:
3889+
fill_grp = filled.T.groupby(self.grouper.codes, group_keys=self.group_keys)
3890+
shifted = fill_grp.shift(periods=periods, freq=freq)
3891+
if self.axis == 1:
3892+
shifted = shifted.T
38893893
return (filled / shifted) - 1
38903894

38913895
@final

pandas/core/groupby/grouper.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from pandas._config import using_copy_on_write
1818

19+
from pandas._libs import lib
1920
from pandas._typing import (
2021
ArrayLike,
2122
Axis,
@@ -258,10 +259,25 @@ def __init__(
258259
key=None,
259260
level=None,
260261
freq=None,
261-
axis: Axis = 0,
262+
axis: Axis | lib.NoDefault = lib.no_default,
262263
sort: bool = False,
263264
dropna: bool = True,
264265
) -> None:
266+
if type(self) is Grouper:
267+
# i.e. not TimeGrouper
268+
if axis is not lib.no_default:
269+
warnings.warn(
270+
"Grouper axis keyword is deprecated and will be removed in a "
271+
"future version. To group on axis=1, use obj.T.groupby(...) "
272+
"instead",
273+
FutureWarning,
274+
stacklevel=find_stack_level(),
275+
)
276+
else:
277+
axis = 0
278+
if axis is lib.no_default:
279+
axis = 0
280+
265281
self.key = key
266282
self.level = level
267283
self.freq = freq

pandas/core/resample.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1294,7 +1294,11 @@ def _downsample(self, how, **kwargs):
12941294

12951295
# we are downsampling
12961296
# we want to call the actual grouper method here
1297-
result = obj.groupby(self.grouper, axis=self.axis).aggregate(how, **kwargs)
1297+
if self.axis == 0:
1298+
result = obj.groupby(self.grouper).aggregate(how, **kwargs)
1299+
else:
1300+
# test_resample_axis1
1301+
result = obj.T.groupby(self.grouper).aggregate(how, **kwargs).T
12981302

12991303
return self._wrap_result(result)
13001304

pandas/core/reshape/pivot.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,8 @@ def _all_key(key):
377377
margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc)
378378
cat_axis = 1
379379

380-
for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed):
380+
for key, piece in table.T.groupby(level=0, observed=observed):
381+
piece = piece.T
381382
all_key = _all_key(key)
382383

383384
# we are going to mutate this, so need to copy!
@@ -390,7 +391,7 @@ def _all_key(key):
390391
from pandas import DataFrame
391392

392393
cat_axis = 0
393-
for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed):
394+
for key, piece in table.groupby(level=0, observed=observed):
394395
if len(cols) > 1:
395396
all_key = _all_key(key)
396397
else:

pandas/tests/apply/test_str.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -268,9 +268,14 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
268268
args = [0.0] if op == "fillna" else []
269269
if axis in (0, "index"):
270270
ones = np.ones(float_frame.shape[0])
271+
msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
271272
else:
272273
ones = np.ones(float_frame.shape[1])
273-
expected = float_frame.groupby(ones, axis=axis).transform(op, *args)
274+
msg = "DataFrame.groupby with axis=1 is deprecated"
275+
276+
with tm.assert_produces_warning(FutureWarning, match=msg):
277+
gb = float_frame.groupby(ones, axis=axis)
278+
expected = gb.transform(op, *args)
274279
result = float_frame.transform(op, axis, *args)
275280
tm.assert_frame_equal(result, expected)
276281

@@ -283,7 +288,9 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
283288
ones = np.ones(float_frame.shape[0])
284289
else:
285290
ones = np.ones(float_frame.shape[1])
286-
expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args)
291+
with tm.assert_produces_warning(FutureWarning, match=msg):
292+
gb2 = float_frame.groupby(ones, axis=axis)
293+
expected2 = gb2.transform(op, *args)
287294
result2 = float_frame.transform(op, axis, *args)
288295
tm.assert_frame_equal(result2, expected2)
289296

pandas/tests/groupby/aggregate/test_aggregate.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,9 @@ def test_groupby_aggregation_multi_level_column():
124124
columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]),
125125
)
126126

127-
gb = df.groupby(level=1, axis=1)
127+
msg = "DataFrame.groupby with axis=1 is deprecated"
128+
with tm.assert_produces_warning(FutureWarning, match=msg):
129+
gb = df.groupby(level=1, axis=1)
128130
result = gb.sum(numeric_only=False)
129131
expected = DataFrame({0: [2.0, True, True, True], 1: [1, 0, 1, 1]})
130132

@@ -253,7 +255,11 @@ def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype
253255
[[1, 2, 3, 4, 5, 6]] * 3,
254256
columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]),
255257
).astype({("a", "j"): dtype, ("b", "j"): dtype})
256-
result = df.groupby(level=1, axis=1).agg(func)
258+
259+
msg = "DataFrame.groupby with axis=1 is deprecated"
260+
with tm.assert_produces_warning(FutureWarning, match=msg):
261+
gb = df.groupby(level=1, axis=1)
262+
result = gb.agg(func)
257263
expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype(
258264
result_dtype_dict
259265
)
@@ -278,7 +284,11 @@ def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict):
278284
columns=Index([10, 20, 10, 20], name="x"),
279285
dtype="int64",
280286
).astype({10: "Int64"})
281-
result = df.groupby("x", axis=1).agg(func)
287+
288+
msg = "DataFrame.groupby with axis=1 is deprecated"
289+
with tm.assert_produces_warning(FutureWarning, match=msg):
290+
gb = df.groupby("x", axis=1)
291+
result = gb.agg(func)
282292
expected = DataFrame(
283293
data=expected_data,
284294
index=Index([0, 1, 0], name="y"),
@@ -1447,7 +1457,9 @@ def test_groupby_complex_raises(func):
14471457
def test_multi_axis_1_raises(func):
14481458
# GH#46995
14491459
df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]})
1450-
gb = df.groupby("a", axis=1)
1460+
msg = "DataFrame.groupby with axis=1 is deprecated"
1461+
with tm.assert_produces_warning(FutureWarning, match=msg):
1462+
gb = df.groupby("a", axis=1)
14511463
with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"):
14521464
gb.agg(func)
14531465

pandas/tests/groupby/aggregate/test_cython.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,15 @@ def test_cython_agg_frame_columns():
122122
# #2113
123123
df = DataFrame({"x": [1, 2, 3], "y": [3, 4, 5]})
124124

125-
df.groupby(level=0, axis="columns").mean()
126-
df.groupby(level=0, axis="columns").mean()
127-
df.groupby(level=0, axis="columns").mean()
128-
df.groupby(level=0, axis="columns").mean()
125+
msg = "DataFrame.groupby with axis=1 is deprecated"
126+
with tm.assert_produces_warning(FutureWarning, match=msg):
127+
df.groupby(level=0, axis="columns").mean()
128+
with tm.assert_produces_warning(FutureWarning, match=msg):
129+
df.groupby(level=0, axis="columns").mean()
130+
with tm.assert_produces_warning(FutureWarning, match=msg):
131+
df.groupby(level=0, axis="columns").mean()
132+
with tm.assert_produces_warning(FutureWarning, match=msg):
133+
df.groupby(level=0, axis="columns").mean()
129134

130135

131136
def test_cython_agg_return_dict():

pandas/tests/groupby/test_allowlist.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,15 @@ def test_regression_allowlist_methods(raw_frame, op, axis, skipna, sort):
7676
# explicitly test the allowlist methods
7777
if axis == 0:
7878
frame = raw_frame
79+
msg = "The 'axis' keyword in DataFrame.groupby is deprecated and will be"
7980
else:
8081
frame = raw_frame.T
82+
msg = "DataFrame.groupby with axis=1 is deprecated"
8183

82-
if op in AGG_FUNCTIONS_WITH_SKIPNA:
84+
with tm.assert_produces_warning(FutureWarning, match=msg):
8385
grouped = frame.groupby(level=0, axis=axis, sort=sort)
86+
87+
if op in AGG_FUNCTIONS_WITH_SKIPNA:
8488
result = getattr(grouped, op)(skipna=skipna)
8589
expected = frame.groupby(level=0).apply(
8690
lambda h: getattr(h, op)(axis=axis, skipna=skipna)
@@ -89,7 +93,6 @@ def test_regression_allowlist_methods(raw_frame, op, axis, skipna, sort):
8993
expected = expected.sort_index(axis=axis)
9094
tm.assert_frame_equal(result, expected)
9195
else:
92-
grouped = frame.groupby(level=0, axis=axis, sort=sort)
9396
result = getattr(grouped, op)()
9497
expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(axis=axis))
9598
if sort:

0 commit comments

Comments
 (0)