Skip to content

Commit 15a06d3

Browse files
authored
BUG: groupby.size and groupby.transform('size') incorrect for axis=1 (#45987)
1 parent 4bc68b3 commit 15a06d3

File tree

6 files changed

+71
-4
lines changed

6 files changed

+71
-4
lines changed

doc/source/whatsnew/v1.5.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,8 @@ Plotting
371371
Groupby/resample/rolling
372372
^^^^^^^^^^^^^^^^^^^^^^^^
373373
- Bug in :meth:`DataFrame.resample` ignoring ``closed="right"`` on :class:`TimedeltaIndex` (:issue:`45414`)
374-
- Bug in :meth:`.DataFrameGroupBy.transform` fails when the input DataFrame has multiple columns (:issue:`27469`)
374+
- Bug in :meth:`.DataFrameGroupBy.transform` failing when ``func="size"`` and the input DataFrame has multiple columns (:issue:`27469`)
375+
- Bug in :meth:`.DataFrameGroupBy.size` and :meth:`.DataFrameGroupBy.transform` with ``func="size"`` producing incorrect results when ``axis=1`` (:issue:`45715`)
375376

376377
Reshaping
377378
^^^^^^^^^

pandas/conftest.py

+8
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,14 @@ def axis(request):
229229
axis_frame = axis
230230

231231

232+
@pytest.fixture(params=[1, "columns"], ids=lambda x: f"axis={repr(x)}")
233+
def axis_1(request):
234+
"""
235+
Fixture for returning aliases of axis 1 of a DataFrame.
236+
"""
237+
return request.param
238+
239+
232240
@pytest.fixture(params=[True, False, None])
233241
def observed(request):
234242
"""

pandas/core/groupby/groupby.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -1665,15 +1665,15 @@ def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT:
16651665

16661666
# for each col, reshape to size of original frame by take operation
16671667
ids, _, _ = self.grouper.group_info
1668-
result = result.reindex(self.grouper.result_index, copy=False)
1668+
result = result.reindex(self.grouper.result_index, axis=self.axis, copy=False)
16691669

16701670
if self.obj.ndim == 1:
16711671
# i.e. SeriesGroupBy
16721672
out = algorithms.take_nd(result._values, ids)
16731673
output = obj._constructor(out, index=obj.index, name=obj.name)
16741674
else:
1675-
output = result.take(ids, axis=0)
1676-
output.index = obj.index
1675+
output = result.take(ids, axis=self.axis)
1676+
output = output.set_axis(obj._get_axis(self.axis), axis=self.axis)
16771677
return output
16781678

16791679
# -----------------------------------------------------------------
@@ -2150,6 +2150,13 @@ def size(self) -> DataFrame | Series:
21502150
"""
21512151
result = self.grouper.size()
21522152

2153+
if self.axis == 1:
2154+
return DataFrame(
2155+
data=np.tile(result.values, (self.obj.shape[0], 1)),
2156+
columns=result.index,
2157+
index=self.obj.index,
2158+
)
2159+
21532160
# GH28330 preserve subclassed Series/DataFrames through calls
21542161
if issubclass(self.obj._constructor, Series):
21552162
result = self._obj_1d_constructor(result, name=self.obj.name)

pandas/tests/groupby/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ def as_index(request):
1919
return request.param
2020

2121

22+
@pytest.fixture(params=[True, False])
23+
def dropna(request):
24+
return request.param
25+
26+
2227
@pytest.fixture
2328
def mframe(multiindex_dataframe_random_data):
2429
return multiindex_dataframe_random_data

pandas/tests/groupby/test_size.py

+23
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,29 @@ def test_size(df, by):
1818
assert result[key] == len(group)
1919

2020

21+
@pytest.mark.parametrize(
22+
"by",
23+
[
24+
[0, 0, 0, 0],
25+
[0, 1, 1, 1],
26+
[1, 0, 1, 1],
27+
[0, None, None, None],
28+
pytest.param([None, None, None, None], marks=pytest.mark.xfail),
29+
],
30+
)
31+
def test_size_axis_1(df, axis_1, by, sort, dropna):
32+
# GH#45715
33+
counts = {key: sum(value == key for value in by) for key in dict.fromkeys(by)}
34+
if dropna:
35+
counts = {key: value for key, value in counts.items() if key is not None}
36+
expected = DataFrame(counts, index=df.index)
37+
if sort:
38+
expected = expected.sort_index(axis=1)
39+
grouped = df.groupby(by=by, axis=axis_1, sort=sort, dropna=dropna)
40+
result = grouped.size()
41+
tm.assert_frame_equal(result, expected)
42+
43+
2144
@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
2245
@pytest.mark.parametrize("sort", [True, False])
2346
def test_size_sort(sort, by):

pandas/tests/groupby/transform/test_transform.py

+23
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,29 @@ def test_transform_axis_1(request, transformation_func):
191191
tm.assert_equal(result, expected)
192192

193193

194+
def test_transform_axis_1_reducer(request, reduction_func):
195+
# GH#45715
196+
if reduction_func in (
197+
"corrwith",
198+
"first",
199+
"idxmax",
200+
"idxmin",
201+
"last",
202+
"ngroup",
203+
"nth",
204+
):
205+
marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986")
206+
request.node.add_marker(marker)
207+
df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"])
208+
result = df.groupby([0, 0, 1], axis=1).transform(reduction_func)
209+
if reduction_func == "size":
210+
# size doesn't behave in the same manner; hardcode expected result
211+
expected = DataFrame(2 * [[2, 2, 1]], index=df.index, columns=df.columns)
212+
else:
213+
expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T
214+
tm.assert_equal(result, expected)
215+
216+
194217
def test_transform_axis_ts(tsframe):
195218

196219
# make sure that we are setting the axes

0 commit comments

Comments
 (0)