Skip to content

Commit c96012d

Browse files
Ynob2000 authored and
WillAyd committed
Groupby agg works with pd.Series.nunique, but groupby nunique fails with axis=1 (pandas-dev#30311)
1 parent 4e807a2 commit c96012d

File tree

3 files changed

+31
-2
lines changed

3 files changed

+31
-2
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ Groupby/resample/rolling
837837
- Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`)
838838
- Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`)
839839
- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`)
840+
- Bug in :meth:`DataFrame.groupby` where ``nunique`` failed when grouping with ``axis=1`` (:issue:`30253`)
840841

841842
Reshaping
842843
^^^^^^^^^

pandas/core/groupby/generic.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1813,9 +1813,20 @@ def groupby_series(obj, col=None):
18131813
# Try to consolidate with normal wrapping functions
18141814
from pandas.core.reshape.concat import concat
18151815

1816-
results = [groupby_series(content, label) for label, content in obj.items()]
1816+
axis_number = obj._get_axis_number(self.axis)
1817+
other_axis = int(not axis_number)
1818+
if axis_number == 0:
1819+
iter_func = obj.items
1820+
else:
1821+
iter_func = obj.iterrows
1822+
1823+
results = [groupby_series(content, label) for label, content in iter_func()]
18171824
results = concat(results, axis=1)
1818-
results.columns.names = obj.columns.names
1825+
1826+
if axis_number == 1:
1827+
results = results.T
1828+
1829+
results._get_axis(other_axis).names = obj._get_axis(other_axis).names
18191830

18201831
if not self.as_index:
18211832
results.index = ibase.default_index(len(results))

pandas/tests/groupby/test_groupby.py

+17
Original file line numberDiff line numberDiff line change
@@ -1994,3 +1994,20 @@ def test_dup_labels_output_shape(groupby_func, idx):
19941994

19951995
assert result.shape == (1, 2)
19961996
tm.assert_index_equal(result.columns, idx)
1997+
1998+
1999+
def test_groupby_crash_on_nunique(axis):
2000+
# GH 30253: regression test checking that groupby(...).nunique() works when
# grouping by an index level along either axis.
# NOTE(review): `axis` is presumably a pytest fixture supplying axis names
# and numbers; confirm against the project's conftest.
2001+
# Two-level column labels; the outer level ("A", "D") is what gets grouped on.
df = pd.DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]})
2002+
2003+
# Normalise `axis` to its integer form.
axis_number = df._get_axis_number(axis)
2004+
# For axis 0, transpose so the two-level labels sit on the axis being grouped.
if not axis_number:
2005+
df = df.T
2006+
2007+
result = df.groupby(axis=axis_number, level=0).nunique()
2008+
2009+
# Distinct-value counts per outer label: "A" spans (1, 1) and (2, 3) giving
# 1 and 2; "D" holds a single 0 in each position, giving 1 and 1.
expected = pd.DataFrame({"A": [1, 2], "D": [1, 1]})
2010+
# Mirror the transpose applied to the input for the axis-0 case.
if not axis_number:
2011+
expected = expected.T
2012+
2013+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)