diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 6fd758abb1f33..2826029d902db 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -833,6 +833,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`) - Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) - Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) +- Bug in :meth:`DataFrame.groupby` when using ``nunique`` with ``axis=1`` (:issue:`30253`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index bfe3c628169e9..eaa4f51c155a9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1813,9 +1813,20 @@ def groupby_series(obj, col=None): # Try to consolidate with normal wrapping functions from pandas.core.reshape.concat import concat - results = [groupby_series(content, label) for label, content in obj.items()] + axis_number = obj._get_axis_number(self.axis) + other_axis = int(not axis_number) + if axis_number == 0: + iter_func = obj.items + else: + iter_func = obj.iterrows + + results = [groupby_series(content, label) for label, content in iter_func()] results = concat(results, axis=1) - results.columns.names = obj.columns.names + + if axis_number == 1: + results = results.T + + results._get_axis(other_axis).names = obj._get_axis(other_axis).names if not self.as_index: results.index = ibase.default_index(len(results)) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 93d4dc6046735..8f88f68c69f2b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1994,3 +1994,20 @@ def test_dup_labels_output_shape(groupby_func, idx): assert
result.shape == (1, 2) tm.assert_index_equal(result.columns, idx) + + +def test_groupby_crash_on_nunique(axis): + # Regression test for GH 30253: groupby nunique along either axis + df = pd.DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) + + axis_number = df._get_axis_number(axis) + if not axis_number: + df = df.T + + result = df.groupby(axis=axis_number, level=0).nunique() + + expected = pd.DataFrame({"A": [1, 2], "D": [1, 1]}) + if not axis_number: + expected = expected.T + + tm.assert_frame_equal(result, expected)