diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index d00a418af3ddb..c9521cd7fb75c 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -92,6 +92,7 @@ Bug fixes - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) - Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +- Fixed bug where :meth:`DataFrameGroupBy.mean`, :meth:`DataFrameGroupBy.median`, :meth:`DataFrameGroupBy.var`, and :meth:`DataFrameGroupBy.std` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`) **Strings** diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d346a0ccbcd3f..97f73966dda8a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1080,7 +1080,7 @@ def _cython_agg_blocks( result = type(block.values)._from_sequence( result.ravel(), dtype=block.values.dtype ) - except ValueError: + except (ValueError, TypeError): # reshape to be valid for non-Extension Block result = result.reshape(1, -1) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 1ecff003b545b..3f7b0d46faf5d 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1563,3 +1563,34 @@ def test_groupby_mean_no_overflow(): } ) assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840 + + +@pytest.mark.parametrize( + "values", + [ + { + "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], + }, + {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, + ], +) +@pytest.mark.parametrize("function", ["mean", "median", "var"]) +def test_apply_to_nullable_integer_returns_float(values, function): + # https://github.com/pandas-dev/pandas/issues/32219 + output = 0.5 if function == "var" else 1.5 + arr = np.array([output] * 3, dtype=float) + idx = pd.Index([1, 2, 3], dtype=object, name="a") + expected = pd.DataFrame({"b": arr}, index=idx) + + groups = pd.DataFrame(values, dtype="Int64").groupby("a") + + result = getattr(groups, function)() + tm.assert_frame_equal(result, expected) + + result = groups.agg(function) + tm.assert_frame_equal(result, expected) + + result = groups.agg([function]) + expected.columns = MultiIndex.from_tuples([("b", function)]) + tm.assert_frame_equal(result, expected)