diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 2ebd6fb62b424..663a67fb24b26 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -329,10 +329,12 @@ Bug fixes
 - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Fixed bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
+- Fixed bug in :meth:`DataFrameGroupBy.aggregate` that had inconsistent ``dtype`` behavior for ``BooleanArray`` (:issue:`58031`)
 - Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
 - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
 - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
+- Fixed bug in :func:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``.
(:issue:`57547`)
 
 Categorical
 ^^^^^^^^^^^
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 8585ae3828247..5129b84b5ad3f 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -914,7 +914,8 @@ def agg_series(
         np.ndarray or ExtensionArray
         """
 
-        if not isinstance(obj._values, np.ndarray):
+        # if objtype is not in np.dtypes, type is preserved
+        if not isinstance(obj._values, np.ndarray) and obj.dtype != "boolean":
             # we can preserve a little bit more aggressively with EA dtype
             # because maybe_cast_pointwise_result will do a try/except
             # with _from_sequence. NB we are assuming here that _from_sequence
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 12f99e3cf7a63..7ae4d3176decb 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -666,3 +666,28 @@ def weird_func(x):
 
     result = df["decimals"].groupby(df["id1"]).agg(weird_func)
     tm.assert_series_equal(result, expected, check_names=False)
+
+
+def test_groupby_agg_boolean_dype():
+    # GH#58031: a UDF aggregation must return the same dtype for a numpy
+    # "bool" column and a nullable "boolean" column. Cast the existing "1"
+    # column in place: assigning to the integer key 1 adds a new column and
+    # leaves "1" as object dtype in both frames, so nothing would be compared.
+    df_boolean = DataFrame({"0": [1, 2, 2], "1": [True, True, None]})
+    df_boolean["1"] = df_boolean["1"].astype("boolean")
+
+    df_bool = DataFrame({"0": [1, 2, 2], "1": [True, True, None]})
+    df_bool["1"] = df_bool["1"].astype("bool")
+
+    boolean_return_type = (
+        df_boolean.groupby("0")
+        .aggregate(lambda s: s.fillna(False).mean())
+        .dtypes["1"]
+    )
+    bool_return_type = (
+        df_bool.groupby("0")
+        .aggregate(lambda s: s.fillna(False).mean())
+        .dtypes["1"]
+    )
+
+    assert boolean_return_type == bool_return_type