pandas-dev · longovin · Apr 15, 2024 · Apr 15, 2024 · Apr 15, 2024 · rhshadrach
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -329,10 +329,12 @@ Bug fixes
 - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Fixed bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
+- Fixed bug in :meth:`DataFrameGroupBy.aggregate` that had inconsistent ``dtype`` behavior for ``BooleanArray`` (:issue:`58031`)
 - Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
 - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
 - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
+- Fixed bug in :func:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 
 Categorical
 ^^^^^^^^^^^

@@ -914,7 +914,8 @@ def agg_series(
         np.ndarray or ExtensionArray
         """
 
-        if not isinstance(obj._values, np.ndarray):
+        # if objtype is not in np.dtypes, type is preserved
+        if not isinstance(obj._values, np.ndarray) and obj.dtype != "boolean":
             # we can preserve a little bit more aggressively with EA dtype
             #  because maybe_cast_pointwise_result will do a try/except
             #  with _from_sequence.  NB we are assuming here that _from_sequence

diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -666,3 +666,28 @@ def weird_func(x):
 
     result = df["decimals"].groupby(df["id1"]).agg(weird_func)
     tm.assert_series_equal(result, expected, check_names=False)
+
+
+def test_groupby_agg_boolean_dype():
+    # GH#58031: a UDF aggregation must give the same result dtype whether the
+    # column is numpy "bool" or the nullable "boolean" extension dtype.
+    # The cast is written back under the existing "1" label; assigning to the
+    # integer label 1 would add a second column and leave "1" uncast.
+    df_boolean = DataFrame({"0": [1, 2, 2], "1": [True, True, None]})
+    df_boolean["1"] = df_boolean["1"].astype("boolean")
+
+    df_bool = DataFrame({"0": [1, 2, 2], "1": [True, True, None]})
+    df_bool["1"] = df_bool["1"].astype("bool")
+
+    boolean_return_type = (
+        df_boolean.groupby("0")
+        .aggregate(lambda s: s.fillna(False).mean())
+        .dtypes["1"]
+    )
+    bool_return_type = (
+        df_bool.groupby("0")
+        .aggregate(lambda s: s.fillna(False).mean())
+        .dtypes["1"]
+    )
+
+    assert boolean_return_type == bool_return_type