From 4f6ee286c764e0a906263faa2e05c25cc81a2a60 Mon Sep 17 00:00:00 2001 From: longovin Date: Sun, 14 Apr 2024 20:06:27 -0400 Subject: [PATCH 1/3] made tests and changes for issue 58031 on GH --- pandas/core/groupby/ops.py | 4 +++- pandas/tests/groupby/aggregate/test_other.py | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 8585ae3828247..a96bf59315bad 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -914,7 +914,9 @@ def agg_series( np.ndarray or ExtensionArray """ - if not isinstance(obj._values, np.ndarray): + + #if objtype is not in np.dtypes, type is preserved but thats bad seems readable + if not isinstance(obj._values, np.ndarray) and obj.dtype != "boolean": # we can preserve a little bit more aggressively with EA dtype # because maybe_cast_pointwise_result will do a try/except # with _from_sequence. NB we are assuming here that _from_sequence diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 12f99e3cf7a63..9b8bd7a70d8b7 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -666,3 +666,20 @@ def weird_func(x): result = df["decimals"].groupby(df["id1"]).agg(weird_func) tm.assert_series_equal(result, expected, check_names=False) + +def test_groupby_agg_boolean_dype(): + # GH Issue #58031 + # Ensure return type of aggregate dtype has consistent behavior for 'bool' and 'boolean' + # because boolean not covered under numpy + + df_boolean = pd.DataFrame({0: [1, 2, 2], 1: [True, True, None]}) + df_boolean[1] = df_boolean[1].astype("boolean") + + df_bool = pd.DataFrame({0: [1, 2, 2], 1: [True, True, None]}) + df_bool[1] = df_bool[1].astype("bool") + + boolean_return_type = df_boolean.groupby(by=0).aggregate(lambda s: s.fillna(False).mean()).dtypes.values[0] + bool_return_type = df_bool.groupby(by=0).aggregate(lambda s: 
s.fillna(False).mean()).dtypes.values[0] + + assert boolean_return_type == bool_return_type + From d71448b58a79f77e73f0e7df649c252d0fc71741 Mon Sep 17 00:00:00 2001 From: longovin Date: Sun, 14 Apr 2024 21:07:50 -0400 Subject: [PATCH 2/3] fixed all of the pre-commit style errors for GH issue #58031 --- pandas/core/groupby/ops.py | 3 +- pandas/tests/groupby/aggregate/test_other.py | 34 ++++++++++++-------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index a96bf59315bad..5129b84b5ad3f 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -914,8 +914,7 @@ def agg_series( np.ndarray or ExtensionArray """ - - #if objtype is not in np.dtypes, type is preserved but thats bad seems readable + # if objtype is not in np.dtypes, type is preserved if not isinstance(obj._values, np.ndarray) and obj.dtype != "boolean": # we can preserve a little bit more aggressively with EA dtype # because maybe_cast_pointwise_result will do a try/except diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 9b8bd7a70d8b7..7ae4d3176decb 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -667,19 +667,27 @@ def weird_func(x): result = df["decimals"].groupby(df["id1"]).agg(weird_func) tm.assert_series_equal(result, expected, check_names=False) + def test_groupby_agg_boolean_dype(): # GH Issue #58031 - # Ensure return type of aggregate dtype has consistent behavior for 'bool' and 'boolean' - # because boolean not covered under numpy - - df_boolean = pd.DataFrame({0: [1, 2, 2], 1: [True, True, None]}) - df_boolean[1] = df_boolean[1].astype("boolean") - - df_bool = pd.DataFrame({0: [1, 2, 2], 1: [True, True, None]}) - df_bool[1] = df_bool[1].astype("bool") - - boolean_return_type = df_boolean.groupby(by=0).aggregate(lambda s: s.fillna(False).mean()).dtypes.values[0] - bool_return_type = 
df_bool.groupby(by=0).aggregate(lambda s: s.fillna(False).mean()).dtypes.values[0] - - assert boolean_return_type == bool_return_type + # Ensure return type of aggregate dtype has consistent behavior + # for 'bool' and 'boolean' because boolean not covered under numpy + + df_boolean = DataFrame({"0": [1, 2, 2], "1": [True, True, None]}) + df_boolean[1] = df_boolean["1"].astype("boolean") + + df_bool = DataFrame({"0": [1, 2, 2], "1": [True, True, None]}) + df_bool[1] = df_bool["1"].astype("bool") + boolean_return_type = ( + df_boolean.groupby("0") + .aggregate(lambda s: s.fillna(False).mean()) + .dtypes.values[0] + ) + bool_return_type = ( + df_bool.groupby("0") + .aggregate(lambda s: s.fillna(False).mean()) + .dtypes.values[0] + ) + + assert boolean_return_type == bool_return_type From 4059236d1dee0a4038ba68eb0e13241c4964eb5a Mon Sep 17 00:00:00 2001 From: longovin Date: Sun, 14 Apr 2024 21:30:50 -0400 Subject: [PATCH 3/3] documented our BUG fix for pull request --- doc/source/whatsnew/v3.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2ebd6fb62b424..663a67fb24b26 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -329,10 +329,12 @@ Bug fixes - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Fixed bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`) - Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) +- Fixed bug in :meth:`DataFrameGroupBy.aggregate` that had inconsistent ``dtype`` behavior for ``BooleanArray`` (:issue:`58031`) - Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. 
(:issue:`57775`) - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) +- Fixed bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) Categorical ^^^^^^^^^^^