From b100bf75e1e420a0e134dcf545cc79ebc202c9e4 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 13 Mar 2022 00:19:20 +0800 Subject: [PATCH 1/9] Update groupby.py --- pandas/core/groupby/groupby.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3d857e4f3e4e7..a7d43727a5db7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1489,6 +1489,10 @@ def _agg_general( numeric_only=numeric_only, min_count=min_count, ) + dtypes = self.dtypes + for column in result.columns: + result[column] = result[column].astype(dtypes[column].dtype) + return result.__finalize__(self.obj, method="groupby") def _agg_py_fallback( From a216028d5353d1974f9e365d7bc01f2587c81827 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 13 Mar 2022 00:20:03 +0800 Subject: [PATCH 2/9] Update test_groupby.py --- pandas/tests/groupby/test_groupby.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7bf63bb3c2cac..fa64029b3e744 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -18,8 +18,10 @@ Series, Timedelta, Timestamp, + concat, date_range, to_datetime, + util, ) import pandas._testing as tm from pandas.core.arrays import BooleanArray @@ -2654,3 +2656,24 @@ def test_pad_backfill_deprecation(): s.groupby(level=0).backfill() with tm.assert_produces_warning(FutureWarning, match="pad"): s.groupby(level=0).pad() + + +def test_groupby_agg_general_dtypes(): + # GH 44132 + df = util.testing.makeMixedDataFrame() + df = df.to_numpy() + df = DataFrame(df) + df.columns = ["A", "B", "C", "D"] + df = df.set_index("B") + + df1, df2 = df.iloc[:2], df.iloc[2:] + + groupby1 = df1.groupby("B").sum() + groupby2 = df2.groupby("B").sum() + + df_conc = concat([groupby1, groupby2], axis=0) + + result = df_conc.groupby(level=0).sum() + expected = df.groupby(level=0).sum() + + tm.assert_frame_equal(result, expected) From 48fbbe532fff6f6fd3a790f8f332b9bd4c6f2d35 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 13 Mar 2022 00:36:03 +0800 Subject: [PATCH 3/9] pre commit --- pandas/tests/groupby/test_groupby.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index fa64029b3e744..5e830fa105185 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -18,7 +18,6 @@ Series, Timedelta, Timestamp, - concat, date_range, to_datetime, util, @@ -2671,7 +2670,7 @@ def test_groupby_agg_general_dtypes(): groupby1 = df1.groupby("B").sum() groupby2 = df2.groupby("B").sum() - df_conc = concat([groupby1, groupby2], axis=0) + df_conc = pd.concat([groupby1, groupby2], axis=0) result = df_conc.groupby(level=0).sum() expected = df.groupby(level=0).sum() From 8e6ce3f27181ecf901daa46ee7ac56601991781b Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 13 Mar 2022 13:51:15 +0800 Subject: [PATCH 4/9] ensure dtypes are same in groupby type --- pandas/core/groupby/groupby.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a7d43727a5db7..c84d0bc7e9e25 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1489,10 +1489,6 @@ def _agg_general( numeric_only=numeric_only, min_count=min_count, ) - dtypes = self.dtypes - for column in result.columns: - result[column] = result[column].astype(dtypes[column].dtype) - return result.__finalize__(self.obj, method="groupby") def _agg_py_fallback( @@ -1583,6 +1579,9 @@ def array_func(values: ArrayLike) -> ArrayLike: res.index = self.grouper.result_index return self._reindex_output(res) else: + dtypes = self.dtypes + for column in res.columns: + res[column] = res[column].astype(dtypes[column].dtype) return res def _cython_transform( From 5d2375b2039a326ef90054d4076b72b5da4e911e Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 13 Mar 2022 16:04:05 +0800 Subject: [PATCH 5/9] ensure dataframe dtypes are same in sum function --- pandas/core/groupby/groupby.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c84d0bc7e9e25..360d0e5ccd382 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1579,9 +1579,6 @@ def array_func(values: ArrayLike) -> ArrayLike: res.index = self.grouper.result_index return self._reindex_output(res) else: - dtypes = self.dtypes - for column in res.columns: - res[column] = res[column].astype(dtypes[column].dtype) return res def _cython_transform( @@ -2181,6 +2178,10 @@ def sum( alias="add", npfunc=np.sum, ) + if isinstance(result, DataFrame): + dtypes = self.dtypes + for column in result.columns: + result[column] = result[column].astype(dtypes[column].dtype) return self._reindex_output(result, fill_value=0) From 9066ffbf7e63e897975153679ecf93ac96518ca4 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Mon, 14 Mar 2022 21:49:12 +0800 Subject: [PATCH 6/9] Update groupby.py --- pandas/core/groupby/groupby.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 360d0e5ccd382..3c8811f2a00ad 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2179,9 +2179,12 @@ def sum( npfunc=np.sum, ) if isinstance(result, DataFrame): - dtypes = self.dtypes - for column in result.columns: - result[column] = result[column].astype(dtypes[column].dtype) + try: + dtypes = self.dtypes + for column in result.columns: + result[column] = result[column].astype(dtypes[column].dtype) + except: + pass return self._reindex_output(result, fill_value=0) From 4373116b48e9d84406b7f8c88e9bd353613fe8f8 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Mon, 14 Mar 2022 22:17:43 +0800 Subject: [PATCH 7/9] pre commit --- pandas/core/groupby/groupby.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3c8811f2a00ad..f9e87e3b13ff6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2178,6 +2178,7 @@ def sum( alias="add", npfunc=np.sum, ) + if isinstance(result, DataFrame): try: dtypes = self.dtypes From 3b84aef5660fc7916dfe57f924031e5918139e63 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Mon, 14 Mar 2022 22:27:03 +0800 Subject: [PATCH 8/9] Try and exception --- pandas/core/groupby/groupby.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9e87e3b13ff6..279ae482a8837 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2179,13 +2179,13 @@ def sum( npfunc=np.sum, ) - if isinstance(result, DataFrame): - try: - dtypes = self.dtypes - for column in result.columns: - result[column] = result[column].astype(dtypes[column].dtype) - except: - pass + if isinstance(result, DataFrame): + try: + dtypes = self.dtypes + for column in result.columns: + result[column] = result[column].astype(dtypes[column].dtype) + except: + return self._reindex_output(result, fill_value=0) return self._reindex_output(result, fill_value=0) From 91d9ecd66543300bea5351081935850848852b09 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Mon, 14 Mar 2022 22:40:05 +0800 Subject: [PATCH 9/9] pre commit --- pandas/core/groupby/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 279ae482a8837..7af394e744f1a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2184,8 +2184,8 @@ def sum( dtypes = self.dtypes for column in result.columns: result[column] = result[column].astype(dtypes[column].dtype) - except: - return self._reindex_output(result, fill_value=0) + except Exception: + pass return self._reindex_output(result, fill_value=0)