From 336927c66a9c9a3be4e061e9228f05318c9417ab Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 30 Dec 2020 10:00:35 -0500 Subject: [PATCH 1/2] REGR: groupby.sem with nuisance columns --- doc/source/whatsnew/v1.2.1.rst | 1 + pandas/core/groupby/groupby.py | 11 +++++------ pandas/tests/groupby/test_groupby.py | 8 ++++++++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 4c444ea1020dd..60c60c9c23a42 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -20,6 +20,7 @@ Fixed regressions - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`) - Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`) - Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`) +- Fixed regression in :meth:`.GroupBy.sem` where non-numeric columns would raise an error instead of being dropped (:issue:`38774`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ff6ff98fb7840..aef4c036abc65 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1620,12 +1620,11 @@ def sem(self, ddof: int = 1): if result.ndim == 1: result /= np.sqrt(self.count()) else: - cols = result.columns.get_indexer_for( - result.columns.difference(self.exclusions).unique() - ) - result.iloc[:, cols] = result.iloc[:, cols] / np.sqrt( - self.count().iloc[:, cols] - ) + cols = result.columns.difference(self.exclusions).unique() + counts = self.count() + result_ilocs = result.columns.get_indexer_for(cols) + count_ilocs = counts.columns.get_indexer_for(cols) + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result @final diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f8a9412d3036d..e5021b7b4dd5f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -842,6 +842,14 @@ def test_omit_nuisance(df): grouped.agg(lambda x: x.sum(0, numeric_only=False)) +def test_omit_nuisance_sem(df): + # GH 38774 - sem should work with nuisance columns + grouped = df.groupby("A") + result = grouped.sem() + expected = df.loc[:, ["A", "C", "D"]].groupby("A").sem() + tm.assert_frame_equal(result, expected) + + def test_omit_nuisance_python_multiple(three_group): grouped = three_group.groupby(["A", "B"]) From 79905b516c9c21816275b57a00e4c33330a00d38 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 30 Dec 2020 10:41:33 -0500 Subject: [PATCH 2/2] whatsnew fix --- doc/source/whatsnew/v1.2.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 60c60c9c23a42..456cf454e486f 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -20,7 +20,7 @@ Fixed regressions - 
Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`) - Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`) - Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`) -- Fixed regression in :meth:`.GroupBy.sem` where non-numeric columns would raise an error instead of being dropped (:issue:`38774`) +- Fixed regression in :meth:`.GroupBy.sem` where the presence of non-numeric columns would cause an error instead of those columns being dropped (:issue:`38774`) - .. ---------------------------------------------------------------------------