diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index df7a8b8775501..acbfaa5f4d3bf 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -20,6 +20,7 @@ Fixed regressions - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`) - Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`) - Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`) +- Fixed regression in :meth:`.GroupBy.sem` where the presence of non-numeric columns would cause an error instead of being dropped (:issue:`38774`) - :func:`read_excel` does not work for non-rawbyte file handles (issue:`38788`) - Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings (:issue:`38753`) - diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ff6ff98fb7840..aef4c036abc65 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1620,12 +1620,11 @@ def sem(self, ddof: int = 1): if result.ndim == 1: result /= np.sqrt(self.count()) else: - cols = result.columns.get_indexer_for( - result.columns.difference(self.exclusions).unique() - ) - result.iloc[:, cols] = result.iloc[:, cols] / np.sqrt( - self.count().iloc[:, cols] - ) + cols = result.columns.difference(self.exclusions).unique() + counts = self.count() + result_ilocs = result.columns.get_indexer_for(cols) + count_ilocs = counts.columns.get_indexer_for(cols) + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result @final diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f8a9412d3036d..e5021b7b4dd5f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -842,6 +842,14 @@ def test_omit_nuisance(df): grouped.agg(lambda x: x.sum(0, numeric_only=False)) +def test_omit_nuisance_sem(df): + # GH 38774 - sem should work with nuisance columns + grouped = df.groupby("A") + result = grouped.sem() + expected = df.loc[:, ["A", "C", "D"]].groupby("A").sem() + tm.assert_frame_equal(result, expected) + + def test_omit_nuisance_python_multiple(three_group): grouped = three_group.groupby(["A", "B"])