Skip to content

Commit 4edb52b

Browse files
authored
REGR: groupby.sem with nuisance columns (#38816)
1 parent 3d351ed commit 4edb52b

File tree

3 files changed

+14
-6
lines changed

3 files changed

+14
-6
lines changed

doc/source/whatsnew/v1.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Fixed regressions
2020
- Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
2121
- Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`)
2222
- Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`)
23+
- Fixed regression in :meth:`.GroupBy.sem` where the presence of non-numeric columns would cause an error instead of being dropped (:issue:`38774`)
2324
- :func:`read_excel` does not work for non-rawbyte file handles (:issue:`38788`)
2425
- Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings (:issue:`38753`)
2526
-

pandas/core/groupby/groupby.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -1620,12 +1620,11 @@ def sem(self, ddof: int = 1):
16201620
if result.ndim == 1:
16211621
result /= np.sqrt(self.count())
16221622
else:
1623-
cols = result.columns.get_indexer_for(
1624-
result.columns.difference(self.exclusions).unique()
1625-
)
1626-
result.iloc[:, cols] = result.iloc[:, cols] / np.sqrt(
1627-
self.count().iloc[:, cols]
1628-
)
1623+
cols = result.columns.difference(self.exclusions).unique()
1624+
counts = self.count()
1625+
result_ilocs = result.columns.get_indexer_for(cols)
1626+
count_ilocs = counts.columns.get_indexer_for(cols)
1627+
result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs])
16291628
return result
16301629

16311630
@final

pandas/tests/groupby/test_groupby.py

+8
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,14 @@ def test_omit_nuisance(df):
842842
grouped.agg(lambda x: x.sum(0, numeric_only=False))
843843

844844

845+
def test_omit_nuisance_sem(df):
846+
# GH 38774 - sem should work with nuisance columns
847+
grouped = df.groupby("A")
848+
result = grouped.sem()
849+
expected = df.loc[:, ["A", "C", "D"]].groupby("A").sem()
850+
tm.assert_frame_equal(result, expected)
851+
852+
845853
def test_omit_nuisance_python_multiple(three_group):
846854
grouped = three_group.groupby(["A", "B"])
847855

0 commit comments

Comments
 (0)