Skip to content

Commit 34fcda3

Browse files
committed
Avoided looping and multiple np calls, added api breaking section to whatsnew
1 parent 0b8efa9 commit 34fcda3

File tree

2 files changed

+11
-13
lines changed

2 files changed

+11
-13
lines changed

pandas/core/groupby/groupby.py

+4
Original file line number | Diff line number | Diff line change
@@ -1282,6 +1282,8 @@ def std(self, ddof: int = 1):
12821282
cols = result.columns.get_indexer_for(
12831283
result.columns.difference(self.exclusions).unique()
12841284
)
1285+
# .values to remove labels; iLocIndexer._setitem_with_indexer
1286+
# broken when labels are not unique
12851287
result.iloc[:, cols] = np.sqrt(result.iloc[:, cols]).values
12861288

12871289
return result
@@ -1339,6 +1341,8 @@ def sem(self, ddof: int = 1):
13391341
cols = result.columns.get_indexer_for(
13401342
result.columns.difference(self.exclusions).unique()
13411343
)
1344+
# .values in both numerator and denominator to remove labels;
1345+
# iLocIndexer._setitem_with_indexer broken when labels are not unique
13421346
result.iloc[:, cols] = (
13431347
result.iloc[:, cols].values / np.sqrt(self.count().iloc[:, cols]).values
13441348
)

pandas/tests/groupby/test_function.py

+7 -13
Original file line number | Diff line number | Diff line change
@@ -589,19 +589,13 @@ def test_ops_not_as_index(reduction_func):
589589
# GH 10355
590590
# Using as_index=False should not modify grouped column
591591

592-
# GH 5755
593-
if reduction_func in (
594-
"corrwith",
595-
"idxmax",
596-
"idxmin",
597-
"mad",
598-
"ngroup",
599-
"nth",
600-
"nunique",
601-
"size",
602-
"skew",
603-
):
604-
pytest.skip("Skip until #5755 is resolved")
592+
if reduction_func in ("nth", "ngroup", "size",):
593+
pytest.skip("Skip until behavior is determined (GH #5755)")
594+
595+
if reduction_func in ("corrwith", "idxmax", "idxmin", "mad", "nunique", "skew",):
596+
pytest.xfail(
597+
"_GroupBy._python_apply_general incorrectly modifies grouping columns"
598+
)
605599

606600
df = DataFrame(np.random.randint(0, 5, size=(100, 2)), columns=["a", "b"])
607601
expected = getattr(df.groupby("a"), reduction_func)().reset_index()

0 commit comments

Comments
 (0)