Avoided looping and multiple np calls, added api breaking section to whatsnew

rhshadrach · rhshadrach · commit 34fcda304e8a · 2020-04-26T10:49:14.000-04:00
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1282,6 +1282,8 @@ def std(self, ddof: int = 1):
             cols = result.columns.get_indexer_for(
                 result.columns.difference(self.exclusions).unique()
             )
+            # Use .values to strip labels; iLocIndexer._setitem_with_indexer
+            # is broken when labels are not unique
             result.iloc[:, cols] = np.sqrt(result.iloc[:, cols]).values
 
         return result
@@ -1339,6 +1341,8 @@ def sem(self, ddof: int = 1):
             cols = result.columns.get_indexer_for(
                 result.columns.difference(self.exclusions).unique()
             )
+            # Use .values in both numerator and denominator to strip labels;
+            # iLocIndexer._setitem_with_indexer is broken when labels are not unique
             result.iloc[:, cols] = (
                 result.iloc[:, cols].values / np.sqrt(self.count().iloc[:, cols]).values
             )
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -589,19 +589,13 @@ def test_ops_not_as_index(reduction_func):
     # GH 10355
     # Using as_index=False should not modify grouped column
 
-    # GH 5755
-    if reduction_func in (
-        "corrwith",
-        "idxmax",
-        "idxmin",
-        "mad",
-        "ngroup",
-        "nth",
-        "nunique",
-        "size",
-        "skew",
-    ):
-        pytest.skip("Skip until #5755 is resolved")
+    if reduction_func in ("nth", "ngroup", "size",):
+        pytest.skip("Skip until behavior is determined (GH #5755)")
+
+    if reduction_func in ("corrwith", "idxmax", "idxmin", "mad", "nunique", "skew",):
+        pytest.xfail(
+            "_GroupBy._python_apply_general incorrectly modifies grouping columns"
+        )
 
     df = DataFrame(np.random.randint(0, 5, size=(100, 2)), columns=["a", "b"])
     expected = getattr(df.groupby("a"), reduction_func)().reset_index()

Original file line number	Diff line number	Diff line change
`@@ -1282,6 +1282,8 @@ def std(self, ddof: int = 1):`
`1282`	`1282`	`cols = result.columns.get_indexer_for(`
`1283`	`1283`	`result.columns.difference(self.exclusions).unique()`
`1284`	`1284`	`)`
	`1285`	`+ # Use .values to strip labels; iLocIndexer._setitem_with_indexer`
	`1286`	`+ # is broken when labels are not unique`
`1285`	`1287`	`result.iloc[:, cols] = np.sqrt(result.iloc[:, cols]).values`
`1286`	`1288`
`1287`	`1289`	`return result`
`@@ -1339,6 +1341,8 @@ def sem(self, ddof: int = 1):`
`1339`	`1341`	`cols = result.columns.get_indexer_for(`
`1340`	`1342`	`result.columns.difference(self.exclusions).unique()`
`1341`	`1343`	`)`
	`1344`	`+ # Use .values in both numerator and denominator to strip labels;`
	`1345`	`+ # iLocIndexer._setitem_with_indexer is broken when labels are not unique`
`1342`	`1346`	`result.iloc[:, cols] = (`
`1343`	`1347`	`result.iloc[:, cols].values / np.sqrt(self.count().iloc[:, cols]).values`
`1344`	`1348`	`)`