From 4cd824d31ea438149d162f001c221fefb6c67604 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 27 Oct 2020 13:38:13 +0100
Subject: [PATCH 1/2] Backport PR #37433: REGR: fix groupby std() with nullable
 dtypes

---
 doc/source/whatsnew/v1.1.4.rst                |  1 +
 pandas/core/groupby/groupby.py                |  2 +-
 pandas/tests/groupby/aggregate/test_cython.py | 35 +++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst
index c0aa1afc34c8f..a717e46692a19 100644
--- a/doc/source/whatsnew/v1.1.4.rst
+++ b/doc/source/whatsnew/v1.1.4.rst
@@ -21,6 +21,7 @@ Fixed regressions
 - Fixed regression in :meth:`Series.astype` converting ``None`` to ``"nan"`` when casting to string (:issue:`36904`)
 - Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`)
 - Fixed regression in :meth:`DataFrame.resample(...).apply(...)` raised ``AttributeError`` when input was a :class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`)
+- Fixed regression in ``DataFrame.groupby(...).std()`` with nullable integer dtype (:issue:`37415`)
 - Fixed regression in :class:`PeriodDtype` comparing both equal and unequal to its string representation (:issue:`37265`)
 - Fixed regression where slicing :class:`DatetimeIndex` raised :exc:`AssertionError` on irregular time series with ``pd.NaT`` or on unsorted indices (:issue:`36953` and :issue:`35509`)
 - Fixed regression in certain offsets (:meth:`pd.offsets.Day() <pandas.tseries.offsets.Day>` and below) no longer being hashable (:issue:`37267`)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index b3ec9cf71786a..9415ee1b7e969 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2489,9 +2489,9 @@ def _get_cythonized_result(
                     except TypeError as e:
                         error_msg = str(e)
                         continue
+                vals = vals.astype(cython_dtype, copy=False)
                 if needs_2d:
                     vals = vals.reshape((-1, 1))
-                vals = vals.astype(cython_dtype, copy=False)
                 func = partial(func, vals)
 
             func = partial(func, labels)
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index 87ebd8b5a27fb..02dbf00ee6421 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -277,3 +277,38 @@ def test_read_only_buffer_source_agg(agg):
     expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})
 
     tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "op_name",
+    [
+        "count",
+        "sum",
+        "std",
+        "var",
+        "sem",
+        "mean",
+        "median",
+        "prod",
+        "min",
+        "max",
+    ],
+)
+def test_cython_agg_nullable_int(op_name):
+    # ensure that the cython-based aggregations don't fail for nullable dtype
+    # (e.g. https://github.com/pandas-dev/pandas/issues/37415)
+    df = DataFrame(
+        {
+            "A": ["A", "B"] * 5,
+            "B": pd.array([1, 2, 3, 4, 5, 6, 7, 8, 9, pd.NA], dtype="Int64"),
+        }
+    )
+    result = getattr(df.groupby("A")["B"], op_name)()
+    df2 = df.assign(B=df["B"].astype("float64"))
+    expected = getattr(df2.groupby("A")["B"], op_name)()
+
+    if op_name != "count":
+        # the result is not yet consistently using Int64/Float64 dtype,
+        # so for now just checking the values by casting to float
+        result = result.astype("float64")
+    tm.assert_series_equal(result, expected)

From bd0b4dda9c4d6c8265e751173bada728df677848 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Tue, 27 Oct 2020 13:42:54 +0000
Subject: [PATCH 2/2] reformat with black 19.10b0

---
 pandas/tests/groupby/aggregate/test_cython.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index 02dbf00ee6421..7bacb62ce62f4 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -281,18 +281,7 @@ def test_read_only_buffer_source_agg(agg):
 
 @pytest.mark.parametrize(
     "op_name",
-    [
-        "count",
-        "sum",
-        "std",
-        "var",
-        "sem",
-        "mean",
-        "median",
-        "prod",
-        "min",
-        "max",
-    ],
+    ["count", "sum", "std", "var", "sem", "mean", "median", "prod", "min", "max"],
 )
 def test_cython_agg_nullable_int(op_name):
     # ensure that the cython-based aggregations don't fail for nullable dtype