From 771284086dc28ad0d2262712b99d79e5b0746a33 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Wed, 27 Nov 2024 09:18:01 -0800
Subject: [PATCH 1/7] ENH: Support kurtosis (kurt) in DataFrameGroupBy and
 SeriesGroupBy

---
 doc/source/whatsnew/v3.0.0.rst                |   1 +
 pandas/_libs/groupby.pyi                      |   9 +
 pandas/_libs/groupby.pyx                      |  94 +++++++++
 pandas/core/arrays/base.py                    |   1 +
 pandas/core/arrays/categorical.py             |   2 +-
 pandas/core/arrays/datetimelike.py            |   6 +-
 pandas/core/groupby/base.py                   |   1 +
 pandas/core/groupby/generic.py                | 188 ++++++++++++++++++
 pandas/core/groupby/ops.py                    |   8 +-
 pandas/tests/groupby/methods/test_kurt.py     |  27 +++
 pandas/tests/groupby/test_api.py              |   1 +
 pandas/tests/groupby/test_apply.py            |   1 +
 pandas/tests/groupby/test_categorical.py      |   1 +
 pandas/tests/groupby/test_groupby.py          |  10 +-
 pandas/tests/groupby/test_numeric_only.py     |   3 +
 pandas/tests/groupby/test_raises.py           |  33 ++-
 pandas/tests/groupby/test_reductions.py       |   5 +-
 .../tests/groupby/transform/test_transform.py |   8 +-
 18 files changed, 379 insertions(+), 20 deletions(-)
 create mode 100644 pandas/tests/groupby/methods/test_kurt.py

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 1b12735f0e7c1..8999c8f441551 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -56,6 +56,7 @@ Other enhancements
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
 - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
+- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`SeriesGroupBy.apply`, :meth:`DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :meth:`str.get_dummies` now accepts a  ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi
index 53f5f73624232..34367f55d2bbb 100644
--- a/pandas/_libs/groupby.pyi
+++ b/pandas/_libs/groupby.pyi
@@ -97,6 +97,15 @@ def group_skew(
     result_mask: np.ndarray | None = ...,
     skipna: bool = ...,
 ) -> None: ...
+def group_kurt(
+    out: np.ndarray,  # float64_t[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[float64_t, ndim=2]
+    labels: np.ndarray,  # const intp_t[::1]
+    mask: np.ndarray | None = ...,
+    result_mask: np.ndarray | None = ...,
+    skipna: bool = ...,
+) -> None: ...
 def group_mean(
     out: np.ndarray,  # floating[:, ::1]
     counts: np.ndarray,  # int64_t[::1]
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index d7e485f74e58b..0c48fbdee1f11 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -995,6 +995,100 @@ def group_skew(
                     )
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+@cython.cdivision(True)
+@cython.cpow
+def group_kurt(
+    float64_t[:, ::1] out,
+    int64_t[::1] counts,
+    ndarray[float64_t, ndim=2] values,
+    const intp_t[::1] labels,
+    const uint8_t[:, ::1] mask=None,
+    uint8_t[:, ::1] result_mask=None,
+    bint skipna=True,
+) -> None:
+    cdef:
+        Py_ssize_t i, j, N, K, lab, ngroups = len(counts)
+        int64_t[:, ::1] nobs
+        Py_ssize_t len_values = len(values), len_labels = len(labels)
+        bint isna_entry, uses_mask = mask is not None
+        float64_t[:, ::1] M1, M2, M3, M4
+        float64_t delta, delta_n, delta_n2, term1, val
+        int64_t n1, n
+        float64_t ct, num, den, adj
+
+    if len_values != len_labels:
+        raise ValueError("len(index) != len(labels)")
+
+    nobs = np.zeros((<object>out).shape, dtype=np.int64)
+
+    # M1, M2, M3 and M4 correspond to 1st, 2nd, 3rd and 4th Moments
+    M1 = np.zeros((<object>out).shape, dtype=np.float64)
+    M2 = np.zeros((<object>out).shape, dtype=np.float64)
+    M3 = np.zeros((<object>out).shape, dtype=np.float64)
+    M4 = np.zeros((<object>out).shape, dtype=np.float64)
+
+    N, K = (<object>values).shape
+
+    out[:, :] = 0.0
+
+    with nogil:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+
+            counts[lab] += 1
+
+            for j in range(K):
+                val = values[i, j]
+
+                if uses_mask:
+                    isna_entry = mask[i, j]
+                else:
+                    isna_entry = _treat_as_na(val, False)
+
+                if not isna_entry:
+                    # Based on RunningStats::Push from
+                    #  https://www.johndcook.com/blog/skewness_kurtosis/
+                    n1 = nobs[lab, j]
+                    n = n1 + 1
+
+                    nobs[lab, j] = n
+                    delta = val - M1[lab, j]
+                    delta_n = delta / n
+                    delta_n2 = delta_n * delta_n
+                    term1 = delta * delta_n * n1
+
+                    M1[lab, j] += delta_n
+                    M4[lab, j] += (term1 * delta_n2 * (n*n - 3*n + 3)
+                                   + 6 * delta_n2 * M2[lab, j]
+                                   - 4 * delta_n * M3[lab, j])
+                    M3[lab, j] += term1 * delta_n * (n - 2) - 3 * delta_n * M2[lab, j]
+                    M2[lab, j] += term1
+                elif not skipna:
+                    M1[lab, j] = NaN
+                    M2[lab, j] = NaN
+                    M3[lab, j] = NaN
+                    M4[lab, j] = NaN
+
+        for i in range(ngroups):
+            for j in range(K):
+                ct = <float64_t>nobs[i, j]
+                if ct < 4:
+                    if result_mask is not None:
+                        result_mask[i, j] = 1
+                    out[i, j] = NaN
+                elif M2[i, j] == 0:
+                    out[i, j] = 0
+                else:
+                    num = ct * (ct + 1) * (ct - 1) * M4[i, j]
+                    den = (ct - 2) * (ct - 3) * M2[i, j] ** 2
+                    adj = 3.0 * (ct - 1) ** 2 / ((ct - 2) * (ct - 3))
+                    out[i, j] = num / den - adj
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_mean(
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 4835d808f2433..e831883998098 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2618,6 +2618,7 @@ def _groupby_op(
                 "sem",
                 "var",
                 "skew",
+                "kurt",
             ]:
                 raise TypeError(
                     f"dtype '{self.dtype}' does not support operation '{how}'"
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 99e4cb0545e2d..ae20bfb6b284b 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2736,7 +2736,7 @@ def _groupby_op(
         op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
 
         dtype = self.dtype
-        if how in ["sum", "prod", "cumsum", "cumprod", "skew"]:
+        if how in ["sum", "prod", "cumsum", "cumprod", "skew", "kurt"]:
             raise TypeError(f"{dtype} type does not support {how} operations")
         if how in ["min", "max", "rank", "idxmin", "idxmax"] and not dtype.ordered:
             # raise TypeError instead of NotImplementedError to ensure we
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 9c821bf0d184e..cbcab4cd497ad 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1656,7 +1656,7 @@ def _groupby_op(
         dtype = self.dtype
         if dtype.kind == "M":
             # Adding/multiplying datetimes is not valid
-            if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
+            if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew", "kurt"]:
                 raise TypeError(f"datetime64 type does not support operation '{how}'")
             if how in ["any", "all"]:
                 # GH#34479
@@ -1667,7 +1667,7 @@ def _groupby_op(
 
         elif isinstance(dtype, PeriodDtype):
             # Adding/multiplying Periods is not valid
-            if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
+            if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew", "kurt"]:
                 raise TypeError(f"Period type does not support {how} operations")
             if how in ["any", "all"]:
                 # GH#34479
@@ -1677,7 +1677,7 @@ def _groupby_op(
                 )
         else:
             # timedeltas we can add but not multiply
-            if how in ["prod", "cumprod", "skew", "var"]:
+            if how in ["prod", "cumprod", "skew", "kurt", "var"]:
                 raise TypeError(f"timedelta64 type does not support {how} operations")
 
         # All of the functions implemented here are ordinal, so we can
diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index bad9749b5ecee..7699fb3d0f864 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -50,6 +50,7 @@ class OutputKey:
         "sem",
         "size",
         "skew",
+        "kurt",
         "std",
         "sum",
         "var",
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 35ec09892ede6..d276d929321ba 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1279,6 +1279,84 @@ def alt(obj):
             "skew", alt=alt, skipna=skipna, numeric_only=numeric_only, **kwargs
         )
 
+    def kurt(
+        self,
+        skipna: bool = True,
+        numeric_only: bool = False,
+        **kwargs,
+    ) -> Series:
+        """
+        Return unbiased kurtosis within groups.
+
+        Parameters
+        ----------
+        skipna : bool, default True
+            Exclude NA/null values when computing the result.
+
+        numeric_only : bool, default False
+            Include only float, int, boolean columns. Not implemented for Series.
+
+        **kwargs
+            Additional keyword arguments to be passed to the function.
+
+        Returns
+        -------
+        Series
+            Unbiased kurtosis within groups.
+
+        See Also
+        --------
+        Series.kurt : Return unbiased kurtosis over requested axis.
+
+        Examples
+        --------
+        >>> ser = pd.Series(
+        ...     [390.0, 350.0, 357.0, 333.0, np.nan, 22.0, 20.0, 30.0, 40.0, 41.0],
+        ...     index=[
+        ...         "Falcon",
+        ...         "Falcon",
+        ...         "Falcon",
+        ...         "Falcon",
+        ...         "Falcon",
+        ...         "Parrot",
+        ...         "Parrot",
+        ...         "Parrot",
+        ...         "Parrot",
+        ...         "Parrot",
+        ...     ],
+        ...     name="Max Speed",
+        ... )
+        >>> ser
+        Falcon    390.0
+        Falcon    350.0
+        Falcon    357.0
+        Falcon    333.0
+        Falcon      NaN
+        Parrot     22.0
+        Parrot     20.0
+        Parrot     30.0
+        Parrot     40.0
+        Parrot     41.0
+        Name: Max Speed, dtype: float64
+        >>> ser.groupby(level=0).kurt()
+        Falcon    1.622109
+        Parrot   -2.878714
+        Name: Max Speed, dtype: float64
+        >>> ser.groupby(level=0).kurt(skipna=False)
+        Falcon         NaN
+        Parrot   -2.878714
+        Name: Max Speed, dtype: float64
+        """
+
+        def alt(obj):
+            # This should not be reached since the cython path should raise
+            #  TypeError and not NotImplementedError.
+            raise TypeError(f"'kurt' is not supported for dtype={obj.dtype}")
+
+        return self._cython_agg_general(
+            "kurt", alt=alt, skipna=skipna, numeric_only=numeric_only, **kwargs
+        )
+
     @property
     @doc(Series.plot.__doc__)
     def plot(self) -> GroupByPlot:
@@ -2905,6 +2983,116 @@ def alt(obj):
             "skew", alt=alt, skipna=skipna, numeric_only=numeric_only, **kwargs
         )
 
+    def kurt(
+        self,
+        skipna: bool = True,
+        numeric_only: bool = False,
+        **kwargs,
+    ) -> DataFrame:
+        """
+        Return unbiased kurtosis within groups.
+
+        Parameters
+        ----------
+        skipna : bool, default True
+            Exclude NA/null values when computing the result.
+
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+
+        **kwargs
+            Additional keyword arguments to be passed to the function.
+
+        Returns
+        -------
+        DataFrame
+            Unbiased kurtosis within groups.
+
+        See Also
+        --------
+        DataFrame.kurt : Return unbiased kurtosis over requested axis.
+
+        Examples
+        --------
+        >>> arrays = [
+        ...     [
+        ...         "falcon",
+        ...         "parrot",
+        ...         "cockatoo",
+        ...         "kiwi",
+        ...         "eagle",
+        ...         "lion",
+        ...         "monkey",
+        ...         "rabbit",
+        ...         "dog",
+        ...         "wolf",
+        ...     ],
+        ...     [
+        ...         "bird",
+        ...         "bird",
+        ...         "bird",
+        ...         "bird",
+        ...         "bird",
+        ...         "mammal",
+        ...         "mammal",
+        ...         "mammal",
+        ...         "mammal",
+        ...         "mammal",
+        ...     ],
+        ... ]
+        >>> index = pd.MultiIndex.from_arrays(arrays, names=("name", "class"))
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "max_speed": [
+        ...             389.0,
+        ...             24.0,
+        ...             70.0,
+        ...             np.nan,
+        ...             350.0,
+        ...             80.5,
+        ...             21.5,
+        ...             15.0,
+        ...             40.0,
+        ...             50.0,
+        ...         ]
+        ...     },
+        ...     index=index,
+        ... )
+        >>> df
+                         max_speed
+        name     class
+        falcon   bird        389.0
+        parrot   bird         24.0
+        cockatoo bird         70.0
+        kiwi     bird          NaN
+        eagle    bird        350.0
+        lion     mammal       80.5
+        monkey   mammal       21.5
+        rabbit   mammal       15.0
+        dog      mammal       40.0
+        wolf     mammal       50.0
+        >>> gb = df.groupby(["class"])
+        >>> gb.kurt()
+                max_speed
+        class
+        bird    -5.493277
+        mammal   0.204125
+        >>> gb.kurt(skipna=False)
+                max_speed
+        class
+        bird          NaN
+        mammal   0.204125
+        """
+
+        def alt(obj):
+            # This should not be reached since the cython path should raise
+            #  TypeError and not NotImplementedError.
+            raise TypeError(f"'kurt' is not supported for dtype={obj.dtype}")
+
+        return self._cython_agg_general(
+            "kurt", alt=alt, skipna=skipna, numeric_only=numeric_only, **kwargs
+        )
+
     @property
     @doc(DataFrame.plot.__doc__)
     def plot(self) -> GroupByPlot:
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 4c7fe604e452d..c4c7f73ee166c 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -144,6 +144,7 @@ def __init__(self, kind: str, how: str, has_dropped_na: bool) -> None:
             "std": functools.partial(libgroupby.group_var, name="std"),
             "sem": functools.partial(libgroupby.group_var, name="sem"),
             "skew": "group_skew",
+            "kurt": "group_kurt",
             "first": "group_nth",
             "last": "group_last",
             "ohlc": "group_ohlc",
@@ -193,7 +194,7 @@ def _get_cython_function(
             elif how in ["std", "sem", "idxmin", "idxmax"]:
                 # We have a partial object that does not have __signatures__
                 return f
-            elif how == "skew":
+            elif how in ["skew", "kurt"]:
                 # _get_cython_vals will convert to float64
                 pass
             elif "object" not in f.__signatures__:
@@ -224,7 +225,7 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
         """
         how = self.how
 
-        if how in ["median", "std", "sem", "skew"]:
+        if how in ["median", "std", "sem", "skew", "kurt"]:
             # median only has a float64 implementation
             # We should only get here with is_numeric, as non-numeric cases
             #  should raise in _get_cython_function
@@ -453,7 +454,7 @@ def _call_cython_op(
                     **kwargs,
                 )
                 result = result.astype(bool, copy=False)
-            elif self.how in ["skew"]:
+            elif self.how in ["skew", "kurt"]:
                 func(
                     out=result,
                     counts=counts,
@@ -1021,6 +1022,7 @@ def apply_groupwise(
         # getattr pattern for __name__ is needed for functools.partial objects
         if len(group_keys) == 0 and getattr(f, "__name__", None) in [
             "skew",
+            "kurt",
             "sum",
             "prod",
         ]:
diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py
new file mode 100644
index 0000000000000..51720571f43d0
--- /dev/null
+++ b/pandas/tests/groupby/methods/test_kurt.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+import pandas as pd
+import pandas._testing as tm
+
+
+def test_groupby_kurt_equivalence():
+    # Test that that groupby kurt method (which uses libgroupby.group_kurt)
+    #  matches the results of operating group-by-group (which uses nanops.nankurt)
+    nrows = 1000
+    ngroups = 3
+    ncols = 2
+    nan_frac = 0.05
+
+    arr = np.random.default_rng(2).standard_normal((nrows, ncols))
+    arr[np.random.default_rng(2).random(nrows) < nan_frac] = np.nan
+
+    df = pd.DataFrame(arr)
+    grps = np.random.default_rng(2).integers(0, ngroups, size=nrows)
+    gb = df.groupby(grps)
+
+    result = gb.kurt()
+
+    grpwise = [grp.kurt().to_frame(i).T for i, grp in gb]
+    expected = pd.concat(grpwise, axis=0)
+    expected.index = expected.index.astype(result.index.dtype)  # 32bit builds
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py
index 013b308cd14cd..baec3ed1a5024 100644
--- a/pandas/tests/groupby/test_api.py
+++ b/pandas/tests/groupby/test_api.py
@@ -74,6 +74,7 @@ def test_tab_completion(multiindex_dataframe_random_data):
         "all",
         "shift",
         "skew",
+        "kurt",
         "take",
         "pct_change",
         "any",
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 1a4127ab49b0e..69d5e2daecf89 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1484,6 +1484,7 @@ def test_result_name_when_one_group(name):
         ("apply", lambda gb: gb.values[-1]),
         ("apply", lambda gb: gb["b"].iloc[0]),
         ("agg", "skew"),
+        ("agg", "kurt"),
         ("agg", "prod"),
         ("agg", "sum"),
     ],
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 6d84dae1d25d8..95d0d9de4ec54 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -61,6 +61,7 @@ def f(a):
     "sem": np.nan,
     "size": 0,
     "skew": np.nan,
+    "kurt": np.nan,
     "std": np.nan,
     "sum": 0,
     "var": np.nan,
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 702bbfef2be3b..d062cb3bfac38 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1728,7 +1728,7 @@ def test_pivot_table_values_key_error():
 )
 @pytest.mark.parametrize("method", ["attr", "agg", "apply"])
 @pytest.mark.parametrize(
-    "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"]
+    "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew", "kurt"]
 )
 def test_empty_groupby(columns, keys, values, method, op, dropna, using_infer_string):
     # GH8093 & GH26411
@@ -1804,7 +1804,7 @@ def get_categorical_invalid_expected():
             tm.assert_equal(result, expected)
         return
 
-    if op in ["prod", "sum", "skew"]:
+    if op in ["prod", "sum", "skew", "kurt"]:
         # ops that require more than just ordered-ness
         if is_dt64 or is_cat or is_per or (is_str and op != "sum"):
             # GH#41291
@@ -1817,15 +1817,15 @@ def get_categorical_invalid_expected():
                 msg = f"dtype 'str' does not support operation '{op}'"
             else:
                 msg = "category type does not support"
-            if op == "skew":
-                msg = "|".join([msg, "does not support operation 'skew'"])
+            if op in ["skew", "kurt"]:
+                msg = "|".join([msg, f"does not support operation '{op}'"])
             with pytest.raises(TypeError, match=msg):
                 get_result()
 
             if not isinstance(columns, list):
                 # i.e. SeriesGroupBy
                 return
-            elif op == "skew":
+            elif op in ["skew", "kurt"]:
                 # TODO: test the numeric_only=True case
                 return
             else:
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index cb4569812f600..53ab6dacd42c3 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -244,6 +244,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
         ("quantile", True),
         ("sem", True),
         ("skew", True),
+        ("kurt", True),
         ("std", True),
         ("sum", True),
         ("var", True),
@@ -381,6 +382,7 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
         "max",
         "prod",
         "skew",
+        "kurt",
     )
 
     # Test default behavior; kernels that fail may be enabled in the future but kernels
@@ -410,6 +412,7 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
         "quantile",
         "sem",
         "skew",
+        "kurt",
         "std",
         "sum",
         "var",
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 1e0a15d0ba796..f1e38bdfb42a3 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -172,6 +172,7 @@ def test_groupby_raises_string(
         "shift": (None, ""),
         "size": (None, ""),
         "skew": (ValueError, "could not convert string to float"),
+        "kurt": (ValueError, "could not convert string to float"),
         "std": (ValueError, "could not convert string to float"),
         "sum": (None, ""),
         "var": (
@@ -191,10 +192,11 @@ def test_groupby_raises_string(
             "sem",
             "var",
             "skew",
+            "kurt",
             "quantile",
         ]:
             msg = f"dtype 'str' does not support operation '{groupby_func}'"
-            if groupby_func in ["sem", "std", "skew"]:
+            if groupby_func in ["sem", "std", "skew", "kurt"]:
                 # The object-dtype raises ValueError when trying to convert to numeric.
                 klass = TypeError
         elif groupby_func == "pct_change" and df["d"].dtype.storage == "pyarrow":
@@ -328,6 +330,15 @@ def test_groupby_raises_datetime(
                 ]
             ),
         ),
+        "kurt": (
+            TypeError,
+            "|".join(
+                [
+                    r"dtype datetime64\[ns\] does not support operation",
+                    "datetime64 type does not support operation 'kurt'",
+                ]
+            ),
+        ),
         "std": (None, ""),
         "sum": (TypeError, "datetime64 type does not support operation 'sum"),
         "var": (TypeError, "datetime64 type does not support operation 'var'"),
@@ -380,7 +391,7 @@ def test_groupby_raises_datetime_np(
     _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
 
-@pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "var"])
+@pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "kurt", "var"])
 def test_groupby_raises_timedelta(func):
     df = DataFrame(
         {
@@ -511,6 +522,15 @@ def test_groupby_raises_category(
                 ]
             ),
         ),
+        "kurt": (
+            TypeError,
+            "|".join(
+                [
+                    "dtype category does not support operation 'kurt'",
+                    "category type does not support kurt operations",
+                ]
+            ),
+        ),
         "std": (
             TypeError,
             "|".join(
@@ -689,6 +709,15 @@ def test_groupby_raises_category_on_category(
                 ]
             ),
         ),
+        "kurt": (
+            TypeError,
+            "|".join(
+                [
+                    "category type does not support kurt operations",
+                    "dtype category does not support operation 'kurt'",
+                ]
+            ),
+        ),
         "std": (
             TypeError,
             "|".join(
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 51c7eab2bfa82..a17200c123d22 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -1114,6 +1114,7 @@ def test_apply_to_nullable_integer_returns_float(values, function):
         "median",
         "mean",
         "skew",
+        "kurt",
         "std",
         "var",
         "sem",
@@ -1127,8 +1128,8 @@ def test_regression_allowlist_methods(op, skipna, sort):
 
     grouped = frame.groupby(level=0, sort=sort)
 
-    if op == "skew":
-        # skew has skipna
+    if op in ["skew", "kurt"]:
+        # skew and kurt have skipna
         result = getattr(grouped, op)(skipna=skipna)
         expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(skipna=skipna))
         if sort:
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 022d3d51ded4e..2bec4a5920a19 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1097,13 +1097,13 @@ def test_transform_agg_by_name(request, reduction_func, frame_or_series):
     func = reduction_func
 
     obj = DataFrame(
-        {"a": [0, 0, 0, 1, 1, 1], "b": range(6)},
-        index=["A", "B", "C", "D", "E", "F"],
+        {"a": [0, 0, 0, 0, 1, 1, 1, 1], "b": range(8)},
+        index=["A", "B", "C", "D", "E", "F", "G", "H"],
     )
     if frame_or_series is Series:
         obj = obj["a"]
 
-    g = obj.groupby(np.repeat([0, 1], 3))
+    g = obj.groupby(np.repeat([0, 1], 4))
 
     if func == "corrwith" and isinstance(obj, Series):  # GH#32293
         # TODO: implement SeriesGroupBy.corrwith
@@ -1128,7 +1128,7 @@ def test_transform_agg_by_name(request, reduction_func, frame_or_series):
         tm.assert_index_equal(result.columns, obj.columns)
 
     # verify that values were broadcasted across each group
-    assert len(set(DataFrame(result).iloc[-3:, -1])) == 1
+    assert len(set(DataFrame(result).iloc[-4:, -1])) == 1
 
 
 def test_transform_lambda_with_datetimetz():

From 290378f15c81e44e551cb2e0d323216e975bda13 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Tue, 3 Dec 2024 16:08:06 -0800
Subject: [PATCH 2/7] ENH: Address review comments

---
 pandas/_libs/groupby.pyx                  |  8 ++--
 pandas/core/groupby/generic.py            | 14 +-----
 pandas/tests/groupby/methods/test_kurt.py | 57 ++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 0c48fbdee1f11..59bc59135a8ff 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -910,7 +910,7 @@ def group_var(
 @cython.wraparound(False)
 @cython.boundscheck(False)
 @cython.cdivision(True)
-@cython.cpow
+@cython.cpow(True)
 def group_skew(
     float64_t[:, ::1] out,
     int64_t[::1] counts,
@@ -961,7 +961,7 @@ def group_skew(
                     isna_entry = _treat_as_na(val, False)
 
                 if not isna_entry:
-                    # Based on RunningStats::Push from
+                    # Running stats update based on RunningStats::Push from
                     #  https://www.johndcook.com/blog/skewness_kurtosis/
                     n1 = nobs[lab, j]
                     n = n1 + 1
@@ -998,7 +998,7 @@ def group_skew(
 @cython.wraparound(False)
 @cython.boundscheck(False)
 @cython.cdivision(True)
-@cython.cpow
+@cython.cpow(True)
 def group_kurt(
     float64_t[:, ::1] out,
     int64_t[::1] counts,
@@ -1050,7 +1050,7 @@ def group_kurt(
                     isna_entry = _treat_as_na(val, False)
 
                 if not isna_entry:
-                    # Based on RunningStats::Push from
+                    # Running stats update based on RunningStats::Push from
                     #  https://www.johndcook.com/blog/skewness_kurtosis/
                     n1 = nobs[lab, j]
                     n = n1 + 1
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index d276d929321ba..c3d8a71aebc90 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1270,13 +1270,8 @@ def skew(
         Name: Max Speed, dtype: float64
         """
 
-        def alt(obj):
-            # This should not be reached since the cython path should raise
-            #  TypeError and not NotImplementedError.
-            raise TypeError(f"'skew' is not supported for dtype={obj.dtype}")
-
         return self._cython_agg_general(
-            "skew", alt=alt, skipna=skipna, numeric_only=numeric_only, **kwargs
+            "skew", alt=None, skipna=skipna, numeric_only=numeric_only, **kwargs
         )
 
     def kurt(
@@ -3084,13 +3079,8 @@ def kurt(
         mammal   0.204125
         """
 
-        def alt(obj):
-            # This should not be reached since the cython path should raise
-            #  TypeError and not NotImplementedError.
-            raise TypeError(f"'kurt' is not supported for dtype={obj.dtype}")
-
         return self._cython_agg_general(
-            "kurt", alt=alt, skipna=skipna, numeric_only=numeric_only, **kwargs
+            "kurt", alt=None, skipna=skipna, numeric_only=numeric_only, **kwargs
         )
 
     @property
diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py
index 51720571f43d0..edf098d25a464 100644
--- a/pandas/tests/groupby/methods/test_kurt.py
+++ b/pandas/tests/groupby/methods/test_kurt.py
@@ -5,6 +5,7 @@
 
 
 def test_groupby_kurt_equivalence():
+    # GH#40139
     # Test that that groupby kurt method (which uses libgroupby.group_kurt)
     #  matches the results of operating group-by-group (which uses nanops.nankurt)
     nrows = 1000
@@ -23,5 +24,59 @@ def test_groupby_kurt_equivalence():
 
     grpwise = [grp.kurt().to_frame(i).T for i, grp in gb]
     expected = pd.concat(grpwise, axis=0)
-    expected.index = expected.index.astype(result.index.dtype)  # 32bit builds
+    expected.index = expected.index.astype(np.intp)  # 32bit builds
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_kurt_arrow_float64():
+    # GH#40139
+    # Test groupby.kurt() with skipna = False
+    df = pd.DataFrame(
+        {
+            "x": [1.0, np.nan, 3.2, 4.8, 2.3, 1.9, 8.9],
+            "y": [1.6, 3.3, 3.2, 6.8, 1.3, 2.9, 9.0],
+        },
+        dtype="float64[pyarrow]",
+    )
+    gb = df.groupby(by=lambda x: 0)
+
+    result = gb.kurt()
+    expected = pd.DataFrame(
+        {"x": [2.1644713], "y": [0.1513969]}, dtype="float64[pyarrow]"
+    )
+    tm.assert_almost_equal(result, expected)
+
+
+def test_groupby_kurt_noskipna():
+    # GH#40139
+    # Test groupby.kurt() with skipna = False
+    df = pd.DataFrame(
+        {
+            "x": [1.0, np.nan, 3.2, 4.8, 2.3, 1.9, 8.9],
+            "y": [1.6, 3.3, 3.2, 6.8, 1.3, 2.9, 9.0],
+        }
+    )
+    gb = df.groupby(by=lambda x: 0)
+
+    result = gb.kurt(skipna=False)
+    expected = pd.DataFrame({"x": [np.nan], "y": [0.1513969]})
+    tm.assert_almost_equal(result, expected)
+
+
+def test_groupby_kurt_all_ones():
+    # GH#40139
+    # Test groupby.kurt() with skipna = False
+    df = pd.DataFrame(
+        {
+            "x": [1.0] * 10,
+        }
+    )
+    gb = df.groupby(by=lambda x: 0)
+
+    result = gb.kurt(skipna=False)
+    expected = pd.DataFrame(
+        {
+            "x": [0.0],  # Same behavior as pd.DataFrame.kurt()
+        }
+    )
+    tm.assert_almost_equal(result, expected)

From 1adbb0c2248ec89e5b5f31978dfd73c9c5d9ebec Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Tue, 3 Dec 2024 16:16:03 -0800
Subject: [PATCH 3/7] ENH: Fix comments in new test cases

---
 pandas/tests/groupby/methods/test_kurt.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py
index edf098d25a464..ba6f395985d0e 100644
--- a/pandas/tests/groupby/methods/test_kurt.py
+++ b/pandas/tests/groupby/methods/test_kurt.py
@@ -30,7 +30,7 @@ def test_groupby_kurt_equivalence():
 
 def test_groupby_kurt_arrow_float64():
     # GH#40139
-    # Test groupby.kurt() with skipna = False
+    # Test groupby.kurt() with float64[pyarrow] dtype
     df = pd.DataFrame(
         {
             "x": [1.0, np.nan, 3.2, 4.8, 2.3, 1.9, 8.9],
@@ -65,7 +65,7 @@ def test_groupby_kurt_noskipna():
 
 def test_groupby_kurt_all_ones():
     # GH#40139
-    # Test groupby.kurt() with skipna = False
+    # Test groupby.kurt() with constant values
     df = pd.DataFrame(
         {
             "x": [1.0] * 10,

From c5df6ec50d764f9d1c570fd33a2819c445aba6c9 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Tue, 3 Dec 2024 16:34:53 -0800
Subject: [PATCH 4/7] ENH: Skip pyarrow test case if no pyarrow available

---
 pandas/tests/groupby/methods/test_kurt.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py
index ba6f395985d0e..895e032c6df89 100644
--- a/pandas/tests/groupby/methods/test_kurt.py
+++ b/pandas/tests/groupby/methods/test_kurt.py
@@ -1,5 +1,7 @@
 import numpy as np
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 import pandas._testing as tm
 
@@ -28,6 +30,7 @@ def test_groupby_kurt_equivalence():
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_if_no("pyarrow")
 def test_groupby_kurt_arrow_float64():
     # GH#40139
     # Test groupby.kurt() with float64[pyarrow] dtype

From aaacc27b872b3d46bba0834bdd2182c517de7228 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Tue, 3 Dec 2024 17:25:07 -0800
Subject: [PATCH 5/7] ENH: Update to intp instead of np.intp

---
 pandas/tests/groupby/methods/test_kurt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py
index 895e032c6df89..a6cce1d578ce1 100644
--- a/pandas/tests/groupby/methods/test_kurt.py
+++ b/pandas/tests/groupby/methods/test_kurt.py
@@ -26,7 +26,7 @@ def test_groupby_kurt_equivalence():
 
     grpwise = [grp.kurt().to_frame(i).T for i, grp in gb]
     expected = pd.concat(grpwise, axis=0)
-    expected.index = expected.index.astype(np.intp)  # 32bit builds
+    expected.index = expected.index.astype("intp")  # 32bit builds
     tm.assert_frame_equal(result, expected)
 
 

From 4fc5ca250d885f69fb8f7be99b19517106c74b09 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Tue, 3 Dec 2024 18:16:28 -0800
Subject: [PATCH 6/7] ENH: Change intp to int64

---
 pandas/tests/groupby/methods/test_kurt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py
index a6cce1d578ce1..e616d4fd735e6 100644
--- a/pandas/tests/groupby/methods/test_kurt.py
+++ b/pandas/tests/groupby/methods/test_kurt.py
@@ -26,7 +26,7 @@ def test_groupby_kurt_equivalence():
 
     grpwise = [grp.kurt().to_frame(i).T for i, grp in gb]
     expected = pd.concat(grpwise, axis=0)
-    expected.index = expected.index.astype("intp")  # 32bit builds
+    expected.index = expected.index.astype("int64")  # 32bit builds
     tm.assert_frame_equal(result, expected)
 
 

From e42a06092d25a48da8ca44b514b31cda968b3334 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu <snitish.iitk@gmail.com>
Date: Sun, 15 Dec 2024 13:30:06 -0800
Subject: [PATCH 7/7] Address review comments

---
 doc/source/whatsnew/v3.0.0.rst            |  2 +-
 pandas/tests/groupby/methods/test_kurt.py | 19 ++++++++++++-------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index f89fef2a78019..32ae8f1781190 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -55,8 +55,8 @@ Other enhancements
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
 - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
+- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
-- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`SeriesGroupBy.apply`, :meth:`DataFrame.apply` now support ``kurt`` (:issue:`40139`)
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
diff --git a/pandas/tests/groupby/methods/test_kurt.py b/pandas/tests/groupby/methods/test_kurt.py
index e616d4fd735e6..21b7c50c3c5aa 100644
--- a/pandas/tests/groupby/methods/test_kurt.py
+++ b/pandas/tests/groupby/methods/test_kurt.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas.util._test_decorators as td
 
@@ -30,23 +31,27 @@ def test_groupby_kurt_equivalence():
     tm.assert_frame_equal(result, expected)
 
 
-@td.skip_if_no("pyarrow")
-def test_groupby_kurt_arrow_float64():
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
+        "Float64",
+    ],
+)
+def test_groupby_kurt_arrow_float64(dtype):
     # GH#40139
-    # Test groupby.kurt() with float64[pyarrow] dtype
+    # Test groupby.kurt() with float64[pyarrow] and Float64 dtypes
     df = pd.DataFrame(
         {
             "x": [1.0, np.nan, 3.2, 4.8, 2.3, 1.9, 8.9],
             "y": [1.6, 3.3, 3.2, 6.8, 1.3, 2.9, 9.0],
         },
-        dtype="float64[pyarrow]",
+        dtype=dtype,
     )
     gb = df.groupby(by=lambda x: 0)
 
     result = gb.kurt()
-    expected = pd.DataFrame(
-        {"x": [2.1644713], "y": [0.1513969]}, dtype="float64[pyarrow]"
-    )
+    expected = pd.DataFrame({"x": [2.1644713], "y": [0.1513969]}, dtype=dtype)
     tm.assert_almost_equal(result, expected)