From 72276dc2d31f9dc4c2165963284680a06d4ba75b Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Tue, 21 Jan 2025 17:03:03 -0800 Subject: [PATCH 01/11] ENH: Support skipna parameter in GroupBy prod, var, std and sem methods --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_libs/groupby.pyi | 2 + pandas/_libs/groupby.pyx | 39 ++++++++++++++- pandas/core/_numba/kernels/var_.py | 11 ++++- pandas/core/groupby/groupby.py | 51 +++++++++++++++++--- pandas/tests/groupby/aggregate/test_numba.py | 2 +- pandas/tests/groupby/test_api.py | 18 +++---- pandas/tests/groupby/test_reductions.py | 36 ++++++++++++++ 8 files changed, 141 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index fea269ac4555e..d7d918c7cafa4 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -58,9 +58,9 @@ Other enhancements - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) +- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``prod``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) -- :meth:`.DataFrameGroupBy.mean`, :meth:`.DataFrameGroupBy.sum`, :meth:`.SeriesGroupBy.mean` and :meth:`.SeriesGroupBy.sum` now accept ``skipna`` parameter (:issue:`15675`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi index e3909203d1f5a..4267a02e43a12 100644 --- a/pandas/_libs/groupby.pyi +++ b/pandas/_libs/groupby.pyi @@ -76,6 +76,7 @@ def group_prod( mask: np.ndarray | None, result_mask: np.ndarray | None = ..., min_count: int = ..., + skipna: bool = ..., ) -> None: ... def group_var( out: np.ndarray, # floating[:, ::1] @@ -88,6 +89,7 @@ def group_var( result_mask: np.ndarray | None = ..., is_datetimelike: bool = ..., name: str = ..., + skipna: bool = ..., ) -> None: ... 
 def group_skew(
     out: np.ndarray,  # float64_t[:, ::1]
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index fd288dff01f32..93f478a8077c9 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -806,13 +806,14 @@ def group_prod(
     const uint8_t[:, ::1] mask,
     uint8_t[:, ::1] result_mask=None,
     Py_ssize_t min_count=0,
+    bint skipna=True,
 ) -> None:
     """
     Only aggregates on axis=0
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        int64float_t val
+        int64float_t val, nan_val
         int64float_t[:, ::1] prodx
         int64_t[:, ::1] nobs
         Py_ssize_t len_values = len(values), len_labels = len(labels)
@@ -825,6 +826,13 @@ def group_prod(
     prodx = np.ones((<object>out).shape, dtype=(<object>out).base.dtype)

     N, K = (<object>values).shape
+    if uses_mask:
+        nan_val = 0
+    elif int64float_t is int64_t or int64float_t is uint64_t:
+        # This has no effect as int64 can't be nan. Setting to 0 to avoid type error
+        nan_val = 0
+    else:
+        nan_val = NAN

     with nogil:
         for i in range(N):
@@ -836,6 +844,13 @@ def group_prod(
             for j in range(K):
                 val = values[i, j]

+                if not skipna and (
+                    (uses_mask and result_mask[lab, j]) or
+                    _treat_as_na(prodx[lab, j], False)
+                ):
+                    # If prod is already NA, no need to update it
+                    continue
+
                 if uses_mask:
                     isna_entry = mask[i, j]
                 else:
@@ -844,6 +859,11 @@ def group_prod(
                 if not isna_entry:
                     nobs[lab, j] += 1
                     prodx[lab, j] *= val
+                elif not skipna:
+                    if uses_mask:
+                        result_mask[lab, j] = True
+                    else:
+                        prodx[lab, j] = nan_val

     _check_below_mincount(
         out, uses_mask, result_mask, ncounts, K, nobs, min_count, prodx
@@ -864,6 +884,7 @@ def group_var(
     uint8_t[:, ::1] result_mask=None,
     bint is_datetimelike=False,
     str name="var",
+    bint skipna=True,
 ) -> None:
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
@@ -898,6 +919,16 @@ def group_var(
             for j in range(K):
                 val = values[i, j]

+                if not skipna and (
+                    (uses_mask and result_mask[lab, j]) or
+                    (is_datetimelike and out[lab, j] == NPY_NAT) or
+                    _treat_as_na(out[lab, j], False)
+                ):
+                    # If aggregate is already NA, don't add to it.
This is important for + # datetimelike because adding a value to NPY_NAT may not result + # in a NPY_NAT + continue + if uses_mask: isna_entry = mask[i, j] elif is_datetimelike: @@ -913,6 +944,12 @@ def group_var( oldmean = mean[lab, j] mean[lab, j] += (val - oldmean) / nobs[lab, j] out[lab, j] += (val - mean[lab, j]) * (val - oldmean) + elif not skipna: + nobs[lab, j] = 0 + if uses_mask: + result_mask[lab, j] = True + else: + out[lab, j] = NAN for i in range(ncounts): for j in range(K): diff --git a/pandas/core/_numba/kernels/var_.py b/pandas/core/_numba/kernels/var_.py index 69aec4d6522c4..cce5ee10154b1 100644 --- a/pandas/core/_numba/kernels/var_.py +++ b/pandas/core/_numba/kernels/var_.py @@ -176,6 +176,7 @@ def grouped_var( ngroups: int, min_periods: int, ddof: int = 1, + skipna: bool = True, ) -> tuple[np.ndarray, list[int]]: N = len(labels) @@ -190,7 +191,15 @@ def grouped_var( lab = labels[i] val = values[i] - if lab < 0: + if lab < 0 or np.isnan(output[lab]): + continue + + if not skipna and np.isnan(val): + output[lab] = np.nan + nobs_arr[lab] += 1 + comp_arr[lab] = np.nan + consecutive_counts[lab] = 1 + prev_vals[lab] = np.nan continue mean_x = means[lab] diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9059e6e8896f..b6025b29ff2c3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2349,6 +2349,7 @@ def std( engine: Literal["cython", "numba"] | None = None, engine_kwargs: dict[str, bool] | None = None, numeric_only: bool = False, + skipna: bool = True, ): """ Compute standard deviation of groups, excluding missing values. @@ -2387,6 +2388,12 @@ def std( numeric_only now defaults to ``False``. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + + .. versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -2441,14 +2448,16 @@ def std( engine_kwargs, min_periods=0, ddof=ddof, + skipna=skipna, ) ) else: return self._cython_agg_general( "std", - alt=lambda x: Series(x, copy=False).std(ddof=ddof), + alt=lambda x: Series(x, copy=False).std(ddof=ddof, skipna=skipna), numeric_only=numeric_only, ddof=ddof, + skipna=skipna, ) @final @@ -2460,6 +2469,7 @@ def var( engine: Literal["cython", "numba"] | None = None, engine_kwargs: dict[str, bool] | None = None, numeric_only: bool = False, + skipna: bool = True, ): """ Compute variance of groups, excluding missing values. @@ -2497,6 +2507,12 @@ def var( numeric_only now defaults to ``False``. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + + .. versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -2550,13 +2566,15 @@ def var( engine_kwargs, min_periods=0, ddof=ddof, + skipna=skipna, ) else: return self._cython_agg_general( "var", - alt=lambda x: Series(x, copy=False).var(ddof=ddof), + alt=lambda x: Series(x, copy=False).var(ddof=ddof, skipna=skipna), numeric_only=numeric_only, ddof=ddof, + skipna=skipna, ) @final @@ -2686,7 +2704,9 @@ def _value_counts( return result.__finalize__(self.obj, method="value_counts") @final - def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: + def sem( + self, ddof: int = 1, numeric_only: bool = False, skipna: bool = True + ) -> NDFrameT: """ Compute standard error of the mean of groups, excluding missing values. @@ -2706,6 +2726,12 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: numeric_only now defaults to ``False``. 
+ skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + + .. versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -2780,9 +2806,10 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: ) return self._cython_agg_general( "sem", - alt=lambda x: Series(x, copy=False).sem(ddof=ddof), + alt=lambda x: Series(x, copy=False).sem(ddof=ddof, skipna=skipna), numeric_only=numeric_only, ddof=ddof, + skipna=skipna, ) @final @@ -2959,7 +2986,9 @@ def sum( return result @final - def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT: + def prod( + self, numeric_only: bool = False, min_count: int = 0, skipna: bool = True + ) -> NDFrameT: """ Compute prod of group values. @@ -2976,6 +3005,12 @@ def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT: The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + + .. versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -3024,7 +3059,11 @@ def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT: 2 30 72 """ return self._agg_general( - numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod + numeric_only=numeric_only, + min_count=min_count, + skipna=skipna, + alias="prod", + npfunc=np.prod, ) @final diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index ca265a1d1108b..1da13d28be9dd 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -186,7 +186,7 @@ def test_multifunc_numba_vs_cython_frame(agg_kwargs): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("func", ["sum", "mean"]) +@pytest.mark.parametrize("func", ["sum", "mean", "var", "std"]) def test_multifunc_numba_vs_cython_frame_noskipna(func): pytest.importorskip("numba") data = DataFrame( diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index cc69de2581a79..41644a3f6bf20 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -176,14 +176,13 @@ def test_frame_consistency(groupby_func): elif groupby_func in ("max", "min"): exclude_expected = {"axis", "kwargs", "skipna"} exclude_result = {"min_count", "engine", "engine_kwargs"} - elif groupby_func in ("sum", "mean"): + elif groupby_func in ("sum", "mean", "std", "var"): exclude_expected = {"axis", "kwargs"} exclude_result = {"engine", "engine_kwargs"} - elif groupby_func in ("std", "var"): - exclude_expected = {"axis", "kwargs", "skipna"} - exclude_result = {"engine", "engine_kwargs"} - elif groupby_func in ("median", "prod", "sem"): + elif groupby_func in ("median"): exclude_expected = {"axis", "kwargs", "skipna"} + elif groupby_func in ("prod", "sem"): + exclude_expected = {"axis", "kwargs"} elif groupby_func in ("bfill", "ffill"): exclude_expected = {"inplace", "axis", "limit_area"} elif groupby_func in ("cummax", "cummin"): @@ -237,14 +236,13 @@ def test_series_consistency(request, groupby_func): elif groupby_func in ("max", "min"): exclude_expected = {"axis", "kwargs", "skipna"} exclude_result = {"min_count", "engine", "engine_kwargs"} - elif groupby_func in ("sum", "mean"): + elif groupby_func in ("sum", "mean", "std", "var"): exclude_expected = {"axis", "kwargs"} exclude_result = {"engine", 
"engine_kwargs"} - elif groupby_func in ("std", "var"): - exclude_expected = {"axis", "kwargs", "skipna"} - exclude_result = {"engine", "engine_kwargs"} - elif groupby_func in ("median", "prod", "sem"): + elif groupby_func in ("median"): exclude_expected = {"axis", "kwargs", "skipna"} + elif groupby_func in ("prod", "sem"): + exclude_expected = {"axis", "kwargs"} elif groupby_func in ("bfill", "ffill"): exclude_expected = {"inplace", "axis", "limit_area"} elif groupby_func in ("cummax", "cummin"): diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 1db12f05e821f..6b60c72636bf8 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -514,6 +514,42 @@ def test_sum_skipna_object(skipna): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "func, values, dtype, result_dtype", + [ + ("prod", [0, 1, 3, np.nan, 4, 5, 6, 7, -8, 9], "float64", "float64"), + ("prod", [0, -1, 3, 4, 5, np.nan, 6, 7, 8, 9], "Float64", "Float64"), + ("prod", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Int64", "Int64"), + ("var", [0, -1, 3, 4, np.nan, 5, 6, 7, 8, 9], "float64", "float64"), + ("var", [0, 1, 3, -4, 5, 6, 7, -8, 9, np.nan], "Float64", "Float64"), + ("var", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "Int64", "Float64"), + ("std", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "float64", "float64"), + ("std", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "Float64", "Float64"), + ("std", [0, 1, 3, -4, 5, 6, 7, -8, 9, np.nan], "Int64", "Float64"), + ("sem", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), + ("sem", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), + ("sem", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"), + ], +) +def test_multifunc_skipna(func, values, dtype, result_dtype, skipna): + # GH#15675 + df = DataFrame( + { + "val": values, + "cat": ["A", "B"] * 5, + } + ).astype({"val": dtype}) + # We need to recast the expected values to the result_dtype as some operations + # change the dtype + expected = ( + df.groupby("cat")["val"] + .apply(lambda x: getattr(x, func)(skipna=skipna)) + .astype(result_dtype) + ) + result = getattr(df.groupby("cat")["val"], func)(skipna=skipna) + tm.assert_series_equal(result, expected) + + def test_cython_median(): arr = np.random.default_rng(2).standard_normal(1000) arr[::2] = np.nan From 0414465fa060fcc9a76f50005c3b92f65a8721c3 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Tue, 21 Jan 2025 17:29:34 -0800 Subject: [PATCH 02/11] Fix docstring error --- pandas/core/resample.py | 51 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index b1b8aef31d3c4..c0671c294cd5e 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1440,12 +1440,61 @@ def var( return self._downsample("var", ddof=ddof, numeric_only=numeric_only) @final - @doc(GroupBy.sem) def sem( self, ddof: int = 1, numeric_only: bool = False, ): + """ + Compute standard error of the mean of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + .. versionchanged:: 2.0.0 + + numeric_only now defaults to ``False``. + + Returns + ------- + Series or DataFrame + Standard error of the mean of values within each group. 
+ + See Also + -------- + DataFrame.sem : Return unbiased standard error of the mean over requested axis. + Series.sem : Return unbiased standard error of the mean over requested axis. + + Examples + -------- + + >>> ser = pd.Series( + ... [1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").sem() + 2023-01-01 0.577350 + 2023-02-01 1.527525 + Freq: MS, dtype: float64 + """ return self._downsample("sem", ddof=ddof, numeric_only=numeric_only) @final From e2233f84ab8cb86b0b6b5dda316985a5b695ec69 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Tue, 21 Jan 2025 18:38:36 -0800 Subject: [PATCH 03/11] Address review comment and add skipna to min and max --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_libs/groupby.pyi | 2 + pandas/_libs/groupby.pyx | 81 +++++++++++++++---------- pandas/core/_numba/kernels/min_max_.py | 10 ++- pandas/core/groupby/groupby.py | 12 +++- pandas/tests/groupby/test_api.py | 4 +- pandas/tests/groupby/test_reductions.py | 56 +++++++++++++++++ 7 files changed, 127 insertions(+), 40 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d7d918c7cafa4..cdc39ae2dac08 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -58,7 +58,7 @@ Other enhancements - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) -- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``prod``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) +- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi index 4267a02e43a12..156f107f4e6b4 100644 --- a/pandas/_libs/groupby.pyi +++ b/pandas/_libs/groupby.pyi @@ -185,6 +185,7 @@ def group_max( is_datetimelike: bool = ..., mask: np.ndarray | None = ..., result_mask: np.ndarray | None = ..., + skipna: bool = ..., ) -> None: ... def group_min( out: np.ndarray, # groupby_t[:, ::1] @@ -195,6 +196,7 @@ def group_min( is_datetimelike: bool = ..., mask: np.ndarray | None = ..., result_mask: np.ndarray | None = ..., + skipna: bool = ..., ) -> None: ... 
def group_idxmin_idxmax( out: npt.NDArray[np.intp], diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index f1b034b2382d3..79ae102293468 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -815,7 +815,7 @@ def group_prod( int64float_t[:, ::1] prodx int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = mask is not None + bint isna_entry, isna_result, uses_mask = mask is not None if len_values != len_labels: raise ValueError("len(index) != len(labels)") @@ -842,17 +842,16 @@ def group_prod( for j in range(K): val = values[i, j] - if not skipna and ( - (uses_mask and result_mask[lab, j]) or - _treat_as_na(prodx[lab, j], False) - ): - # If prod is already NA, no need to update it - continue - if uses_mask: isna_entry = mask[i, j] + isna_result = result_mask[lab, j] else: isna_entry = _treat_as_na(val, False) + isna_result = _treat_as_na(prodx[lab, j], False) + + if not skipna and isna_result: + # If prod is already NA, no need to update it + continue if not isna_entry: nobs[lab, j] += 1 @@ -890,7 +889,7 @@ def group_var( floating[:, ::1] mean int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = mask is not None + bint isna_entry, isna_result, uses_mask = mask is not None bint is_std = name == "std" bint is_sem = name == "sem" @@ -917,25 +916,24 @@ def group_var( for j in range(K): val = values[i, j] - if not skipna and ( - (uses_mask and result_mask[lab, j]) or - (is_datetimelike and out[lab, j] == NPY_NAT) or - _treat_as_na(out[lab, j], False) - ): - # If aggregate is already NA, don't add to it. This is important for - # datetimelike because adding a value to NPY_NAT may not result - # in a NPY_NAT - continue - if uses_mask: isna_entry = mask[i, j] + isna_result = result_mask[lab, j] elif is_datetimelike: # With group_var, we cannot just use _treat_as_na bc # datetimelike dtypes get cast to float64 instead of # to int64. isna_entry = val == NPY_NAT + isna_result = out[lab, j] == NPY_NAT else: isna_entry = _treat_as_na(val, is_datetimelike) + isna_result = _treat_as_na(out[lab, j], is_datetimelike) + + if not skipna and isna_result: + # If aggregate is already NA, don't add to it. This is important for + # datetimelike because adding a value to NPY_NAT may not result + # in a NPY_NAT + continue if not isna_entry: nobs[lab, j] += 1 @@ -1201,7 +1199,7 @@ def group_mean( mean_t[:, ::1] sumx, compensation int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = mask is not None + bint isna_entry, isna_result, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1231,25 +1229,24 @@ def group_mean( for j in range(K): val = values[i, j] - if not skipna and ( - (uses_mask and result_mask[lab, j]) or - (is_datetimelike and sumx[lab, j] == NPY_NAT) or - _treat_as_na(sumx[lab, j], False) - ): - # If sum is already NA, don't add to it. This is important for - # datetimelike because adding a value to NPY_NAT may not result - # in NPY_NAT - continue - if uses_mask: isna_entry = mask[i, j] + isna_result = result_mask[lab, j] elif is_datetimelike: # With group_mean, we cannot just use _treat_as_na bc # datetimelike dtypes get cast to float64 instead of # to int64. 
                 isna_entry = val == NPY_NAT
+                isna_result = sumx[lab, j] == NPY_NAT
             else:
                 isna_entry = _treat_as_na(val, is_datetimelike)
+                isna_result = _treat_as_na(sumx[lab, j], is_datetimelike)
+
+            if not skipna and isna_result:
+                # If sum is already NA, don't add to it. This is important for
+                # datetimelike because adding a value to NPY_NAT may not result
+                # in NPY_NAT
+                continue

             if not isna_entry:
                 nobs[lab, j] += 1
@@ -1843,6 +1840,7 @@ cdef group_min_max(
     bint compute_max=True,
     const uint8_t[:, ::1] mask=None,
     uint8_t[:, ::1] result_mask=None,
+    bint skipna=True,
 ):
     """
     Compute minimum/maximum of columns of `values`, in row groups `labels`.
@@ -1870,6 +1868,8 @@ cdef group_min_max(
     result_mask : ndarray[bool, ndim=2], optional
         If not None, these specify locations in the output that are NA.
         Modified in-place.
+    skipna : bool, default True
+        If True, ignore nans in `values`.

     Notes
     -----
@@ -1878,17 +1878,18 @@ cdef group_min_max(
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ngroups = len(counts)
-        numeric_t val
+        numeric_t val, nan_val
        numeric_t[:, ::1] group_min_or_max
         int64_t[:, ::1] nobs
         bint uses_mask = mask is not None
-        bint isna_entry
+        bint isna_entry, isna_result

     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")

     min_count = max(min_count, 1)
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
+    nan_val = _get_na_val(<numeric_t>0, is_datetimelike)

     group_min_or_max = np.empty_like(out)
     group_min_or_max[:] = _get_min_or_max(<numeric_t>0, compute_max, is_datetimelike)
@@ -1907,8 +1908,15 @@ cdef group_min_max(

                 if uses_mask:
                     isna_entry = mask[i, j]
+                    isna_result = result_mask[lab, j]
                 else:
                     isna_entry = _treat_as_na(val, is_datetimelike)
+                    isna_result = _treat_as_na(group_min_or_max[lab, j],
+                                               is_datetimelike)
+
+                if not skipna and isna_result:
+                    # If current min/max is already NA, it will always be NA
+                    continue

                 if not isna_entry:
                     nobs[lab, j] += 1
@@ -1918,6 +1926,11 @@ cdef group_min_max(
                     else:
                         if val < group_min_or_max[lab, j]:
                             group_min_or_max[lab, j] = val
+                elif not skipna:
+                    if uses_mask:
+                        result_mask[lab, j] = True
+                    else:
+                        group_min_or_max[lab, j] = nan_val

     _check_below_mincount(
         out, uses_mask, result_mask, ngroups, K, nobs, min_count, group_min_or_max
@@ -2049,6 +2062,7 @@ def group_max(
     bint is_datetimelike=False,
     const uint8_t[:, ::1] mask=None,
     uint8_t[:, ::1] result_mask=None,
+    bint skipna=True,
 ) -> None:
     """See group_min_max.__doc__"""
     group_min_max(
@@ -2061,6 +2075,7 @@ def group_max(
         compute_max=True,
         mask=mask,
         result_mask=result_mask,
+        skipna=skipna,
     )


@@ -2075,6 +2090,7 @@ def group_min(
     bint is_datetimelike=False,
     const uint8_t[:, ::1] mask=None,
     uint8_t[:, ::1] result_mask=None,
+    bint skipna=True,
 ) -> None:
     """See group_min_max.__doc__"""
     group_min_max(
@@ -2087,6 +2103,7 @@ def group_min(
         compute_max=False,
         mask=mask,
         result_mask=result_mask,
+        skipna=skipna,
     )


diff --git a/pandas/core/_numba/kernels/min_max_.py b/pandas/core/_numba/kernels/min_max_.py
index 59d36732ebae6..376d1221b06a6 100644
--- a/pandas/core/_numba/kernels/min_max_.py
+++ b/pandas/core/_numba/kernels/min_max_.py
@@ -80,7 +80,7 @@ def sliding_min_max(
     return output, na_pos


-@numba.jit(nopython=True, nogil=True, parallel=False)
+@numba.jit(nopython=True, nogil=False, parallel=False)
 def grouped_min_max(
     values: np.ndarray,
     result_dtype: np.dtype,
@@ -88,6 +88,7 @@ def grouped_min_max(
     ngroups: int,
     min_periods: int,
     is_max: bool,
+    skipna: bool = True,
 ) -> tuple[np.ndarray, list[int]]:
     N = len(labels)
     nobs = np.zeros(ngroups, dtype=np.int64)
@@ -97,13 +98,16 @@ def grouped_min_max(
     for i in
range(N): lab = labels[i] val = values[i] - if lab < 0: + if lab < 0 or (nobs[lab] >= 1 and np.isnan(output[lab])): continue if values.dtype.kind == "i" or not np.isnan(val): nobs[lab] += 1 else: - # NaN value cannot be a min/max value + if not skipna: + # If skipna is False and we encounter a NaN, + # both min and max of the group will be NaN + output[lab] = np.nan continue if nobs[lab] == 1: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b6025b29ff2c3..c60c0ce5d5404 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3068,12 +3068,13 @@ def prod( @final @doc( - _groupby_agg_method_engine_template, + _groupby_agg_method_skipna_engine_template, fname="min", no=False, mc=-1, e=None, ek=None, + s=True, example=dedent( """\ For SeriesGroupBy: @@ -3113,6 +3114,7 @@ def min( self, numeric_only: bool = False, min_count: int = -1, + skipna: bool = True, engine: Literal["cython", "numba"] | None = None, engine_kwargs: dict[str, bool] | None = None, ): @@ -3125,23 +3127,26 @@ def min( engine_kwargs, min_periods=min_count, is_max=False, + skipna=skipna, ) else: return self._agg_general( numeric_only=numeric_only, min_count=min_count, + skipna=skipna, alias="min", npfunc=np.min, ) @final @doc( - _groupby_agg_method_engine_template, + _groupby_agg_method_skipna_engine_template, fname="max", no=False, mc=-1, e=None, ek=None, + s=True, example=dedent( """\ For SeriesGroupBy: @@ -3181,6 +3186,7 @@ def max( self, numeric_only: bool = False, min_count: int = -1, + skipna: bool = True, engine: Literal["cython", "numba"] | None = None, engine_kwargs: dict[str, bool] | None = None, ): @@ -3193,11 +3199,13 @@ def max( engine_kwargs, min_periods=min_count, is_max=True, + skipna=skipna, ) else: return self._agg_general( numeric_only=numeric_only, min_count=min_count, + skipna=skipna, alias="max", npfunc=np.max, ) diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index 41644a3f6bf20..2be05ea9daa10 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -174,7 +174,7 @@ def test_frame_consistency(groupby_func): elif groupby_func in ("nunique",): exclude_expected = {"axis"} elif groupby_func in ("max", "min"): - exclude_expected = {"axis", "kwargs", "skipna"} + exclude_expected = {"axis", "kwargs"} exclude_result = {"min_count", "engine", "engine_kwargs"} elif groupby_func in ("sum", "mean", "std", "var"): exclude_expected = {"axis", "kwargs"} @@ -234,7 +234,7 @@ def test_series_consistency(request, groupby_func): if groupby_func in ("any", "all"): exclude_expected = {"kwargs", "bool_only", "axis"} elif groupby_func in ("max", "min"): - exclude_expected = {"axis", "kwargs", "skipna"} + exclude_expected = {"axis", "kwargs"} exclude_result = {"min_count", "engine", "engine_kwargs"} elif groupby_func in ("sum", "mean", "std", "var"): exclude_expected = {"axis", "kwargs"} diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 6b60c72636bf8..17dead27d9eed 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -529,6 +529,62 @@ def test_sum_skipna_object(skipna): ("sem", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), ("sem", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), ("sem", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"), + ("min", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), + ("min", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", 
"Float64"), + ("min", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Int64"), + ( + "min", + [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], + "timedelta64[ns]", + "timedelta64[ns]", + ), + ( + "min", + pd.to_datetime( + [ + "2019-05-09", + pd.NaT, + "2019-05-11", + "2019-05-12", + "2019-05-13", + "2019-05-14", + "2019-05-15", + "2019-05-16", + "2019-05-17", + "2019-05-18", + ] + ), + "datetime64[ns]", + "datetime64[ns]", + ), + ("max", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), + ("max", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), + ("max", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Int64"), + ( + "max", + [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], + "timedelta64[ns]", + "timedelta64[ns]", + ), + ( + "max", + pd.to_datetime( + [ + "2019-05-09", + pd.NaT, + "2019-05-11", + "2019-05-12", + "2019-05-13", + "2019-05-14", + "2019-05-15", + "2019-05-16", + "2019-05-17", + "2019-05-18", + ] + ), + "datetime64[ns]", + "datetime64[ns]", + ), ], ) def test_multifunc_skipna(func, values, dtype, result_dtype, skipna): From 0c58a7dda16aa7af9d768a96a1b605e407e4f944 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Tue, 21 Jan 2025 18:43:16 -0800 Subject: [PATCH 04/11] Undo temporary change --- pandas/core/_numba/kernels/min_max_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/_numba/kernels/min_max_.py b/pandas/core/_numba/kernels/min_max_.py index 376d1221b06a6..d56453e4e5abf 100644 --- a/pandas/core/_numba/kernels/min_max_.py +++ b/pandas/core/_numba/kernels/min_max_.py @@ -80,7 +80,7 @@ def sliding_min_max( return output, na_pos -@numba.jit(nopython=True, nogil=False, parallel=False) +@numba.jit(nopython=True, nogil=True, parallel=False) def grouped_min_max( values: np.ndarray, result_dtype: np.dtype, From e259679825fe956d3d2dd0d0ae191a22662bfc47 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Tue, 21 Jan 2025 19:02:47 -0800 Subject: [PATCH 05/11] Add skipna to groupby median --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_libs/groupby.pyi | 1 + pandas/_libs/groupby.pyx | 19 +++++++++++------ pandas/core/groupby/groupby.py | 13 ++++++++++-- pandas/tests/groupby/test_api.py | 8 ++----- pandas/tests/groupby/test_reductions.py | 28 +++++++++++++++++++++++++ 6 files changed, 56 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index cdc39ae2dac08..b2efc8d22bb52 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -58,7 +58,7 @@ Other enhancements - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) -- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) +- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to 
:meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
 - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi
index 156f107f4e6b4..163fc23535022 100644
--- a/pandas/_libs/groupby.pyi
+++ b/pandas/_libs/groupby.pyi
@@ -13,6 +13,7 @@ def group_median_float64(
     mask: np.ndarray | None = ...,
     result_mask: np.ndarray | None = ...,
     is_datetimelike: bool = ...,  # bint
+    skipna: bool = ...,
 ) -> None: ...
 def group_cumprod(
     out: np.ndarray,  # float64_t[:, ::1]
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 79ae102293468..7bae7f40f80d9 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -62,7 +62,12 @@ cdef enum InterpolationEnumType:
     INTERPOLATION_MIDPOINT


-cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) noexcept nogil:
+cdef float64_t median_linear_mask(
+    float64_t* a,
+    int n,
+    uint8_t* mask,
+    bint skipna=True
+) noexcept nogil:
     cdef:
         int i, j, na_count = 0
         float64_t* tmp
@@ -77,7 +82,7 @@ cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) noexcept n
             na_count += 1

     if na_count:
-        if na_count == n:
+        if na_count == n or not skipna:
             return NaN

         tmp = <float64_t*> malloc((n - na_count) * sizeof(float64_t))
@@ -104,7 +109,8 @@ cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) noexcept n
 cdef float64_t median_linear(
     float64_t* a,
     int n,
-    bint is_datetimelike=False
+    bint is_datetimelike=False,
+    bint skipna=True,
 ) noexcept nogil:
     cdef:
         int i, j, na_count = 0
@@ -125,7 +131,7 @@ cdef float64_t median_linear(
             na_count += 1

     if na_count:
-        if na_count == n:
+        if na_count == n or not skipna:
             return NaN

         tmp = <float64_t*> malloc((n - na_count) * sizeof(float64_t))
@@ -186,6 +192,7 @@ def group_median_float64(
     const uint8_t[:, :] mask=None,
     uint8_t[:, ::1] result_mask=None,
     bint is_datetimelike=False,
+    bint skipna=True,
 ) -> None:
     """
     Only aggregates on axis=0
@@ -229,7 +236,7 @@ def group_median_float64(

         for j in range(ngroups):
             size = _counts[j + 1]
-            result = median_linear_mask(ptr, size, ptr_mask)
+            result = median_linear_mask(ptr, size, ptr_mask, skipna)
             out[j, i] = result

             if result != result:
@@ -244,7 +251,7 @@ def group_median_float64(
             ptr += _counts[0]
             for j in range(ngroups):
                 size = _counts[j + 1]
-                out[j, i] = median_linear(ptr, size, is_datetimelike)
+                out[j, i] = median_linear(ptr, size, is_datetimelike, skipna)
                 ptr += size
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c60c0ce5d5404..7c3088bea4b76 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2248,7 +2248,7 @@ def mean(
         return result.__finalize__(self.obj, method="groupby")

     @final
-    def median(self, numeric_only: bool = False) -> NDFrameT:
+    def median(self, numeric_only: bool = False, skipna: bool = True) -> NDFrameT:
         """
         Compute median of groups, excluding missing values.

@@ -2263,6 +2263,12 @@ def median(self, numeric_only: bool = False) -> NDFrameT:

             numeric_only no longer accepts ``None`` and defaults to False.

+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+
+        ..
versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -2335,8 +2341,11 @@ def median(self, numeric_only: bool = False) -> NDFrameT: """ result = self._cython_agg_general( "median", - alt=lambda x: Series(x, copy=False).median(numeric_only=numeric_only), + alt=lambda x: Series(x, copy=False).median( + numeric_only=numeric_only, skipna=skipna + ), numeric_only=numeric_only, + skipna=skipna, ) return result.__finalize__(self.obj, method="groupby") diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index 2be05ea9daa10..215e627abb018 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -179,9 +179,7 @@ def test_frame_consistency(groupby_func): elif groupby_func in ("sum", "mean", "std", "var"): exclude_expected = {"axis", "kwargs"} exclude_result = {"engine", "engine_kwargs"} - elif groupby_func in ("median"): - exclude_expected = {"axis", "kwargs", "skipna"} - elif groupby_func in ("prod", "sem"): + elif groupby_func in ("median", "prod", "sem"): exclude_expected = {"axis", "kwargs"} elif groupby_func in ("bfill", "ffill"): exclude_expected = {"inplace", "axis", "limit_area"} @@ -239,9 +237,7 @@ def test_series_consistency(request, groupby_func): elif groupby_func in ("sum", "mean", "std", "var"): exclude_expected = {"axis", "kwargs"} exclude_result = {"engine", "engine_kwargs"} - elif groupby_func in ("median"): - exclude_expected = {"axis", "kwargs", "skipna"} - elif groupby_func in ("prod", "sem"): + elif groupby_func in ("median", "prod", "sem"): exclude_expected = {"axis", "kwargs"} elif groupby_func in ("bfill", "ffill"): exclude_expected = {"inplace", "axis", "limit_area"} diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 17dead27d9eed..35d3abcb761e2 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -585,6 +585,34 @@ def test_sum_skipna_object(skipna): "datetime64[ns]", "datetime64[ns]", ), + ("median", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), + ("median", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), + ("median", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"), + ( + "median", + [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], + "timedelta64[ns]", + "timedelta64[ns]", + ), + ( + "median", + pd.to_datetime( + [ + "2019-05-09", + pd.NaT, + "2019-05-11", + "2019-05-12", + "2019-05-13", + "2019-05-14", + "2019-05-15", + "2019-05-16", + "2019-05-17", + "2019-05-18", + ] + ), + "datetime64[ns]", + "datetime64[ns]", + ), ], ) def test_multifunc_skipna(func, values, dtype, result_dtype, skipna): From f40aa16e0d6626e942db2ef969cbab504f3d7bc2 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Tue, 21 Jan 2025 19:21:38 -0800 Subject: [PATCH 06/11] Fix docstring error --- pandas/core/resample.py | 47 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index c0671c294cd5e..82207587d60be 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1259,8 +1259,53 @@ def last( ) @final - @doc(GroupBy.median) def median(self, numeric_only: bool = False): + """ + Compute median of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex + + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + + numeric_only no longer accepts ``None`` and defaults to False. 
+        Returns
+        -------
+        Series or DataFrame
+            Median of values within each group.
+
+        See Also
+        --------
+        Series.groupby : Apply a function groupby to a Series.
+        DataFrame.groupby : Apply a function groupby to each row or column of a
+            DataFrame.
+
+        Examples
+        --------
+
+        >>> ser = pd.Series(
+        ...     [1, 2, 3, 3, 4, 5],
+        ...     index=pd.DatetimeIndex(
+        ...         [
+        ...             "2023-01-01",
+        ...             "2023-01-10",
+        ...             "2023-01-15",
+        ...             "2023-02-01",
+        ...             "2023-02-10",
+        ...             "2023-02-15",
+        ...         ]
+        ...     ),
+        ... )
+        >>> ser.resample("MS").median()
+        2023-01-01    2.0
+        2023-02-01    4.0
+        Freq: MS, dtype: float64
+        """
         return self._downsample("median", numeric_only=numeric_only)

     @final

From 574708efcb9d8b476f0b8df442358f6ef3343790 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu
Date: Tue, 21 Jan 2025 20:30:46 -0800
Subject: [PATCH 07/11] Add min and max to groupby numba vs cython test

---
 pandas/tests/groupby/aggregate/test_numba.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py
index 1da13d28be9dd..0cd8a14d97eb0 100644
--- a/pandas/tests/groupby/aggregate/test_numba.py
+++ b/pandas/tests/groupby/aggregate/test_numba.py
@@ -186,7 +186,7 @@ def test_multifunc_numba_vs_cython_frame(agg_kwargs):
     tm.assert_frame_equal(result, expected)


-@pytest.mark.parametrize("func", ["sum", "mean", "var", "std"])
+@pytest.mark.parametrize("func", ["sum", "mean", "var", "std", "min", "max"])
 def test_multifunc_numba_vs_cython_frame_noskipna(func):
     pytest.importorskip("numba")
     data = DataFrame(

From a1444c98458e0b06b0b846bafe6ab71d5158622f Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu
Date: Wed, 22 Jan 2025 14:50:24 -0800
Subject: [PATCH 08/11] Use _get_na_val to determine nan_val in group_prod

---
 pandas/_libs/groupby.pyx | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 7bae7f40f80d9..16a104a46ed3d 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -831,13 +831,7 @@ def group_prod(
     prodx = np.ones((<object>out).shape, dtype=(<object>out).base.dtype)

     N, K = (<object>values).shape
-    if uses_mask:
-        nan_val = 0
-    elif int64float_t is int64_t or int64float_t is uint64_t:
-        # This has no effect as int64 can't be nan. Setting to 0 to avoid type error
-        nan_val = 0
-    else:
-        nan_val = NAN
+    nan_val = _get_na_val(<int64float_t>0, False)

     with nogil:
         for i in range(N):

From d31aa796c317dc6ab35505a0cf9bb43602ce41f1 Mon Sep 17 00:00:00 2001
From: Nitish Satyavolu
Date: Sat, 25 Jan 2025 14:03:48 -0800
Subject: [PATCH 09/11] Add test for all-NA case

---
 pandas/tests/groupby/test_reductions.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index 35d3abcb761e2..ea876cfdf4933 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -520,15 +520,27 @@ def test_sum_skipna_object(skipna):
         ("prod", [0, 1, 3, np.nan, 4, 5, 6, 7, -8, 9], "float64", "float64"),
         ("prod", [0, -1, 3, 4, 5, np.nan, 6, 7, 8, 9], "Float64", "Float64"),
         ("prod", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Int64", "Int64"),
+        ("prod", [np.nan] * 10, "float64", "float64"),
+        ("prod", [np.nan] * 10, "Float64", "Float64"),
+        ("prod", [np.nan] * 10, "Int64", "Int64"),
         ("var", [0, -1, 3, 4, np.nan, 5, 6, 7, 8, 9], "float64", "float64"),
         ("var", [0, 1, 3, -4, 5, 6, 7, -8, 9, np.nan], "Float64", "Float64"),
         ("var", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "Int64", "Float64"),
+        ("var", [np.nan] * 10, "float64", "float64"),
+        ("var", [np.nan] * 10, "Float64", "Float64"),
+        ("var", [np.nan] * 10, "Int64", "Float64"),
         ("std", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "float64", "float64"),
         ("std", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "Float64", "Float64"),
         ("std", [0, 1, 3, -4, 5, 6, 7, -8, 9, np.nan], "Int64", "Float64"),
+        ("std", [np.nan] * 10, "float64", "float64"),
+        ("std", [np.nan] * 10, "Float64", "Float64"),
+        ("std", [np.nan] * 10, "Int64", "Float64"),
         ("sem", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"),
         ("sem", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"),
         ("sem", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"),
+        ("sem", [np.nan] * 10, "float64", "float64"),
+        ("sem", [np.nan] * 10, "Float64", "Float64"),
+        ("sem", [np.nan] * 10, "Int64", "Float64"),
         ("min", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"),
         ("min", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"),
         ("min", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Int64"),
@@ -569,6 +581,9 @@ def test_sum_skipna_object(skipna):
             "datetime64[ns]",
             "datetime64[ns]",
         ),
+        ("min", [np.nan] * 10, "float64", "float64"),
+        ("min", [np.nan] * 10, "Float64", "Float64"),
+        ("min", [np.nan] * 10, "Int64", "Int64"),
         ("max", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"),
         ("max", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"),
         ("max", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Int64"),
@@ -600,6 +615,9 @@ def test_sum_skipna_object(skipna):
             "datetime64[ns]",
             "datetime64[ns]",
         ),
+        ("max", [np.nan] * 10, "float64", "float64"),
+        ("max", [np.nan] * 10, "Float64", "Float64"),
+        ("max", [np.nan] * 10, "Int64", "Int64"),
         ("median", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"),
         ("median", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"),
         ("median", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"),
@@ -631,6 +649,9 @@ def test_sum_skipna_object(skipna):
             "datetime64[ns]",
             "datetime64[ns]",
         ),
+        ("median", [np.nan] * 10, "float64", "float64"),
+        ("median", [np.nan] * 10, "Float64", "Float64"),
+        ("median", [np.nan] * 10, "Int64", "Float64"),
     ],
 )
 def test_multifunc_skipna(func, values, dtype, result_dtype, skipna):

From
7a30d590bbe7306e94535fda21c02b22643d5b19 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Mon, 27 Jan 2025 15:58:19 -0800 Subject: [PATCH 10/11] Address review comment --- pandas/core/_numba/kernels/var_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/_numba/kernels/var_.py b/pandas/core/_numba/kernels/var_.py index cce5ee10154b1..fd52f474dba47 100644 --- a/pandas/core/_numba/kernels/var_.py +++ b/pandas/core/_numba/kernels/var_.py @@ -196,7 +196,6 @@ def grouped_var( if not skipna and np.isnan(val): output[lab] = np.nan - nobs_arr[lab] += 1 comp_arr[lab] = np.nan consecutive_counts[lab] = 1 prev_vals[lab] = np.nan From 0fc49df08fb81233750a3007bc8b5b2cd5b5e675 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Mon, 27 Jan 2025 16:06:33 -0800 Subject: [PATCH 11/11] Remove more no-op lines --- pandas/core/_numba/kernels/var_.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/_numba/kernels/var_.py b/pandas/core/_numba/kernels/var_.py index fd52f474dba47..5d720c877815d 100644 --- a/pandas/core/_numba/kernels/var_.py +++ b/pandas/core/_numba/kernels/var_.py @@ -196,9 +196,6 @@ def grouped_var( if not skipna and np.isnan(val): output[lab] = np.nan - comp_arr[lab] = np.nan - consecutive_counts[lab] = 1 - prev_vals[lab] = np.nan continue mean_x = means[lab]
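
Editor's usage sketch (illustrative, not part of the patch series): the frame,
column names, and values below are invented, and the outputs assume a pandas
build that includes these commits.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        {
            "key": ["a", "a", "b", "b"],
            "val": [1.0, np.nan, 2.0, 3.0],
        }
    )

    # Default behavior is unchanged: NA values are skipped within each group.
    df.groupby("key")["val"].prod()
    # key
    # a    1.0
    # b    6.0
    # Name: val, dtype: float64

    # With skipna=False, any NA in a group propagates to that group's result,
    # giving the same answer as Series.prod(skipna=False) applied per group.
    # That equivalence is what the new test_multifunc_skipna test asserts.
    df.groupby("key")["val"].prod(skipna=False)
    # key
    # a    NaN
    # b    6.0
    # Name: val, dtype: float64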
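The flag flows through the masked-dtype and numba paths touched above as well;
a second sketch under the same assumptions (the last call additionally assumes
numba is installed):

    # Nullable dtypes take the result_mask branch, so the group's result
    # becomes pd.NA rather than NaN.
    ser = pd.Series([1, pd.NA, 2, 3], dtype="Int64")
    ser.groupby(["a", "a", "b", "b"]).min(skipna=False)
    # a    <NA>
    # b       2
    # dtype: Int64

    # Engine-dispatched reductions accept skipna too and reach the updated
    # numba kernels (grouped_var / grouped_min_max).
    df.groupby("key")["val"].var(skipna=False, engine="numba")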