Commit 456796e

Merge branch 'main' into main
2 parents d1cfb4d + b983366 commit 456796e

File tree

18 files changed: +310 −21 lines changed

.github/workflows/unit-tests.yml

Lines changed: 13 additions & 3 deletions

@@ -22,10 +22,11 @@ defaults:
 jobs:
   ubuntu:
-    runs-on: ubuntu-22.04
+    runs-on: ${{ matrix.platform }}
     timeout-minutes: 90
     strategy:
       matrix:
+        platform: [ubuntu-22.04, ubuntu-24.04-arm]
         env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml]
         # Prevent the include jobs from overriding other jobs
         pattern: [""]
@@ -35,9 +36,11 @@ jobs:
           env_file: actions-311-downstream_compat.yaml
           pattern: "not slow and not network and not single_cpu"
           pytest_target: "pandas/tests/test_downstream.py"
+          platform: ubuntu-22.04
         - name: "Minimum Versions"
           env_file: actions-310-minimum_versions.yaml
           pattern: "not slow and not network and not single_cpu"
+          platform: ubuntu-22.04
         - name: "Locale: it_IT"
           env_file: actions-311.yaml
           pattern: "not slow and not network and not single_cpu"
@@ -48,6 +51,7 @@ jobs:
           # Also install it_IT (its encoding is ISO8859-1) but do not activate it.
           # It will be temporarily activated during tests with locale.setlocale
           extra_loc: "it_IT"
+          platform: ubuntu-22.04
         - name: "Locale: zh_CN"
           env_file: actions-311.yaml
           pattern: "not slow and not network and not single_cpu"
@@ -58,25 +62,31 @@ jobs:
           # Also install zh_CN (its encoding is gb2312) but do not activate it.
           # It will be temporarily activated during tests with locale.setlocale
           extra_loc: "zh_CN"
+          platform: ubuntu-22.04
         - name: "Future infer strings"
           env_file: actions-312.yaml
           pandas_future_infer_string: "1"
+          platform: ubuntu-22.04
         - name: "Future infer strings (without pyarrow)"
           env_file: actions-311.yaml
           pandas_future_infer_string: "1"
+          platform: ubuntu-22.04
         - name: "Pypy"
           env_file: actions-pypy-39.yaml
           pattern: "not slow and not network and not single_cpu"
           test_args: "--max-worker-restart 0"
+          platform: ubuntu-22.04
         - name: "Numpy Dev"
           env_file: actions-311-numpydev.yaml
           pattern: "not slow and not network and not single_cpu"
           test_args: "-W error::DeprecationWarning -W error::FutureWarning"
+          platform: ubuntu-22.04
         - name: "Pyarrow Nightly"
           env_file: actions-311-pyarrownightly.yaml
           pattern: "not slow and not network and not single_cpu"
+          platform: ubuntu-22.04
       fail-fast: false
-    name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }}
+    name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }}-${{ matrix.platform }}
     env:
       PATTERN: ${{ matrix.pattern }}
       LANG: ${{ matrix.lang || 'C.UTF-8' }}
@@ -91,7 +101,7 @@ jobs:
       REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
     concurrency:
       # https://github.community/t/concurrecy-not-work-for-push/183068/7
-      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}
+      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }}
       cancel-in-progress: true

     services:
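The concurrency change above can be illustrated with a small, purely hypothetical Python sketch of how the ``group`` key is composed (the helper below is not a GitHub Actions API): without ``matrix.platform`` in the expression, the x86 job and the new ARM job with otherwise identical matrix values would compute the same group key and cancel each other under ``cancel-in-progress: true``.

```python
def group_key(ref, env_file, pattern, extra_apt="", infer_string="", platform=None):
    """Hypothetical mirror of the workflow's concurrency-group expression."""
    parts = [ref, env_file, pattern, extra_apt, infer_string]
    if platform is not None:
        parts.append(platform)  # the suffix added by this commit
    return "-".join(parts)

# Before the change: both platforms collapse to one group and cancel each other.
old_x86 = group_key("refs/pull/1", "actions-312.yaml", "")
old_arm = group_key("refs/pull/1", "actions-312.yaml", "")

# After the change: the platform suffix keeps the groups distinct.
new_x86 = group_key("refs/pull/1", "actions-312.yaml", "", platform="ubuntu-22.04")
new_arm = group_key("refs/pull/1", "actions-312.yaml", "", platform="ubuntu-24.04-arm")
```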

.github/workflows/wheels.yml

Lines changed: 1 addition & 0 deletions

@@ -94,6 +94,7 @@ jobs:
         buildplat:
           - [ubuntu-22.04, manylinux_x86_64]
           - [ubuntu-22.04, musllinux_x86_64]
+          - [ubuntu-24.04-arm, manylinux_aarch64]
           - [macos-13, macosx_x86_64]
           # Note: M1 images on Github Actions start from macOS 14
           - [macos-14, macosx_arm64]

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions

@@ -30,6 +30,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
 - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
+- :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`_ if available, only falling back to the Dataframe Interchange Protocol if that fails (:issue:`60739`)
 - :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``
 - :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
 - :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)
@@ -59,6 +60,7 @@ Other enhancements
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
 - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
 - :func:`read_parquet` accepts ``to_pandas_kwargs``, which are forwarded to :meth:`pyarrow.Table.to_pandas`, enabling additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
+- :meth:`.DataFrameGroupBy.mean`, :meth:`.DataFrameGroupBy.sum`, :meth:`.SeriesGroupBy.mean` and :meth:`.SeriesGroupBy.sum` now accept a ``skipna`` parameter (:issue:`15675`)
 - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now support positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
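A hedged usage sketch for the new groupby ``skipna`` entry above. The default behavior is unchanged (NaNs are skipped); the strict variant, which requires a pandas build that includes this change (3.0+), is shown commented out so the sketch runs on earlier versions too.

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1.0, np.nan, 2.0]})

# Default behavior (unchanged): the NaN is skipped, so group "a" sums to 1.0.
by_default = df.groupby("key")["val"].sum()

# New in 3.0 per the whatsnew entry: propagate NaN instead of skipping it.
# by_strict = df.groupby("key")["val"].sum(skipna=False)   # group "a" -> NaN
```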

pandas/_libs/groupby.pyi

Lines changed: 2 additions & 0 deletions

@@ -66,6 +66,7 @@ def group_sum(
     result_mask: np.ndarray | None = ...,
     min_count: int = ...,
     is_datetimelike: bool = ...,
+    skipna: bool = ...,
 ) -> None: ...
 def group_prod(
     out: np.ndarray,  # int64float_t[:, ::1]
@@ -115,6 +116,7 @@ def group_mean(
     is_datetimelike: bool = ...,  # bint
     mask: np.ndarray | None = ...,
     result_mask: np.ndarray | None = ...,
+    skipna: bool = ...,
 ) -> None: ...
 def group_ohlc(
     out: np.ndarray,  # floatingintuint_t[:, ::1]

pandas/_libs/groupby.pyx

Lines changed: 47 additions & 1 deletion

@@ -700,13 +700,14 @@ def group_sum(
     uint8_t[:, ::1] result_mask=None,
     Py_ssize_t min_count=0,
     bint is_datetimelike=False,
+    bint skipna=True,
 ) -> None:
     """
     Only aggregates on axis=0 using Kahan summation
     """
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
-        sum_t val, t, y
+        sum_t val, t, y, nan_val
         sum_t[:, ::1] sumx, compensation
         int64_t[:, ::1] nobs
         Py_ssize_t len_values = len(values), len_labels = len(labels)
@@ -722,6 +723,15 @@ def group_sum(
     compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)

     N, K = (<object>values).shape
+    if uses_mask:
+        nan_val = 0
+    elif is_datetimelike:
+        nan_val = NPY_NAT
+    elif sum_t is int64_t or sum_t is uint64_t:
+        # This has no effect as int64 can't be nan. Setting to 0 to avoid type error
+        nan_val = 0
+    else:
+        nan_val = NAN

     with nogil(sum_t is not object):
         for i in range(N):
@@ -734,6 +744,16 @@ def group_sum(
             for j in range(K):
                 val = values[i, j]

+                if not skipna and (
+                    (uses_mask and result_mask[lab, j]) or
+                    (is_datetimelike and sumx[lab, j] == NPY_NAT) or
+                    _treat_as_na(sumx[lab, j], False)
+                ):
+                    # If sum is already NA, don't add to it. This is important for
+                    # datetimelike because adding a value to NPY_NAT may not result
+                    # in NPY_NAT
+                    continue
+
                 if uses_mask:
                     isna_entry = mask[i, j]
                 else:
@@ -765,6 +785,11 @@ def group_sum(
                             # because of no gil
                             compensation[lab, j] = 0
                         sumx[lab, j] = t
+                elif not skipna:
+                    if uses_mask:
+                        result_mask[lab, j] = True
+                    else:
+                        sumx[lab, j] = nan_val

     _check_below_mincount(
         out, uses_mask, result_mask, ncounts, K, nobs, min_count, sumx
@@ -1100,6 +1125,7 @@ def group_mean(
     bint is_datetimelike=False,
     const uint8_t[:, ::1] mask=None,
     uint8_t[:, ::1] result_mask=None,
+    bint skipna=True,
 ) -> None:
     """
     Compute the mean per label given a label assignment for each value.
@@ -1125,6 +1151,8 @@ def group_mean(
         Mask of the input values.
     result_mask : ndarray[bool, ndim=2], optional
         Mask of the out array
+    skipna : bool, optional
+        If True, ignore nans in `values`.

     Notes
     -----
@@ -1168,6 +1196,16 @@ def group_mean(
             for j in range(K):
                 val = values[i, j]

+                if not skipna and (
+                    (uses_mask and result_mask[lab, j]) or
+                    (is_datetimelike and sumx[lab, j] == NPY_NAT) or
+                    _treat_as_na(sumx[lab, j], False)
+                ):
+                    # If sum is already NA, don't add to it. This is important for
+                    # datetimelike because adding a value to NPY_NAT may not result
+                    # in NPY_NAT
+                    continue
+
                 if uses_mask:
                     isna_entry = mask[i, j]
                 elif is_datetimelike:
@@ -1191,6 +1229,14 @@ def group_mean(
                             # because of no gil
                             compensation[lab, j] = 0.
                         sumx[lab, j] = t
+                elif not skipna:
+                    # Set the nobs to 0 so that in case of datetimelike,
+                    # dividing NPY_NAT by nobs may not result in NPY_NAT
+                    nobs[lab, j] = 0
+                    if uses_mask:
+                        result_mask[lab, j] = True
+                    else:
+                        sumx[lab, j] = nan_val

     for i in range(ncounts):
         for j in range(K):
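The skipna branches added to ``group_sum`` above can be mirrored in a short plain-Python sketch (the function name is illustrative, not the Cython API): once a group's running sum is NA it is never added to again, and with ``skipna=False`` a NaN input marks the whole group NA.

```python
import math

def group_sum_skipna(values, labels, ngroups, skipna=True):
    """Plain-Python mirror of the new skipna logic in group_sum: a running
    sum that is already NA is left untouched (important for datetimelike
    data, where adding to the NPY_NAT sentinel would corrupt it), and with
    skipna=False a NaN value poisons its group."""
    sumx = [0.0] * ngroups
    for val, lab in zip(values, labels):
        if lab < 0:
            continue  # unassigned row
        if not skipna and math.isnan(sumx[lab]):
            continue  # sum already NA: don't add to it
        if math.isnan(val):
            if not skipna:
                sumx[lab] = float("nan")  # propagate the NA
            continue  # skipna=True: just ignore the value
        sumx[lab] += val
    return sumx

group_sum_skipna([1.0, float("nan"), 2.0, 3.0], [0, 0, 1, 1], 2)
```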

pandas/core/_numba/kernels/mean_.py

Lines changed: 2 additions & 1 deletion

@@ -169,9 +169,10 @@ def grouped_mean(
     labels: npt.NDArray[np.intp],
     ngroups: int,
     min_periods: int,
+    skipna: bool,
 ) -> tuple[np.ndarray, list[int]]:
     output, nobs_arr, comp_arr, consecutive_counts, prev_vals = grouped_kahan_sum(
-        values, result_dtype, labels, ngroups
+        values, result_dtype, labels, ngroups, skipna
     )

     # Post-processing, replace sums that don't satisfy min_periods

pandas/core/_numba/kernels/sum_.py

Lines changed: 12 additions & 2 deletions

@@ -165,6 +165,7 @@ def grouped_kahan_sum(
     result_dtype: np.dtype,
     labels: npt.NDArray[np.intp],
     ngroups: int,
+    skipna: bool,
 ) -> tuple[
     np.ndarray, npt.NDArray[np.int64], np.ndarray, npt.NDArray[np.int64], np.ndarray
 ]:
@@ -180,7 +181,15 @@ def grouped_kahan_sum(
         lab = labels[i]
         val = values[i]

-        if lab < 0:
+        if lab < 0 or np.isnan(output[lab]):
+            continue
+
+        if not skipna and np.isnan(val):
+            output[lab] = np.nan
+            nobs_arr[lab] += 1
+            comp_arr[lab] = np.nan
+            consecutive_counts[lab] = 1
+            prev_vals[lab] = np.nan
             continue

         sum_x = output[lab]
@@ -219,11 +228,12 @@ def grouped_sum(
     labels: npt.NDArray[np.intp],
     ngroups: int,
     min_periods: int,
+    skipna: bool,
 ) -> tuple[np.ndarray, list[int]]:
     na_pos = []

     output, nobs_arr, comp_arr, consecutive_counts, prev_vals = grouped_kahan_sum(
-        values, result_dtype, labels, ngroups
+        values, result_dtype, labels, ngroups, skipna
     )

     # Post-processing, replace sums that don't satisfy min_periods
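The kernel above is built on Kahan (compensated) summation. A minimal Python sketch of the same idea, including the new ``skipna`` poisoning, shows how the per-group compensation term recovers low-order bits that plain accumulation drops (the function name is illustrative, not the numba kernel's API):

```python
import math

def kahan_grouped_sum(values, labels, ngroups, skipna=True):
    """Compensated grouped summation in the spirit of grouped_kahan_sum:
    each group carries a running compensation that captures rounding error;
    with skipna=False a NaN poisons its group, and a poisoned group is
    skipped on later iterations."""
    output = [0.0] * ngroups
    comp = [0.0] * ngroups  # per-group running compensation
    for val, lab in zip(values, labels):
        if lab < 0 or math.isnan(output[lab]):
            continue  # unassigned row, or group already NaN
        if math.isnan(val):
            if not skipna:
                output[lab] = float("nan")
            continue
        y = val - comp[lab]                 # correct the incoming value
        t = output[lab] + y
        comp[lab] = (t - output[lab]) - y   # newly lost low-order bits
        output[lab] = t
    return output

vals = [0.1] * 10
totals = kahan_grouped_sum(vals, [0] * 10, 1)
```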

pandas/core/frame.py

Lines changed: 2 additions & 1 deletion

@@ -6890,7 +6890,8 @@ def sort_values(
         builtin :meth:`sorted` function, with the notable difference that
         this `key` function should be *vectorized*. It should expect a
         ``Series`` and return a Series with the same shape as the input.
-        It will be applied to each column in `by` independently.
+        It will be applied to each column in `by` independently. The values in the
+        returned Series will be used as the keys for sorting.

     Returns
     -------

pandas/core/generic.py

Lines changed: 2 additions & 1 deletion

@@ -4884,7 +4884,8 @@ def sort_values(
         builtin :meth:`sorted` function, with the notable difference that
         this `key` function should be *vectorized*. It should expect a
         ``Series`` and return a Series with the same shape as the input.
-        It will be applied to each column in `by` independently.
+        It will be applied to each column in `by` independently. The values in the
+        returned Series will be used as the keys for sorting.

     Returns
     -------
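A minimal example of the clarified ``key`` semantics: the Series returned by ``key`` supplies the values actually compared during the sort, applied to each column in ``by`` independently.

```python
import pandas as pd

df = pd.DataFrame({"name": ["banana", "Apple", "cherry"]})

# The lowercased strings, not the originals, are the sort keys, so the
# capitalized "Apple" sorts before "banana".
result = df.sort_values(by="name", key=lambda s: s.str.lower())
```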
