
Commit a385f75

Merge branch 'master' into styler_bar_colors
2 parents bae65ec + b3e9ae7

File tree

16 files changed: +371 -44 lines changed

asv_bench/benchmarks/groupby.py

Lines changed: 32 additions & 0 deletions

@@ -603,6 +603,38 @@ def time_sum(self):
         self.df.groupby(["a"])["b"].sum()


+class String:
+    # GH#41596
+    param_names = ["dtype", "method"]
+    params = [
+        ["str", "string[python]"],
+        [
+            "sum",
+            "prod",
+            "min",
+            "max",
+            "mean",
+            "median",
+            "var",
+            "first",
+            "last",
+            "any",
+            "all",
+        ],
+    ]
+
+    def setup(self, dtype, method):
+        cols = list("abcdefghjkl")
+        self.df = DataFrame(
+            np.random.randint(0, 100, size=(1_000_000, len(cols))),
+            columns=cols,
+            dtype=dtype,
+        )
+
+    def time_str_func(self, dtype, method):
+        self.df.groupby("a")[self.df.columns[1:]].agg(method)
+
+
 class Categories:
     def setup(self):
         N = 10 ** 5
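As a rough, scaled-down sketch of what this benchmark times (not part of the diff; the row count is reduced here and "first" stands in for any of the parametrized methods), the hot path is a groupby aggregation over StringDtype columns:

import numpy as np
import pandas as pd

cols = list("abcdefghjkl")
df = pd.DataFrame(
    np.random.randint(0, 100, size=(100_000, len(cols))),  # benchmark uses 1_000_000 rows
    columns=cols,
    dtype="string[python]",  # alias from the benchmark's params
)

# the timed call: one cython-backed aggregation per non-key column
df.groupby("a")[df.columns[1:]].agg("first")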

doc/source/whatsnew/v1.3.4.rst

Lines changed: 3 additions & 1 deletion

@@ -17,10 +17,11 @@ Fixed regressions
 - Fixed regression in :meth:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`)
 - Fixed regression in :meth:`DataFrame.corr` raising ``ValueError`` with ``method="spearman"`` on 32-bit platforms (:issue:`43588`)
 - Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`)
+- Fixed performance regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` with :class:`StringDtype` (:issue:`41596`)
 - Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`)
 - Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`)
 - Fixed regression in :meth:`pandas.read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`)
--
+- Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`)

 .. ---------------------------------------------------------------------------

@@ -29,6 +30,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Fixed bug in :meth:`.GroupBy.mean` with datetimelike values including ``NaT`` values returning incorrect results (:issue:`43132`)
+- Fixed bug in :meth:`Series.aggregate` not passing the first ``args`` to the user supplied ``func`` in certain cases (:issue:`43357`)

 .. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

Lines changed: 1 addition & 1 deletion

@@ -357,7 +357,7 @@ Performance improvements
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
 - :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
 - Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`)
--
+- Performance improvement in :meth:`.Rolling.mean` and :meth:`.Expanding.mean` with ``engine="numba"`` (:issue:`43612`)

 .. ---------------------------------------------------------------------------
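The rolling/expanding entry corresponds to the public engine keyword; a minimal sketch of the call this speeds up (assuming numba is installed):

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(1_000_000))

# the first call JIT-compiles the mean kernel; later calls reuse the
# compiled function from NUMBA_FUNC_CACHE
s.rolling(100).mean(engine="numba")
s.expanding().mean(engine="numba")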

pandas/_libs/index.pyx

Lines changed: 32 additions & 18 deletions

@@ -87,11 +87,7 @@ cdef class IndexEngine:
         values = self.values

         self._check_type(val)
-        try:
-            loc = _bin_search(values, val)  # .searchsorted(val, side='left')
-        except TypeError:
-            # GH#35788 e.g. val=None with float64 values
-            raise KeyError(val)
+        loc = self._searchsorted_left(val)
         if loc >= len(values):
             raise KeyError(val)
         if values[loc] != val:
@@ -110,6 +106,17 @@ cdef class IndexEngine:
             # GH#41775 OverflowError e.g. if we are uint64 and val is -1
             raise KeyError(val)

+    cdef Py_ssize_t _searchsorted_left(self, val) except? -1:
+        """
+        See ObjectEngine._searchsorted_left.__doc__.
+        """
+        try:
+            loc = self.values.searchsorted(val, side="left")
+        except TypeError as err:
+            # GH#35788 e.g. val=None with float64 values
+            raise KeyError(val)
+        return loc
+
     cdef inline _get_loc_duplicates(self, object val):
         # -> Py_ssize_t | slice | ndarray[bool]
         cdef:
@@ -373,6 +380,11 @@ cdef class IndexEngine:


 cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
+    # GH#1757 ndarray.searchsorted is not safe to use with array of tuples
+    # (treats a tuple `val` as a sequence of keys instead of a single key),
+    # so we implement something similar.
+    # This is equivalent to the stdlib's bisect.bisect_left
+
     cdef:
         Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1
         object pval
@@ -405,6 +417,15 @@ cdef class ObjectEngine(IndexEngine):
     cdef _make_hash_table(self, Py_ssize_t n):
         return _hash.PyObjectHashTable(n)

+    cdef Py_ssize_t _searchsorted_left(self, val) except? -1:
+        # using values.searchsorted here would treat a tuple `val` as a sequence
+        # instead of a single key, so we use a different implementation
+        try:
+            loc = _bin_search(self.values, val)
+        except TypeError as err:
+            raise KeyError(val) from err
+        return loc
+

 cdef class DatetimeEngine(Int64Engine):

@@ -418,19 +439,12 @@ cdef class DatetimeEngine(Int64Engine):
     def __contains__(self, val: object) -> bool:
         # We assume before we get here:
         #  - val is hashable
-        cdef:
-            int64_t loc, conv
-
-        conv = self._unbox_scalar(val)
-        if self.over_size_threshold and self.is_monotonic_increasing:
-            if not self.is_unique:
-                return self._get_loc_duplicates(conv)
-            values = self.values
-            loc = values.searchsorted(conv, side='left')
-            return values[loc] == conv
-
-        self._ensure_mapping_populated()
-        return conv in self.mapping
+        self._unbox_scalar(val)
+        try:
+            self.get_loc(val)
+            return True
+        except KeyError:
+            return False

     cdef _call_monotonic(self, values):
         return algos.is_monotonic(values, timelike=True)
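The GH#1757 comment is the crux of this change: ndarray.searchsorted cannot be trusted with tuple keys, which is what _bin_search works around. A small pure-Python illustration (not from the diff):

import bisect
import numpy as np

keys = [("a", 1), ("a", 2), ("b", 1)]
arr = np.empty(len(keys), dtype=object)
arr[:] = keys

# arr.searchsorted(("a", 2)) coerces the tuple into TWO separate keys,
# "a" and 2, and returns one position per key (or raises TypeError on
# the mixed comparisons), which is useless for a single-label lookup.
# bisect.bisect_left compares the tuple as one key, the behavior that
# _bin_search reimplements in Cython:
bisect.bisect_left(keys, ("a", 2))  # -> 1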

pandas/core/_numba/__init__.py

Whitespace-only changes.

pandas/core/_numba/executor.py

Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+from typing import Callable
+
+import numpy as np
+
+from pandas._typing import Scalar
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.core.util.numba_ import (
+    NUMBA_FUNC_CACHE,
+    get_jit_arguments,
+)
+
+
+def generate_shared_aggregator(
+    func: Callable[..., Scalar],
+    engine_kwargs: dict[str, bool] | None,
+    cache_key_str: str,
+):
+    """
+    Generate a Numba function that loops over the columns 2D object and applies
+    a 1D numba kernel over each column.
+
+    Parameters
+    ----------
+    func : function
+        aggregation function to be applied to each column
+    engine_kwargs : dict
+        dictionary of arguments to be passed into numba.jit
+    cache_key_str: str
+        string to access the compiled function of the form
+        <caller_type>_<aggregation_type> e.g. rolling_mean, groupby_mean
+
+    Returns
+    -------
+    Numba function
+    """
+    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, None)
+
+    cache_key = (func, cache_key_str)
+    if cache_key in NUMBA_FUNC_CACHE:
+        return NUMBA_FUNC_CACHE[cache_key]
+
+    numba = import_optional_dependency("numba")
+
+    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
+    def column_looper(
+        values: np.ndarray,
+        start: np.ndarray,
+        end: np.ndarray,
+        min_periods: int,
+    ):
+        result = np.empty((len(start), values.shape[1]), dtype=np.float64)
+        for i in numba.prange(values.shape[1]):
+            result[:, i] = func(values[:, i], start, end, min_periods)
+        return result
+
+    return column_looper
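To see how the pieces fit together, here is a sketch of wiring a 1D kernel through generate_shared_aggregator. It relies on private pandas internals that can change without notice, so treat it as illustrative only:

import numpy as np

from pandas.core._numba.executor import generate_shared_aggregator
from pandas.core._numba.kernels import sliding_mean

values = np.random.randn(10, 3)        # 2D block: 10 rows x 3 columns
start = np.arange(8, dtype=np.int64)   # output row i covers values[start[i]:end[i]]
end = start + 3

looper = generate_shared_aggregator(sliding_mean, None, "rolling_mean")
result = looper(values, start, end, 3)  # shape (8, 3): one mean per window per column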
pandas/core/_numba/kernels/__init__.py

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+from pandas.core._numba.kernels.mean_ import sliding_mean
+
+__all__ = ["sliding_mean"]

pandas/core/_numba/kernels/mean_.py

Lines changed: 119 additions & 0 deletions

@@ -0,0 +1,119 @@
+"""
+Numba 1D aggregation kernels that can be shared by
+* Dataframe / Series
+* groupby
+* rolling / expanding
+
+Mirrors pandas/_libs/window/aggregation.pyx
+"""
+from __future__ import annotations
+
+import numba
+import numpy as np
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def is_monotonic_increasing(bounds: np.ndarray) -> bool:
+    """Check if int64 values are monotonically increasing."""
+    n = len(bounds)
+    if n < 2:
+        return True
+    prev = bounds[0]
+    for i in range(1, n):
+        cur = bounds[i]
+        if cur < prev:
+            return False
+        prev = cur
+    return True
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def add_mean(
+    val: float, nobs: int, sum_x: float, neg_ct: int, compensation: float
+) -> tuple[int, float, int, float]:
+    if not np.isnan(val):
+        nobs += 1
+        y = val - compensation
+        t = sum_x + y
+        compensation = t - sum_x - y
+        sum_x = t
+        if val < 0:
+            neg_ct += 1
+    return nobs, sum_x, neg_ct, compensation
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def remove_mean(
+    val: float, nobs: int, sum_x: float, neg_ct: int, compensation: float
+) -> tuple[int, float, int, float]:
+    if not np.isnan(val):
+        nobs -= 1
+        y = -val - compensation
+        t = sum_x + y
+        compensation = t - sum_x - y
+        sum_x = t
+        if val < 0:
+            neg_ct -= 1
+    return nobs, sum_x, neg_ct, compensation
+
+
+@numba.jit(nopython=True, nogil=True, parallel=False)
+def sliding_mean(
+    values: np.ndarray,
+    start: np.ndarray,
+    end: np.ndarray,
+    min_periods: int,
+) -> np.ndarray:
+    N = len(start)
+    nobs = 0
+    sum_x = 0.0
+    neg_ct = 0
+    compensation_add = 0.0
+    compensation_remove = 0.0
+
+    is_monotonic_increasing_bounds = is_monotonic_increasing(
+        start
+    ) and is_monotonic_increasing(end)
+
+    output = np.empty(N, dtype=np.float64)
+
+    for i in range(N):
+        s = start[i]
+        e = end[i]
+        if i == 0 or not is_monotonic_increasing_bounds:
+            for j in range(s, e):
+                val = values[j]
+                nobs, sum_x, neg_ct, compensation_add = add_mean(
+                    val, nobs, sum_x, neg_ct, compensation_add
+                )
+        else:
+            for j in range(start[i - 1], s):
+                val = values[j]
+                nobs, sum_x, neg_ct, compensation_remove = remove_mean(
+                    val, nobs, sum_x, neg_ct, compensation_remove
+                )
+
+            for j in range(end[i - 1], e):
+                val = values[j]
+                nobs, sum_x, neg_ct, compensation_add = add_mean(
+                    val, nobs, sum_x, neg_ct, compensation_add
+                )
+
+        if nobs >= min_periods and nobs > 0:
+            result = sum_x / nobs
+            if neg_ct == 0 and result < 0:
+                result = 0
+            elif neg_ct == nobs and result > 0:
+                result = 0
+        else:
+            result = np.nan
+
+        output[i] = result
+
+        if not is_monotonic_increasing_bounds:
+            nobs = 0
+            sum_x = 0.0
+            neg_ct = 0
+            compensation_remove = 0.0
+
+    return output
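add_mean/remove_mean carry a Kahan compensation term so that repeatedly adding and removing values as the window slides does not accumulate floating-point error. A quick sanity check of the kernel's semantics (again via private internals, illustrative only):

import numpy as np

from pandas.core._numba.kernels import sliding_mean

values = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
start = np.arange(3, dtype=np.int64)  # windows [0, 3), [1, 4), [2, 5)
end = start + 3

# NaNs are skipped; a window with fewer than min_periods=2 valid
# values would produce NaN instead
sliding_mean(values, start, end, 2)  # -> array([1.5, 3. , 4.5])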

pandas/core/apply.py

Lines changed: 2 additions & 3 deletions

@@ -1051,7 +1051,6 @@ def agg(self):
         result = super().agg()
         if result is None:
             f = self.f
-            args = self.args
             kwargs = self.kwargs

             # string, list-like, and dict-like are entirely handled in super
@@ -1070,9 +1069,9 @@ def agg(self):
             # then .agg and .apply would have different semantics if the
             # operation is actually defined on the Series, e.g. str
             try:
-                result = self.obj.apply(f, *args, **kwargs)
+                result = self.obj.apply(f)
             except (ValueError, AttributeError, TypeError):
-                result = f(self.obj, *args, **kwargs)
+                result = f(self.obj)

         return result
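The removed *args/**kwargs were already curried into self.f by Apply.__init__, so the old fallback forwarded them a second time (GH#43357). A reproducer along the lines of the issue (func is a hypothetical user function, not from the diff):

import pandas as pd

def func(x, a=0):
    return x.sum() + a

s = pd.Series([1, 2, 3])

# before the fix, the fallback re-passed ``a`` on top of the curried
# function and raised TypeError; now ``a`` is applied exactly once
s.agg(func, a=10)  # -> 16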

pandas/core/generic.py

Lines changed: 2 additions & 0 deletions

@@ -3953,6 +3953,8 @@ def __delitem__(self, key) -> None:
         maybe_shortcut = False
         if self.ndim == 2 and isinstance(self.columns, MultiIndex):
             try:
+                # By using engine's __contains__ we effectively
+                # restrict to same-length tuples
                 maybe_shortcut = key not in self.columns._engine
             except TypeError:
                 pass
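The new comment documents why the engine lookup is a safe shortcut: only full-length tuples can be contained in the engine, so shorter keys fall through to the multi-level deletion path. A sketch of both cases:

import pandas as pd

cols = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "x")])
df = pd.DataFrame([[1, 2, 3]], columns=cols)

del df[("a", "x")]  # full-length tuple: found via the engine, one column dropped

del df["a"]  # partial key: not in the engine, drops every remaining "a" column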

pandas/core/groupby/ops.py

Lines changed: 7 additions & 1 deletion

@@ -82,6 +82,7 @@
     BaseMaskedArray,
     BaseMaskedDtype,
 )
+from pandas.core.arrays.string_ import StringDtype
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
 from pandas.core.groupby import grouper
@@ -348,6 +349,9 @@ def _ea_wrap_cython_operation(
         elif isinstance(values.dtype, FloatingDtype):
             # FloatingArray
             npvalues = values.to_numpy(values.dtype.numpy_dtype, na_value=np.nan)
+        elif isinstance(values.dtype, StringDtype):
+            # StringArray
+            npvalues = values.to_numpy(object, na_value=np.nan)
         else:
             raise NotImplementedError(
                 f"function is not implemented for this dtype: {values.dtype}"
@@ -375,7 +379,9 @@ def _reconstruct_ea_result(self, values, res_values):
         """
         # TODO: allow EAs to override this logic

-        if isinstance(values.dtype, (BooleanDtype, _IntegerDtype, FloatingDtype)):
+        if isinstance(
+            values.dtype, (BooleanDtype, _IntegerDtype, FloatingDtype, StringDtype)
+        ):
             dtype = self._get_result_dtype(values.dtype)
             cls = dtype.construct_array_type()
             return cls._from_sequence(res_values, dtype=dtype)
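With StringDtype wired into both the unwrap and reconstruct steps, cython-backed group aggregations such as first/last keep the extension dtype instead of falling back to object. A small example of the behavior this enables:

import pandas as pd

df = pd.DataFrame(
    {
        "key": [1, 1, 2],
        "val": pd.array(["x", pd.NA, "z"], dtype="string"),
    }
)

# round-trips through the cython path and comes back as StringDtype
df.groupby("key")["val"].first()
# key
# 1    x
# 2    z
# Name: val, dtype: string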
