pandas-dev
diff --git a/‎asv_bench/benchmarks/arithmetic.py
+7-3 b/‎asv_bench/benchmarks/arithmetic.py
+7-3
diff --git a/‎asv_bench/benchmarks/groupby.py
+58 b/‎asv_bench/benchmarks/groupby.py
+58
diff --git a/‎asv_bench/benchmarks/stat_ops.py
+2-2 b/‎asv_bench/benchmarks/stat_ops.py
+2-2
diff --git a/‎ci/code_checks.sh
+7-1 b/‎ci/code_checks.sh
+7-1
diff --git a/‎doc/source/getting_started/index.rst
+1-1 b/‎doc/source/getting_started/index.rst
+1-1
diff --git a/‎doc/source/user_guide/computation.rst
+12-8 b/‎doc/source/user_guide/computation.rst
+12-8
diff --git a/‎doc/source/user_guide/cookbook.rst
-27 b/‎doc/source/user_guide/cookbook.rst
-27
diff --git a/‎doc/source/user_guide/groupby.rst
+67 b/‎doc/source/user_guide/groupby.rst
+67
@@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self):
         self.ser.add(self.ser, fill_value=4)
 
 
-class MixedFrameWithSeriesAxis0:
+class MixedFrameWithSeriesAxis:
     params = [
         [
             "eq",
@@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis0:
             "gt",
             "add",
             "sub",
-            "div",
+            "truediv",
             "floordiv",
             "mul",
             "pow",
@@ -87,15 +87,19 @@ class MixedFrameWithSeriesAxis0:
     param_names = ["opname"]
 
     def setup(self, opname):
-        arr = np.arange(10 ** 6).reshape(100, -1)
+        arr = np.arange(10 ** 6).reshape(1000, -1)
         df = DataFrame(arr)
         df["C"] = 1.0
         self.df = df
         self.ser = df[0]
+        self.row = df.iloc[0]
 
     def time_frame_op_with_series_axis0(self, opname):
         getattr(self.df, opname)(self.ser, axis=0)
 
+    def time_frame_op_with_series_axis1(self, opname):  # times op(frame, row)
+        getattr(operator, opname)(self.df, self.row)  # row from setup, not self.ser
+
 
 class Ops:
 
 
@@ -660,4 +660,62 @@ def function(values):
         self.grouper.transform(function, engine="cython")
 
 
+class AggEngine:  # asv benchmark: groupby ``agg`` with a Python UDF, numba vs cython
+    def setup(self):  # builds the grouped fixture used by every timing method
+        N = 10 ** 3  # each of the 100 keys below is repeated N times (100 * N rows)
+        data = DataFrame(
+            {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
+            columns=[0, 1],
+        )
+        self.grouper = data.groupby(0)  # group on the string column (100 distinct keys)
+
+    def time_series_numba(self):  # column 1 only, engine="numba"
+        def function(values, index):  # numba-engine UDFs take ``values, index``
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:  # odd positions
+                    total += value + 5
+                else:  # even positions
+                    total += value * 2
+            return total
+
+        self.grouper[1].agg(function, engine="numba")
+
+    def time_series_cython(self):  # column 1 only, engine="cython"
+        def function(values):  # same arithmetic as the numba UDF, without ``index``
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:  # odd positions
+                    total += value + 5
+                else:  # even positions
+                    total += value * 2
+            return total
+
+        self.grouper[1].agg(function, engine="cython")
+
+    def time_dataframe_numba(self):  # whole grouped frame, engine="numba"
+        def function(values, index):  # numba-engine UDFs take ``values, index``
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:  # odd positions
+                    total += value + 5
+                else:  # even positions
+                    total += value * 2
+            return total
+
+        self.grouper.agg(function, engine="numba")
+
+    def time_dataframe_cython(self):  # whole grouped frame, engine="cython"
+        def function(values):  # same arithmetic as the numba UDF, without ``index``
+            total = 0
+            for i, value in enumerate(values):
+                if i % 2:  # odd positions
+                    total += value + 5
+                else:  # even positions
+                    total += value * 2
+            return total
+
+        self.grouper.agg(function, engine="cython")
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -11,8 +11,8 @@ class FrameOps:
     param_names = ["op", "dtype", "axis"]
 
     def setup(self, op, dtype, axis):
-        if op == "mad" and dtype == "Int64" and axis == 1:
-            # GH-33036
+        if op == "mad" and dtype == "Int64":
+            # GH-33036, GH#33600
             raise NotImplementedError
         values = np.random.randn(100000, 4)
         if dtype == "Int64":
 
@@ -150,7 +150,13 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     # Check for imports from pandas._testing instead of `import pandas._testing as tm`
     invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
     RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from pandas.util import testing as tm" pandas/tests
+    invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    # No direct imports from conftest
+    invgrep -R --include="*.py*" -E "conftest import" pandas/tests
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+    invgrep -R --include="*.py*" -E "import conftest" pandas/tests
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for use of exec' ; echo $MSG
 
@@ -398,7 +398,7 @@ data set, a sliding window of the data or grouped by categories. The latter is a
                 <div class="card-body">
 
 Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
-from long to wide format. With aggregations built-in, a pivot table is created with a sinlge command.
+from long to wide format. With aggregations built-in, a pivot table is created with a single command.
 
 .. image:: ../_static/schemas/07_melt.svg
    :align: center
 
@@ -318,8 +318,8 @@ We provide a number of common statistical functions:
     :meth:`~Rolling.kurt`, Sample kurtosis (4th moment)
     :meth:`~Rolling.quantile`, Sample quantile (value at %)
     :meth:`~Rolling.apply`, Generic apply
-    :meth:`~Rolling.cov`, Unbiased covariance (binary)
-    :meth:`~Rolling.corr`, Correlation (binary)
+    :meth:`~Rolling.cov`, Sample covariance (binary)
+    :meth:`~Rolling.corr`, Sample correlation (binary)
 
 .. _computation.window_variance.caveats:
 
@@ -341,6 +341,8 @@ We provide a number of common statistical functions:
    sample variance under the circumstances would result in a biased estimator
    of the variable we are trying to determine.
 
+   The same caveats apply to using any supported statistical sample methods.
+
 .. _stats.rolling_apply:
 
 Rolling apply
@@ -380,8 +382,8 @@ and their default values are set to ``False``, ``True`` and ``False`` respective
 .. note::
 
    In terms of performance, **the first time a function is run using the Numba engine will be slow**
-   as Numba will have some function compilation overhead. However, ``rolling`` objects will cache
-   the function and subsequent calls will be fast. In general, the Numba engine is performant with
+   as Numba will have some function compilation overhead. However, the compiled functions are cached,
+   and subsequent calls will be fast. In general, the Numba engine is performant with
    a larger amount of data points (e.g. 1+ million).
 
 .. code-block:: ipython
@@ -870,12 +872,12 @@ Method summary
     :meth:`~Expanding.max`, Maximum
     :meth:`~Expanding.std`, Sample standard deviation
     :meth:`~Expanding.var`, Sample variance
-    :meth:`~Expanding.skew`, Unbiased skewness (3rd moment)
-    :meth:`~Expanding.kurt`, Unbiased kurtosis (4th moment)
+    :meth:`~Expanding.skew`, Sample skewness (3rd moment)
+    :meth:`~Expanding.kurt`, Sample kurtosis (4th moment)
     :meth:`~Expanding.quantile`, Sample quantile (value at %)
     :meth:`~Expanding.apply`, Generic apply
-    :meth:`~Expanding.cov`, Unbiased covariance (binary)
-    :meth:`~Expanding.corr`, Correlation (binary)
+    :meth:`~Expanding.cov`, Sample covariance (binary)
+    :meth:`~Expanding.corr`, Sample correlation (binary)
 
 .. note::
 
@@ -884,6 +886,8 @@ Method summary
    windows. See :ref:`this section <computation.window_variance.caveats>` for more
    information.
 
+   The same caveats apply to using any supported statistical sample methods.
+
 .. currentmodule:: pandas
 
 Aside from not having a ``window`` parameter, these functions have the same
 
@@ -1333,33 +1333,6 @@ Values can be set to NaT using np.nan, similar to datetime
    y[1] = np.nan
    y
 
-Aliasing axis names
--------------------
-
-To globally provide aliases for axis names, one can define these 2 functions:
-
-.. ipython:: python
-
-   def set_axis_alias(cls, axis, alias):
-       if axis not in cls._AXIS_NUMBERS:
-           raise Exception("invalid axis [%s] for alias [%s]" % (axis, alias))
-       cls._AXIS_ALIASES[alias] = axis
-
-.. ipython:: python
-
-   def clear_axis_alias(cls, axis, alias):
-       if axis not in cls._AXIS_NUMBERS:
-           raise Exception("invalid axis [%s] for alias [%s]" % (axis, alias))
-       cls._AXIS_ALIASES.pop(alias, None)
-
-.. ipython:: python
-
-   set_axis_alias(pd.DataFrame, 'columns', 'myaxis2')
-   df2 = pd.DataFrame(np.random.randn(3, 2), columns=['c1', 'c2'],
-                      index=['i1', 'i2', 'i3'])
-   df2.sum(axis='myaxis2')
-   clear_axis_alias(pd.DataFrame, 'columns', 'myaxis2')
-
 Creating example data
 ---------------------
 
 
@@ -1021,6 +1021,73 @@ that is itself a series, and possibly upcast the result to a DataFrame:
    the output as well as set the indices.
 
 
+Numba Accelerated Routines
+--------------------------
+
+.. versionadded:: 1.1
+
+If `Numba <https://numba.pydata.org/>`__ is installed as an optional dependency, the ``transform`` and
+``aggregate`` methods support ``engine='numba'`` and ``engine_kwargs`` arguments. The ``engine_kwargs``
+argument is a dictionary of keyword arguments that will be passed into the
+`numba.jit decorator <https://numba.pydata.org/numba-doc/latest/reference/jit-compilation.html#numba.jit>`__.
+These keyword arguments will be applied to the passed function. Currently only ``nogil``, ``nopython``,
+and ``parallel`` are supported, and their default values are set to ``False``, ``True`` and ``False`` respectively.
+
+The function signature must start with ``values, index`` **exactly**, because the data belonging to each group
+will be passed into ``values``, and the group index will be passed into ``index``.
+
+.. warning::
+
+   When using ``engine='numba'``, there will be no "fall back" behavior internally. The group
+   data and group index will be passed as NumPy arrays to the JITed user-defined function, and no
+   alternative execution will be attempted.
+
+.. note::
+
+   In terms of performance, **the first time a function is run using the Numba engine will be slow**
+   as Numba will have some function compilation overhead. However, the compiled functions are cached,
+   and subsequent calls will be fast. In general, the Numba engine is performant with
+   a larger number of data points (e.g. 1+ million).
+
+.. code-block:: ipython
+
+   In [1]: N = 10 ** 3
+
+   In [2]: data = {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N}
+
+   In [3]: df = pd.DataFrame(data, columns=[0, 1])
+
+   In [4]: def f_numba(values, index):
+      ...:     total = 0
+      ...:     for i, value in enumerate(values):
+      ...:         if i % 2:
+      ...:             total += value + 5
+      ...:         else:
+      ...:             total += value * 2
+      ...:     return total
+      ...:
+
+   In [5]: def f_cython(values):
+      ...:     total = 0
+      ...:     for i, value in enumerate(values):
+      ...:         if i % 2:
+      ...:             total += value + 5
+      ...:         else:
+      ...:             total += value * 2
+      ...:     return total
+      ...:
+
+   In [6]: groupby = df.groupby(0)
+   # Run the first time, compilation time will affect performance
+   In [7]: %timeit -r 1 -n 1 groupby.aggregate(f_numba, engine='numba')  # noqa: E225
+   2.14 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
+   # Function is cached and performance will improve
+   In [8]: %timeit groupby.aggregate(f_numba, engine='numba')
+   4.93 ms ± 32.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+
+   In [9]: %timeit groupby.aggregate(f_cython, engine='cython')
+   18.6 ms ± 84.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+
 Other useful features
 ---------------------