diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 43bf6d9dd1fee..eeb5592d744d6 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1,6 +1,7 @@
 import cython
 from cython import Py_ssize_t
 
+from cpython.ref cimport PyObject
 from cython cimport floating
 from libc.stdlib cimport (
     free,
@@ -56,6 +57,7 @@ cdef enum InterpolationEnumType:
     INTERPOLATION_NEAREST,
     INTERPOLATION_MIDPOINT
 
+include "groupby_mode_helper.pxi"
 
 cdef inline float64_t median_linear(float64_t* a, int n) nogil:
     cdef:
diff --git a/pandas/_libs/groupby_mode_helper.pxi.in b/pandas/_libs/groupby_mode_helper.pxi.in
new file mode 100644
index 0000000000000..de784a9ce7d09
--- /dev/null
+++ b/pandas/_libs/groupby_mode_helper.pxi.in
@@ -0,0 +1,177 @@
+{{py:
+# khash type suffixes; for each entry the loop below cimports the kh_*
+# functions and the matching table struct type from pandas._libs.khash
+cimported_types = [#'complex64',
+                   #'complex128',
+                   'float32',
+                   'float64',
+                   'int8',
+                   'int16',
+                   'int32',
+                   'int64',
+                   'pymap',
+                   'str',
+                   'strbox',
+                   'uint8',
+                   'uint16',
+                   'uint32',
+                   'uint64']
+}}
+
+{{for name in cimported_types}}
+from pandas._libs.khash cimport (
+    kh_destroy_{{name}},
+    kh_exist_{{name}},
+    kh_get_{{name}},
+    kh_init_{{name}},
+    kh_put_{{name}},
+    kh_resize_{{name}},
+    kh_{{name}}_t,
+)
+
+{{endfor}}
+
+from pandas._libs.khash cimport (
+    #khcomplex64_t,
+    #khcomplex128_t,
+    khiter_t,
+)
+
+from pandas._libs.hashtable import (
+    # NaN checking
+    #is_nan_khcomplex128_t,
+    #is_nan_khcomplex64_t,
+    is_nan_float64_t,
+    is_nan_float32_t,
+    is_nan_int64_t,
+    is_nan_int32_t,
+    is_nan_int16_t,
+    is_nan_int8_t,
+    is_nan_uint64_t,
+    is_nan_uint32_t,
+    is_nan_uint16_t,
+    is_nan_uint8_t,
+    # Casting
+    #to_complex64,
+    #to_complex128,
+    #to_khcomplex128_t,
+    #to_khcomplex64_t,
+)
+
+{{py:
+# TODO: add complex64 and complex128 (requires comparisons between complex numbers)
+# dtype: function-name suffix, ttype: khash suffix, c_type: C type of the values,
+# to_c_type: wrapped around each value read, to_dtype: not referenced below
+dtypes = [#('complex128', 'complex128', 'khcomplex128_t',
+          #               'to_khcomplex128_t', 'to_complex128'),
+          #('complex64', 'complex64', 'khcomplex64_t',
+          #              'to_khcomplex64_t', 'to_complex64'),
+          ('float64', 'float64', 'float64_t', '', ''),
+          ('float32', 'float32', 'float32_t', '', ''),
+          ('uint64', 'uint64', 'uint64_t', '', ''),
+          ('uint32', 'uint32', 'uint32_t', '', ''),
+          ('uint16', 'uint16', 'uint16_t', '', ''),
+          ('uint8', 'uint8', 'uint8_t', '', ''),
+          ('object', 'pymap', 'object', '', ''),
+          ('int64', 'int64', 'int64_t', '', ''),
+          ('int32', 'int32', 'int32_t', '', ''),
+          ('int16', 'int16', 'int16_t', '', ''),
+          ('int8', 'int8', 'int8_t', '', '')]
+}}
+
+{{for dtype, ttype, c_type, to_c_type, to_dtype in dtypes}}
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cdef {{c_type}} calc_mode_{{dtype}}(kh_{{ttype}}_t *table):
+    # Return the key of `table` with the highest stored count; for numeric
+    # dtypes ties go to the smallest key, otherwise to the last one visited.
+    cdef:
+        {{c_type}} best = 0  # initialised only to silence compiler warnings
+        {{c_type}} key
+        int freq, best_freq = 0
+        khiter_t slot
+    for slot in range(table.n_buckets):
+        if not kh_exist_{{ttype}}(table, slot):
+            continue
+        freq = table.vals[slot]
+        {{if dtype != 'object'}}
+        key = table.keys[slot]
+        if freq > best_freq or (freq == best_freq and key < best):
+        {{else}}
+        key = <object>table.keys[slot]
+        if freq >= best_freq:
+        {{endif}}
+            best = key
+            best_freq = freq
+    return best
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_mode_{{dtype}}(ndarray[{{c_type}}, ndim=1] out,
+                         ndarray[{{c_type}}, ndim=1] values,
+                         ndarray[int64_t, ndim=1] labels,
+                         bint dropna = True):
+    """
+    Compute the mode of each group; ties go to the smallest value (numeric
+    dtypes) or to one of the tied values (object dtype).
+
+    Parameters
+    ----------
+    out : ndarray[{{c_type}}]
+        Result buffer, modified in place: ``out[lab]`` receives the mode of
+        group ``lab``. Entries of labels that never occur are not written.
+    values : ndarray[{{c_type}}]
+        Values to aggregate.
+    labels : ndarray[int64]
+        Group label of every row of ``values``, in any order. Rows with a
+        negative label are skipped.
+    dropna : bool, default True
+        Whether missing values are left out of the counts.
+    """
+    cdef:
+        Py_ssize_t i, j, N = len(values)
+        int64_t lab, curr_label = -1
+        ndarray[int64_t, ndim=1] order
+        kh_{{ttype}}_t *table
+        khiter_t k
+        int ret = 0
+
+    # Visit rows in stable label order so that each group is one contiguous run
+    order = labels.argsort(kind="mergesort").astype("int64", copy=False)
+    table = kh_init_{{ttype}}()
+    # TODO: release the GIL for the non-object dtypes
+    for i in range(N):
+        j = order[i]
+        lab = labels[j]
+        if lab < 0:  # NaN case
+            continue
+        if lab != curr_label and curr_label != -1:
+            out[curr_label] = calc_mode_{{dtype}}(table)
+            kh_destroy_{{ttype}}(table)
+            table = kh_init_{{ttype}}()
+        curr_label = lab
+        val = {{to_c_type}}(values[j])
+        {{if dtype != 'object'}}
+        if not is_nan_{{c_type}}(val) or not dropna:
+            k = kh_get_{{ttype}}(table, val)
+            if k != table.n_buckets:
+                table.vals[k] += 1
+            else:
+                k = kh_put_{{ttype}}(table, val, &ret)
+                table.vals[k] = 1
+        {{else}}
+        if not checknull(val) or not dropna:
+            k = kh_get_{{ttype}}(table, <PyObject*>val)
+            if k != table.n_buckets:
+                table.vals[k] += 1
+            else:
+                k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
+                table.vals[k] = 1
+        {{endif}}
+    if curr_label != -1:
+        out[curr_label] = calc_mode_{{dtype}}(table)
+    kh_destroy_{{ttype}}(table)
+{{endfor}}
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 0b6bb170cc531..21a7550a80496 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -42,7 +42,8 @@ c_types = ['khcomplex128_t',
 
 {{for c_type in c_types}}
 
-cdef bint is_nan_{{c_type}}({{c_type}} val) nogil:
+cpdef bint is_nan_{{c_type}}({{c_type}} val) nogil:
+    # TODO: create missing.pxi.in and move there as cdef?
     {{if c_type in {'khcomplex128_t', 'khcomplex64_t'} }}
     return val.real != val.real or val.imag != val.imag
     {{elif c_type in {'float64_t', 'float32_t'} }}
diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index c169e29b74dbb..f92decce6c6be 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -141,6 +141,7 @@ def _gotitem(self, key, ndim, subset=None):
         "mad",
         "max",
         "mean",
+        "mode",
         "median",
         "min",
         "ngroup",
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index bc277bf67614d..072b53ba65381 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1578,6 +1578,35 @@ def median(self, numeric_only=True):
             numeric_only=numeric_only,
         )
 
+    @final
+    @Substitution(name="groupby")
+    @Appender(_common_see_also)
+    def mode(self, dropna=True, numeric_only=False):
+        """
+        Compute the mode of each group (the smallest one if there are several).
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            Whether to leave NaN values out of the mode calculation.
+        numeric_only : bool, default False
+            Whether to restrict the result to numeric columns.
+
+        Returns
+        -------
+        Series or DataFrame
+            Mode of values within each group.
+        """
+        # NOTE: _get_cythonized_result walks the columns in a Python loop, which
+        # may be slow for frames with many columns.
+        return self._get_cythonized_result(
+            how="group_mode",
+            needs_values=True,
+            aggregate=True,
+            numeric_only=numeric_only,
+            dropna=dropna,
+        )
+
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
@@ -2585,7 +2614,7 @@ def cummax(self, axis=0, **kwargs):
     def _get_cythonized_result(
         self,
         how: str,
-        cython_dtype: np.dtype,
+        cython_dtype: np.dtype = None,
         aggregate: bool = False,
         numeric_only: bool = True,
         needs_counts: bool = False,
@@ -2666,12 +2695,18 @@ def _get_cythonized_result(
 
         labels, _, ngroups = grouper.group_info
         output: Dict[base.OutputKey, np.ndarray] = {}
-        base_func = getattr(libgroupby, how)
+        if cython_dtype is not None:
+            base_func = getattr(libgroupby, how)
 
         error_msg = ""
         for idx, obj in enumerate(self._iterate_slices()):
             name = obj.name
             values = obj._values
+            if cython_dtype is None:
+                cython_dtype = values.dtype
+                # We also need to get the specific function for that dtype
+                how += f"_{cython_dtype}"
+                base_func = getattr(libgroupby, how)
 
             if numeric_only and not is_numeric_dtype(values):
                 continue
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index f9b45f4d9f4cf..d0085a408785b 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -388,3 +388,25 @@ def test_cython_agg_EA_known_dtypes(data, op_name, action, with_na):
 
     result = grouped["col"].aggregate(op_name)
     assert result.dtype == expected_dtype
+
+
+def test_mode_numeric():
+    # Rows whose key "A" is NaN belong to no group; ties pick the smallest value
+    data = {
+        "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan],
+        "B": ["A", "B"] * 6,
+        "C": [2, 4, 3, 1, 2, 3, 4, 5, 2, 7, 9, 9],
+    }
+    df = DataFrame(data)
+    # Group by 1 column
+    result = df.drop(columns="B").groupby("A").mode()
+    exp = DataFrame({"C": [1, 2]}, index=Series([0.0, 1.0], name="A"))
+    tm.assert_frame_equal(result, exp)
+    # Group by 2 columns; the rows of a group are interleaved with other groups
+    result = df.groupby(by=["A", "B"]).mode()
+    # C per group: (0, A) [2, 3]; (0, B) [4, 1]; (1, A) [2, 4, 2]; (1, B) [3, 5, 7]
+    exp = DataFrame(
+        {"C": [2, 1, 2, 3]},
+        index=pd.MultiIndex.from_product([[0.0, 1.0], ["A", "B"]], names=["A", "B"]),
+    )
+    tm.assert_frame_equal(result, exp)
diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py
index de8335738791d..3a5e0fb0231d2 100644
--- a/pandas/tests/groupby/test_allowlist.py
+++ b/pandas/tests/groupby/test_allowlist.py
@@ -285,6 +285,7 @@ def test_tab_completion(mframe):
         "mean",
         "median",
         "min",
+        "mode",
         "ngroups",
         "nth",
         "ohlc",
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 4049ef46f3006..f518997c7e7db 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -52,6 +52,7 @@ def f(a):
     "max": np.NaN,
     "mean": np.NaN,
     "median": np.NaN,
+    "mode": np.NaN,
     "min": np.NaN,
     "nth": np.NaN,
     "nunique": 0,
diff --git a/setup.py b/setup.py
index 45548fed68322..47a7b18d20d32 100755
--- a/setup.py
+++ b/setup.py
@@ -56,6 +56,10 @@ def is_platform_mac():
 
 _pxi_dep_template = {
     "algos": ["_libs/algos_common_helper.pxi.in", "_libs/algos_take_helper.pxi.in"],
+    "groupby": [
+        "_libs/groupby_mode_helper.pxi.in",
+        "_libs/khash_for_primitive_helper.pxi.in",
+    ],
     "hashtable": [
         "_libs/hashtable_class_helper.pxi.in",
         "_libs/hashtable_func_helper.pxi.in",
@@ -440,7 +444,14 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
         "include": klib_include,
         "depends": _pxi_dep["algos"],
     },
-    "_libs.groupby": {"pyxfile": "_libs/groupby"},
+    "_libs.groupby": {
+        "pyxfile": "_libs/groupby",
+        "include": klib_include,
+        "depends": (
+            ["pandas/_libs/src/klib/khash_python.h", "pandas/_libs/src/klib/khash.h"]
+            + _pxi_dep["groupby"]
+        ),
+    },
     "_libs.hashing": {"pyxfile": "_libs/hashing", "depends": []},
     "_libs.hashtable": {
         "pyxfile": "_libs/hashtable",