diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 94bd8b49777cf..a6bfff9bb00fe 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -191,7 +191,7 @@ def is_lexsorted(list_of_arrays: list) -> bint:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
+def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups):
     """
     Compute a 1-d indexer.
 
@@ -200,7 +200,7 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
 
     Parameters
     ----------
-    index: int64 ndarray
+    index: np.ndarray[np.intp]
         Mappings from group -> position.
-    ngroups: int64
+    ngroups: int
         Number of groups.
@@ -209,7 +209,7 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
     -------
     ndarray[intp_t, ndim=1]
         Indexer
-    ndarray[int64_t, ndim=1]
+    ndarray[intp_t, ndim=1]
         Group Counts
 
     Notes
@@ -218,13 +218,12 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
     """
     cdef:
         Py_ssize_t i, loc, label, n
-        ndarray[int64_t] counts, where
-        ndarray[intp_t] indexer
+        ndarray[intp_t] indexer, where, counts
 
-    counts = np.zeros(ngroups + 1, dtype=np.int64)
+    counts = np.zeros(ngroups + 1, dtype=np.intp)
     n = len(index)
     indexer = np.zeros(n, dtype=np.intp)
-    where = np.zeros(ngroups + 1, dtype=np.int64)
+    where = np.zeros(ngroups + 1, dtype=np.intp)
 
     with nogil:
 
diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
index cdf4ef3b119d2..929cb86c41036 100644
--- a/pandas/_libs/algos_take_helper.pxi.in
+++ b/pandas/_libs/algos_take_helper.pxi.in
@@ -8,6 +8,32 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 # take_1d, take_2d
 # ----------------------------------------------------------------------
 
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def take_1d_intp_intp(  # out[i] = values[indexer[i]], or fill_value at -1
+    const intp_t[:] values,  # source array that positions index into
+    const intp_t[:] indexer,  # positions into values; -1 selects fill_value
+    intp_t[::1] out,  # C-contiguous result buffer, written in place
+    intp_t fill_value=-1,  # stored wherever indexer[i] == -1
+):
+    cdef:
+        Py_ssize_t i, n, idx
+        intp_t fv
+
+    n = indexer.shape[0]  # one output element is written per indexer entry
+
+    fv = fill_value
+    # NOTE(review): bounds checks are off; assumes len(out) >= n and valid positions
+    with nogil:
+        for i in range(n):
+            idx = indexer[i]
+            if idx == -1:  # -1 is the fill sentinel, never used as a position
+                out[i] = fv
+            else:
+                out[i] = values[idx]
+
+
 {{py:
 
 # c_type_in, c_type_out
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index f09a6c04aecbf..11e08bfd181b0 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -37,6 +37,7 @@ from pandas._libs.util cimport (
 )
 
 from pandas._libs.algos import (
+    ensure_platform_int,
     groupsort_indexer,
     rank_1d,
     take_2d_axis1_float64_float64,
@@ -111,7 +112,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
     """
     cdef:
         Py_ssize_t i, j, N, K, ngroups, size
-        ndarray[int64_t] _counts
+        ndarray[intp_t] _counts
         ndarray[float64_t, ndim=2] data
         ndarray[intp_t] indexer
         float64_t* ptr
@@ -121,7 +122,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
     ngroups = len(counts)
     N, K = (<object>values).shape
 
-    indexer, _counts = groupsort_indexer(labels, ngroups)
+    indexer, _counts = groupsort_indexer(ensure_platform_int(labels), ngroups)
     counts[:] = _counts[1:]
 
     data = np.empty((K, N), dtype=np.float64)
diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx
index c2947de943e1a..7888a15a7cb26 100644
--- a/pandas/_libs/join.pyx
+++ b/pandas/_libs/join.pyx
@@ -21,10 +21,9 @@ from numpy cimport (
 cnp.import_array()
 
 from pandas._libs.algos import (
-    ensure_int64,
-    ensure_platform_int,
     groupsort_indexer,
     take_1d_int64_int64,
+    take_1d_intp_intp,
 )
 
 
@@ -34,16 +33,16 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
     cdef:
         Py_ssize_t i, j, k, count = 0
         ndarray[intp_t] left_sorter, right_sorter
-        ndarray[int64_t] left_count, right_count
-        ndarray[int64_t] left_indexer, right_indexer
-        int64_t lc, rc
+        ndarray[intp_t] left_count, right_count
+        ndarray[intp_t] left_indexer, right_indexer
+        intp_t lc, rc
         Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
         Py_ssize_t offset
 
     # NA group in location 0
 
-    left_sorter, left_count = groupsort_indexer(ensure_int64(left), max_groups)
-    right_sorter, right_count = groupsort_indexer(ensure_int64(right), max_groups)
+    left_sorter, left_count = groupsort_indexer(left, max_groups)
+    right_sorter, right_count = groupsort_indexer(right, max_groups)
 
     with nogil:
         # First pass, determine size of result set, do not use the NA group
@@ -58,8 +57,8 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
     left_pos = left_count[0]
     right_pos = right_count[0]
 
-    left_indexer = np.empty(count, dtype=np.int64)
-    right_indexer = np.empty(count, dtype=np.int64)
+    left_indexer = np.empty(count, dtype=np.intp)
+    right_indexer = np.empty(count, dtype=np.intp)
 
     with nogil:
         for i in range(1, max_groups + 1):
@@ -85,17 +84,17 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
                     Py_ssize_t max_groups, bint sort=True):
     cdef:
         Py_ssize_t i, j, k, count = 0
-        ndarray[int64_t] left_count, right_count
+        ndarray[intp_t] left_count, right_count
         ndarray[intp_t] rev, left_sorter, right_sorter
-        ndarray[int64_t] left_indexer, right_indexer
-        int64_t lc, rc
+        ndarray[intp_t] left_indexer, right_indexer
+        intp_t lc, rc
         Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
         Py_ssize_t offset
 
     # NA group in location 0
 
-    left_sorter, left_count = groupsort_indexer(ensure_int64(left), max_groups)
-    right_sorter, right_count = groupsort_indexer(ensure_int64(right), max_groups)
+    left_sorter, left_count = groupsort_indexer(left, max_groups)
+    right_sorter, right_count = groupsort_indexer(right, max_groups)
 
     with nogil:
         # First pass, determine size of result set, do not use the NA group
@@ -109,8 +108,8 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
     left_pos = left_count[0]
     right_pos = right_count[0]
 
-    left_indexer = np.empty(count, dtype=np.int64)
-    right_indexer = np.empty(count, dtype=np.int64)
+    left_indexer = np.empty(count, dtype=np.intp)
+    right_indexer = np.empty(count, dtype=np.intp)
 
     with nogil:
         for i in range(1, max_groups + 1):
@@ -142,11 +141,10 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
             # this is a short-cut to avoid groupsort_indexer
             # otherwise, the `else` path also works in this case
             rev = np.empty(len(left), dtype=np.intp)
-            rev.put(ensure_platform_int(left_sorter), np.arange(len(left)))
+            rev.put(left_sorter, np.arange(len(left)))
         else:
             rev, _ = groupsort_indexer(left_indexer, len(left))
 
-        rev = ensure_platform_int(rev)
         right_indexer = right_indexer.take(rev)
         left_indexer = left_indexer.take(rev)
 
@@ -159,16 +157,16 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
     cdef:
         Py_ssize_t i, j, k, count = 0
         ndarray[intp_t] left_sorter, right_sorter
-        ndarray[int64_t] left_count, right_count
-        ndarray[int64_t] left_indexer, right_indexer
-        int64_t lc, rc
-        int64_t left_pos = 0, right_pos = 0
+        ndarray[intp_t] left_count, right_count
+        ndarray[intp_t] left_indexer, right_indexer
+        intp_t lc, rc
+        intp_t left_pos = 0, right_pos = 0
         Py_ssize_t offset, position = 0
 
     # NA group in location 0
 
-    left_sorter, left_count = groupsort_indexer(ensure_int64(left), max_groups)
-    right_sorter, right_count = groupsort_indexer(ensure_int64(right), max_groups)
+    left_sorter, left_count = groupsort_indexer(left, max_groups)
+    right_sorter, right_count = groupsort_indexer(right, max_groups)
 
     with nogil:
         # First pass, determine size of result set, do not use the NA group
@@ -185,8 +183,8 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
     left_pos = left_count[0]
     right_pos = right_count[0]
 
-    left_indexer = np.empty(count, dtype=np.int64)
-    right_indexer = np.empty(count, dtype=np.int64)
+    left_indexer = np.empty(count, dtype=np.intp)
+    right_indexer = np.empty(count, dtype=np.intp)
 
     with nogil:
         for i in range(1, max_groups + 1):
@@ -217,31 +215,29 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
             _get_result_indexer(right_sorter, right_indexer))
 
 
-cdef ndarray[int64_t] _get_result_indexer(
-    ndarray[intp_t] sorter, ndarray[int64_t] indexer
+cdef ndarray[intp_t] _get_result_indexer(
+    ndarray[intp_t] sorter, ndarray[intp_t] indexer
 ):
     if len(sorter) > 0:
         # cython-only equivalent to
         #  `res = algos.take_nd(sorter, indexer, fill_value=-1)`
-        res = np.empty(len(indexer), dtype=np.int64)
-        take_1d_int64_int64(ensure_int64(sorter), ensure_platform_int(indexer), res, -1)
-        # FIXME: sorter is intp_t, not int64_t, opposite for indexer;
-        #  will this break on 32bit builds?
+        res = np.empty(len(indexer), dtype=np.intp)  # filled by the take below
+        take_1d_intp_intp(sorter, indexer, res, -1)  # -1 in indexer -> -1 in res
     else:
         # length-0 case
-        res = np.empty(len(indexer), dtype=np.int64)
+        res = np.empty(len(indexer), dtype=np.intp)  # empty sorter: all -1
         res[:] = -1
 
     return res
 
 
-def ffill_indexer(const int64_t[:] indexer):
+def ffill_indexer(const intp_t[:] indexer):
     cdef:
         Py_ssize_t i, n = len(indexer)
-        ndarray[int64_t] result
-        int64_t val, last_obs
+        ndarray[intp_t] result
+        intp_t val, last_obs
 
-    result = np.empty(n, dtype=np.int64)
+    result = np.empty(n, dtype=np.intp)
     last_obs = -1
 
     for i in range(n):
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 1398db6960cc8..3c88590991d77 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1981,9 +1981,9 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
         """
         categories = self.categories
         r, counts = libalgos.groupsort_indexer(
-            self.codes.astype("int64", copy=False), categories.size
+            ensure_platform_int(self.codes), categories.size
         )
-        counts = counts.cumsum()
+        counts = ensure_int64(counts).cumsum()
         _result = (r[start:end] for start, end in zip(counts, counts[1:]))
         return dict(zip(categories, _result))
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index e5e7b446d9cb2..094f4a67d2e61 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4154,7 +4154,7 @@ def _get_leaf_sorter(labels: List[np.ndarray]) -> np.ndarray:
                 return np.empty(0, dtype=np.intp)
 
             if len(labels) == 1:
-                return get_group_index_sorter(labels[0])
+                return get_group_index_sorter(ensure_platform_int(labels[0]))
 
             # find indexers of beginning of each set of
             # same-key labels w.r.t all but last level
@@ -4224,7 +4224,7 @@ def _get_leaf_sorter(labels: List[np.ndarray]) -> np.ndarray:
                 if level == 0:  # outer most level, take the fast route
                     ngroups = 1 + new_lev_codes.max()
                     left_indexer, counts = libalgos.groupsort_indexer(
-                        ensure_int64(new_lev_codes), ngroups
+                        new_lev_codes, ngroups
                     )
 
                     # missing values are placed first; drop them!
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 88fcc13502439..02c41538ca123 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -604,7 +604,7 @@ def get_group_index_sorter(
         (alpha + beta * ngroups) < (count * np.log(count))  # type: ignore[operator]
     )
     if do_groupsort:
-        sorter, _ = algos.groupsort_indexer(ensure_int64(group_index), ngroups)
+        sorter, _ = algos.groupsort_indexer(ensure_platform_int(group_index), ngroups)
         # sorter _should_ already be intp, but mypy is not yet able to verify
     else:
         sorter = group_index.argsort(kind="mergesort")
diff --git a/pandas/tests/libs/test_join.py b/pandas/tests/libs/test_join.py
index f5426c71511bb..eeb66f8941260 100644
--- a/pandas/tests/libs/test_join.py
+++ b/pandas/tests/libs/test_join.py
@@ -264,8 +264,8 @@ def test_left_outer_join_bug():
 
     lidx, ridx = libjoin.left_outer_join(left, right, max_groups, sort=False)
 
-    exp_lidx = np.arange(len(left), dtype=np.int64)
-    exp_ridx = -np.ones(len(left), dtype=np.int64)
+    exp_lidx = np.arange(len(left), dtype=np.intp)
+    exp_ridx = -np.ones(len(left), dtype=np.intp)
 
     exp_ridx[left == 1] = 1
     exp_ridx[left == 3] = 0
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index c8df18ddaeebe..cd800b3f3a452 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -2116,8 +2116,8 @@ def test_is_lexsorted():
 
 
 def test_groupsort_indexer():
-    a = np.random.randint(0, 1000, 100).astype(np.int64)
-    b = np.random.randint(0, 1000, 100).astype(np.int64)
+    a = np.random.randint(0, 1000, 100).astype(np.intp)
+    b = np.random.randint(0, 1000, 100).astype(np.intp)
 
     result = libalgos.groupsort_indexer(a, 1000)[0]