From 25c6cfd607888baaa930889af87297e934061ba2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 8 Sep 2021 14:06:34 -0700
Subject: [PATCH 1/7] TYP: groupby

---
 pandas/core/groupby/ops.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 07353863cdc0b..14d7179751ad7 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -807,7 +807,7 @@ def is_monotonic(self) -> bool:
         return Index(self.group_info[0]).is_monotonic
 
     @cache_readonly
-    def group_info(self):
+    def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64], int]:
         comp_ids, obs_group_ids = self._get_compressed_codes()
 
         ngroups = len(obs_group_ids)
@@ -817,22 +817,26 @@ def group_info(self):
 
     @final
     @cache_readonly
-    def codes_info(self) -> np.ndarray:
+    def codes_info(self) -> npt.NDArray[np.intp]:
         # return the codes of items in original grouped axis
         ids, _, _ = self.group_info
         if self.indexer is not None:
             sorter = np.lexsort((ids, self.indexer))
             ids = ids[sorter]
+            ids = ensure_platform_int(ids)
+            # TODO: if numpy annotates np.lexsort, this ensure_platform_int
+            #  may become unnecessary
         return ids
 
     @final
-    def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]:
+    def _get_compressed_codes(self) -> tuple[np.ndarray, npt.NDArray[np.int64]]:
+        # The first returned ndarray may have any signed integer dtype
         if len(self.groupings) > 1:
             group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
             return compress_group_index(group_index, sort=self._sort)
 
         ping = self.groupings[0]
-        return ping.codes, np.arange(len(ping.group_index))
+        return ping.codes, np.arange(len(ping.group_index), dtype=np.int64)
 
     @final
     @cache_readonly
@@ -1017,7 +1021,7 @@ class BinGrouper(BaseGrouper):
 
     """
 
-    bins: np.ndarray  # np.ndarray[np.int64]
+    bins: npt.NDArray[np.int64]
     binlabels: Index
     mutated: bool
 
@@ -1101,7 +1105,7 @@ def indices(self):
         return indices
 
     @cache_readonly
-    def group_info(self):
+    def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64], int]:
         ngroups = self.ngroups
         obs_group_ids = np.arange(ngroups, dtype=np.int64)
         rep = np.diff(np.r_[0, self.bins])

From 882c1436912ba6e2ee643f7990bcbc3478f0fc74 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 19 Sep 2021 12:30:47 -0700
Subject: [PATCH 2/7] TYP: groupby

---
 pandas/_libs/lib.pyi           | 2 +-
 pandas/core/groupby/grouper.py | 3 ++-
 pandas/core/groupby/ops.py     | 2 +-
 pandas/core/sorting.py         | 3 ++-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index e60250169edd1..87fe20214713e 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -197,7 +197,7 @@ def indices_fast(
     labels: np.ndarray,  # const int64_t[:]
     keys: list,
     sorted_labels: list[npt.NDArray[np.int64]],
-) -> dict: ...
+) -> dict[Hashable, npt.NDArray[np.intp]]: ...
 def generate_slices(
     labels: np.ndarray, ngroups: int  # const intp_t[:]
 ) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 02f2024091877..7577b1e671d60 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -17,6 +17,7 @@
 from pandas._typing import (
     ArrayLike,
     NDFrameT,
+    npt,
 )
 from pandas.errors import InvalidIndexError
 from pandas.util._decorators import cache_readonly
@@ -604,7 +605,7 @@ def ngroups(self) -> int:
         return len(self.group_index)
 
     @cache_readonly
-    def indices(self):
+    def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
         # we have a list of groupers
         if isinstance(self.grouping_vector, ops.BaseGrouper):
             return self.grouping_vector.indices
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 14d7179751ad7..1cde1da731837 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -756,7 +756,7 @@ def apply(
         return result_values, mutated
 
     @cache_readonly
-    def indices(self):
+    def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
         """dict {group name -> group indices}"""
         if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex):
             # This shows unused categories in indices GH#38642
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index befa67350e182..ccb51a0ea2132 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -6,6 +6,7 @@
     TYPE_CHECKING,
     Callable,
     DefaultDict,
+    Hashable,
     Iterable,
     Sequence,
 )
@@ -576,7 +577,7 @@ def get_flattened_list(
 
 def get_indexer_dict(
     label_list: list[np.ndarray], keys: list[Index]
-) -> dict[str | tuple, np.ndarray]:
+) -> dict[Hashable, npt.NDArray[np.intp]]:
     """
     Returns
     -------

From 82d5ff59a4e848579c82f7c7465719a12f8acb6a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 19 Sep 2021 12:51:52 -0700
Subject: [PATCH 3/7] missing import

---
 pandas/_libs/lib.pyi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index 87fe20214713e..b88a2e4c28cfb 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -5,6 +5,7 @@ from typing import (
     Any,
     Callable,
     Generator,
+    Hashable,
     Literal,
     overload,
 )

From 9da17fb76d58a2595fc5e1f110c13e8ea0e01605 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 23 Sep 2021 11:21:14 -0700
Subject: [PATCH 4/7] TYP blocks

---
 pandas/core/internals/blocks.py | 39 ++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 002473a1a5fb2..2279dbd283905 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -28,6 +28,7 @@
     DtypeObj,
     F,
     Shape,
+    npt,
 )
 from pandas.util._decorators import cache_readonly
 from pandas.util._validators import validate_bool_kwarg
@@ -294,7 +295,7 @@ def __repr__(self) -> str:
     def __len__(self) -> int:
         return len(self.values)
 
-    def _slice(self, slicer):
+    def _slice(self, slicer) -> ArrayLike:
         """return a slice of my values"""
 
         return self.values[slicer]
@@ -343,7 +344,7 @@ def dtype(self) -> DtypeObj:
     def iget(self, i):
         return self.values[i]
 
-    def set_inplace(self, locs, values):
+    def set_inplace(self, locs, values) -> None:
         """
         Modify block values in-place with new item value.
 
@@ -562,13 +563,13 @@ def _downcast_2d(self) -> list[Block]:
         return [self.make_block(new_values)]
 
     @final
-    def astype(self, dtype, copy: bool = False, errors: str = "raise"):
+    def astype(self, dtype: DtypeObj, copy: bool = False, errors: str = "raise"):
         """
         Coerce to the new dtype.
 
         Parameters
         ----------
-        dtype : str, dtype convertible
+        dtype : np.dtype or ExtensionDtype
         copy : bool, default False
             copy if indicated
         errors : str, {'raise', 'ignore'}, default 'raise'
@@ -1278,7 +1279,13 @@ def where(self, other, cond, errors="raise") -> list[Block]:
 
         return result_blocks
 
-    def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
+    def _unstack(
+        self,
+        unstacker,
+        fill_value,
+        new_placement: npt.NDArray[np.intp],
+        allow_fill: bool,
+    ):
         """
         Return a list of unstacked blocks of self
 
@@ -1434,7 +1441,7 @@ def iget(self, col):
                 raise IndexError(f"{self} only contains one item")
             return self.values
 
-    def set_inplace(self, locs, values):
+    def set_inplace(self, locs, values) -> None:
         # NB: This is a misnomer, is supposed to be inplace but is not,
         #  see GH#33457
         assert locs.tolist() == [0]
@@ -1502,7 +1509,7 @@ def setitem(self, indexer, value):
             # https://github.com/pandas-dev/pandas/issues/24020
             # Need a dedicated setitem until GH#24020 (type promotion in setitem
             #  for extension arrays) is designed and implemented.
-            return self.astype(object).setitem(indexer, value)
+            return self.astype(_dtype_obj).setitem(indexer, value)
 
         if isinstance(indexer, tuple):
             # TODO(EA2D): not needed with 2D EAs
@@ -1540,7 +1547,7 @@ def take_nd(
 
         return self.make_block_same_class(new_values, new_mgr_locs)
 
-    def _slice(self, slicer):
+    def _slice(self, slicer) -> ExtensionArray:
         """
         Return a slice of my values.
 
@@ -1551,7 +1558,7 @@ def _slice(self, slicer):
 
         Returns
         -------
-        np.ndarray or ExtensionArray
+        ExtensionArray
         """
         # return same dims as we currently have
         if not isinstance(slicer, tuple) and self.ndim == 2:
@@ -1668,7 +1675,13 @@ def where(self, other, cond, errors="raise") -> list[Block]:
 
         return [self.make_block_same_class(result)]
 
-    def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
+    def _unstack(
+        self,
+        unstacker,
+        fill_value,
+        new_placement: npt.NDArray[np.intp],
+        allow_fill: bool,
+    ):
         # ExtensionArray-safe unstack.
         # We override ObjectBlock._unstack, which unstacks directly on the
         # values of the array. For EA-backed blocks, this would require
@@ -1723,7 +1736,7 @@ def is_view(self) -> bool:
     def setitem(self, indexer, value):
         if not self._can_hold_element(value):
             # TODO: general case needs casting logic.
-            return self.astype(object).setitem(indexer, value)
+            return self.astype(_dtype_obj).setitem(indexer, value)
 
         values = self.values
         if self.ndim > 1:
@@ -1737,7 +1750,7 @@ def putmask(self, mask, new) -> list[Block]:
         mask = extract_bool_array(mask)
 
         if not self._can_hold_element(new):
-            return self.astype(object).putmask(mask, new)
+            return self.astype(_dtype_obj).putmask(mask, new)
 
         arr = self.values
         arr.T.putmask(mask, new)
@@ -1795,7 +1808,7 @@ def fillna(
             # We support filling a DatetimeTZ with a `value` whose timezone
             #  is different by coercing to object.
             # TODO: don't special-case td64
-            return self.astype(object).fillna(value, limit, inplace, downcast)
+            return self.astype(_dtype_obj).fillna(value, limit, inplace, downcast)
 
         values = self.values
         values = values if inplace else values.copy()

From 86f29b3b1652d6df04ca20c3a672b288b68e4b38 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 29 Sep 2021 20:31:13 -0700
Subject: [PATCH 5/7] 32bit fixup

---
 pandas/core/groupby/ops.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 1cde1da731837..ece704d98046e 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -807,7 +807,7 @@ def is_monotonic(self) -> bool:
         return Index(self.group_info[0]).is_monotonic
 
     @cache_readonly
-    def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64], int]:
+    def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
         comp_ids, obs_group_ids = self._get_compressed_codes()
 
         ngroups = len(obs_group_ids)
@@ -829,14 +829,14 @@ def codes_info(self) -> npt.NDArray[np.intp]:
         return ids
 
     @final
-    def _get_compressed_codes(self) -> tuple[np.ndarray, npt.NDArray[np.int64]]:
+    def _get_compressed_codes(self) -> tuple[np.ndarray, npt.NDArray[np.intp]]:
         # The first returned ndarray may have any signed integer dtype
         if len(self.groupings) > 1:
             group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
             return compress_group_index(group_index, sort=self._sort)
 
         ping = self.groupings[0]
-        return ping.codes, np.arange(len(ping.group_index), dtype=np.int64)
+        return ping.codes, np.arange(len(ping.group_index), dtype=np.intp)
 
     @final
     @cache_readonly
@@ -1105,9 +1105,9 @@ def indices(self):
         return indices
 
     @cache_readonly
-    def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64], int]:
+    def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
         ngroups = self.ngroups
-        obs_group_ids = np.arange(ngroups, dtype=np.int64)
+        obs_group_ids = np.arange(ngroups, dtype=np.intp)
         rep = np.diff(np.r_[0, self.bins])
 
         rep = ensure_platform_int(rep)

From d3e4e7874003fad5817db5fdb7047ef2d3674902 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 30 Sep 2021 09:02:07 -0700
Subject: [PATCH 6/7] win32 compat

---
 pandas/tests/groupby/test_grouping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 527b93a28359c..394352036e50a 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -688,7 +688,7 @@ def test_groupby_empty(self):
         )
 
         tm.assert_numpy_array_equal(
-            gr.grouper.group_info[1], np.array([], dtype=np.dtype("int"))
+            gr.grouper.group_info[1], np.array([], dtype=np.dtype(np.int64))
         )
 
         assert gr.grouper.group_info[2] == 0

From 3f76c43bbaf5c45d81849a7a5b1cd61c71fc2563 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 30 Sep 2021 11:09:10 -0700
Subject: [PATCH 7/7] 32bit fixup

---
 pandas/tests/groupby/test_grouping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 394352036e50a..efb0b82f58e97 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -688,7 +688,7 @@ def test_groupby_empty(self):
         )
 
         tm.assert_numpy_array_equal(
-            gr.grouper.group_info[1], np.array([], dtype=np.dtype(np.int64))
+            gr.grouper.group_info[1], np.array([], dtype=np.dtype(np.intp))
         )
 
         assert gr.grouper.group_info[2] == 0