Skip to content

Commit 9909acb

Browse files
jbrockmendel authored and yehoshuadimarsky committed
TYP: groupby, sorting (pandas-dev#46133)
1 parent 5ddca85 commit 9909acb

File tree

7 files changed

+34
-18
lines changed

7 files changed

+34
-18
lines changed

pandas/core/groupby/grouper.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ class Grouping:
459459
* groups : dict of {group -> label_list}
460460
"""
461461

462-
_codes: np.ndarray | None = None
462+
_codes: npt.NDArray[np.signedinteger] | None = None
463463
_group_index: Index | None = None
464464
_passed_categorical: bool
465465
_all_grouper: Categorical | None
@@ -614,7 +614,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
614614
return values._reverse_indexer()
615615

616616
@property
617-
def codes(self) -> np.ndarray:
617+
def codes(self) -> npt.NDArray[np.signedinteger]:
618618
if self._codes is not None:
619619
# _codes is set in __init__ for MultiIndex cases
620620
return self._codes
@@ -657,7 +657,7 @@ def group_index(self) -> Index:
657657
return Index._with_infer(uniques, name=self.name)
658658

659659
@cache_readonly
660-
def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
660+
def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
661661
if self._passed_categorical:
662662
# we make a CategoricalIndex out of the cat grouper
663663
# preserving the categories / ordered attributes

pandas/core/groupby/ops.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
797797

798798
@final
799799
@property
800-
def codes(self) -> list[np.ndarray]:
800+
def codes(self) -> list[npt.NDArray[np.signedinteger]]:
801801
return [ping.codes for ping in self.groupings]
802802

803803
@property
@@ -860,11 +860,14 @@ def codes_info(self) -> npt.NDArray[np.intp]:
860860
return ids
861861

862862
@final
863-
def _get_compressed_codes(self) -> tuple[np.ndarray, npt.NDArray[np.intp]]:
863+
def _get_compressed_codes(
864+
self,
865+
) -> tuple[npt.NDArray[np.signedinteger], npt.NDArray[np.intp]]:
864866
# The first returned ndarray may have any signed integer dtype
865867
if len(self.groupings) > 1:
866868
group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
867869
return compress_group_index(group_index, sort=self._sort)
870+
# FIXME: compress_group_index's second return value is int64, not intp
868871

869872
ping = self.groupings[0]
870873
return ping.codes, np.arange(len(ping.group_index), dtype=np.intp)
@@ -875,7 +878,7 @@ def ngroups(self) -> int:
875878
return len(self.result_index)
876879

877880
@property
878-
def reconstructed_codes(self) -> list[np.ndarray]:
881+
def reconstructed_codes(self) -> list[npt.NDArray[np.intp]]:
879882
codes = self.codes
880883
ids, obs_ids, _ = self.group_info
881884
return decons_obs_group_ids(ids, obs_ids, self.shape, codes, xnull=True)

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -2187,7 +2187,9 @@ def _drop_level_numbers(self, levnums: list[int]):
21872187
verify_integrity=False,
21882188
)
21892189

2190-
def _get_grouper_for_level(self, mapper, *, level=None):
2190+
def _get_grouper_for_level(
2191+
self, mapper, *, level=None
2192+
) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]:
21912193
"""
21922194
Get index grouper corresponding to an index level
21932195

pandas/core/indexes/multi.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1481,7 +1481,9 @@ def _set_names(self, names, *, level=None, validate: bool = True):
14811481
# --------------------------------------------------------------------
14821482

14831483
@doc(Index._get_grouper_for_level)
1484-
def _get_grouper_for_level(self, mapper, *, level):
1484+
def _get_grouper_for_level(
1485+
self, mapper, *, level=None
1486+
) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]:
14851487
indexer = self.codes[level]
14861488
level_index = self.levels[level]
14871489

pandas/core/reshape/reshape.py

-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ def _make_selectors(self):
185185

186186
self.group_index = comp_index
187187
self.mask = mask
188-
self.unique_groups = obs_ids
189188
self.compressor = comp_index.searchsorted(np.arange(ngroups))
190189

191190
@cache_readonly

pandas/core/sorting.py

+17-7
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,9 @@ def is_int64_overflow_possible(shape: Shape) -> bool:
224224
return the_prod >= lib.i8max
225225

226226

227-
def decons_group_index(comp_labels, shape: Shape):
227+
def _decons_group_index(
228+
comp_labels: npt.NDArray[np.intp], shape: Shape
229+
) -> list[npt.NDArray[np.intp]]:
228230
# reconstruct labels
229231
if is_int64_overflow_possible(shape):
230232
# at some point group indices are factorized,
@@ -233,7 +235,7 @@ def decons_group_index(comp_labels, shape: Shape):
233235

234236
label_list = []
235237
factor = 1
236-
y = 0
238+
y = np.array(0)
237239
x = comp_labels
238240
for i in reversed(range(len(shape))):
239241
labels = (x - y) % (factor * shape[i]) // factor
@@ -245,24 +247,32 @@ def decons_group_index(comp_labels, shape: Shape):
245247

246248

247249
def decons_obs_group_ids(
248-
comp_ids: npt.NDArray[np.intp], obs_ids, shape: Shape, labels, xnull: bool
249-
):
250+
comp_ids: npt.NDArray[np.intp],
251+
obs_ids: npt.NDArray[np.intp],
252+
shape: Shape,
253+
labels: Sequence[npt.NDArray[np.signedinteger]],
254+
xnull: bool,
255+
) -> list[npt.NDArray[np.intp]]:
250256
"""
251257
Reconstruct labels from observed group ids.
252258
253259
Parameters
254260
----------
255261
comp_ids : np.ndarray[np.intp]
262+
obs_ids : np.ndarray[np.intp]
263+
shape : tuple[int]
264+
labels : Sequence[np.ndarray[np.signedinteger]]
256265
xnull : bool
257266
If nulls are excluded; i.e. -1 labels are passed through.
258267
"""
259268
if not xnull:
260-
lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8")
261-
shape = np.asarray(shape, dtype="i8") + lift
269+
lift = np.fromiter(((a == -1).any() for a in labels), dtype=np.intp)
270+
arr_shape = np.asarray(shape, dtype=np.intp) + lift
271+
shape = tuple(arr_shape)
262272

263273
if not is_int64_overflow_possible(shape):
264274
# obs ids are deconstructable! take the fast route!
265-
out = decons_group_index(obs_ids, shape)
275+
out = _decons_group_index(obs_ids, shape)
266276
return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)]
267277

268278
indexer = unique_label_indices(comp_ids)

pandas/tests/test_sorting.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from pandas.core.algorithms import safe_sort
2323
import pandas.core.common as com
2424
from pandas.core.sorting import (
25-
decons_group_index,
25+
_decons_group_index,
2626
get_group_index,
2727
is_int64_overflow_possible,
2828
lexsort_indexer,
@@ -389,7 +389,7 @@ def align(df):
389389
)
390390
def test_decons(codes_list, shape):
391391
group_index = get_group_index(codes_list, shape, sort=True, xnull=True)
392-
codes_list2 = decons_group_index(group_index, shape)
392+
codes_list2 = _decons_group_index(group_index, shape)
393393

394394
for a, b in zip(codes_list, codes_list2):
395395
tm.assert_numpy_array_equal(a, b)

0 commit comments

Comments
 (0)