Skip to content

Commit 132e191

Browse files
authored
TYP: annotations in core.groupby (#35939)
1 parent 0e621bc commit 132e191

File tree

5 files changed

+35 -29 lines changed

pandas/core/groupby/categorical.py

+12 -4
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
from typing import Optional, Tuple
2+
13
import numpy as np
24

35
from pandas.core.algorithms import unique1d
@@ -6,9 +8,12 @@
68
CategoricalDtype,
79
recode_for_categories,
810
)
11+
from pandas.core.indexes.api import CategoricalIndex
912

1013

11-
def recode_for_groupby(c: Categorical, sort: bool, observed: bool):
14+
def recode_for_groupby(
15+
c: Categorical, sort: bool, observed: bool
16+
) -> Tuple[Categorical, Optional[Categorical]]:
1217
"""
1318
Code the categories to ensure we can groupby for categoricals.
1419
@@ -73,7 +78,9 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool):
7378
return c.reorder_categories(cat.categories), None
7479

7580

76-
def recode_from_groupby(c: Categorical, sort: bool, ci):
81+
def recode_from_groupby(
82+
c: Categorical, sort: bool, ci: CategoricalIndex
83+
) -> CategoricalIndex:
7784
"""
7885
Reverse the codes_to_groupby to account for sort / observed.
7986
@@ -91,7 +98,8 @@ def recode_from_groupby(c: Categorical, sort: bool, ci):
9198
"""
9299
# we re-order to the original category orderings
93100
if sort:
94-
return ci.set_categories(c.categories)
101+
return ci.set_categories(c.categories) # type: ignore [attr-defined]
95102

96103
# we are not sorting, so add unobserved to the end
97-
return ci.add_categories(c.categories[~c.categories.isin(ci.categories)])
104+
new_cats = c.categories[~c.categories.isin(ci.categories)]
105+
return ci.add_categories(new_cats) # type: ignore [attr-defined]

pandas/core/groupby/generic.py

+15 -19
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
Type,
2424
TypeVar,
2525
Union,
26+
cast,
2627
)
2728
import warnings
2829

@@ -83,7 +84,7 @@
8384
from pandas.plotting import boxplot_frame_groupby
8485

8586
if TYPE_CHECKING:
86-
from pandas.core.internals import Block
87+
from pandas.core.internals import Block # noqa:F401
8788

8889

8990
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
@@ -1591,7 +1592,7 @@ def _gotitem(self, key, ndim: int, subset=None):
15911592
Parameters
15921593
----------
15931594
key : string / list of selections
1594-
ndim : 1,2
1595+
ndim : {1, 2}
15951596
requested ndim of result
15961597
subset : object, default None
15971598
subset to act on
@@ -1617,7 +1618,7 @@ def _gotitem(self, key, ndim: int, subset=None):
16171618

16181619
raise AssertionError("invalid ndim for _gotitem")
16191620

1620-
def _wrap_frame_output(self, result, obj) -> DataFrame:
1621+
def _wrap_frame_output(self, result, obj: DataFrame) -> DataFrame:
16211622
result_index = self.grouper.levels[0]
16221623

16231624
if self.axis == 0:
@@ -1634,20 +1635,14 @@ def _get_data_to_aggregate(self) -> BlockManager:
16341635
else:
16351636
return obj._mgr
16361637

1637-
def _insert_inaxis_grouper_inplace(self, result):
1638+
def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None:
16381639
# zip in reverse so we can always insert at loc 0
1639-
izip = zip(
1640-
*map(
1641-
reversed,
1642-
(
1643-
self.grouper.names,
1644-
self.grouper.get_group_levels(),
1645-
[grp.in_axis for grp in self.grouper.groupings],
1646-
),
1647-
)
1648-
)
16491640
columns = result.columns
1650-
for name, lev, in_axis in izip:
1641+
for name, lev, in_axis in zip(
1642+
reversed(self.grouper.names),
1643+
reversed(self.grouper.get_group_levels()),
1644+
reversed([grp.in_axis for grp in self.grouper.groupings]),
1645+
):
16511646
# GH #28549
16521647
# When using .apply(-), name will be in columns already
16531648
if in_axis and name not in columns:
@@ -1712,7 +1707,7 @@ def _wrap_transformed_output(
17121707

17131708
return result
17141709

1715-
def _wrap_agged_blocks(self, blocks: "Sequence[Block]", items: Index) -> DataFrame:
1710+
def _wrap_agged_blocks(self, blocks: Sequence["Block"], items: Index) -> DataFrame:
17161711
if not self.as_index:
17171712
index = np.arange(blocks[0].values.shape[-1])
17181713
mgr = BlockManager(blocks, axes=[items, index])
@@ -1739,7 +1734,7 @@ def _iterate_column_groupbys(self):
17391734
exclusions=self.exclusions,
17401735
)
17411736

1742-
def _apply_to_column_groupbys(self, func):
1737+
def _apply_to_column_groupbys(self, func) -> DataFrame:
17431738
from pandas.core.reshape.concat import concat
17441739

17451740
return concat(
@@ -1748,7 +1743,7 @@ def _apply_to_column_groupbys(self, func):
17481743
axis=1,
17491744
)
17501745

1751-
def count(self):
1746+
def count(self) -> DataFrame:
17521747
"""
17531748
Compute count of group, excluding missing values.
17541749
@@ -1778,7 +1773,7 @@ def count(self):
17781773

17791774
return self._reindex_output(result, fill_value=0)
17801775

1781-
def nunique(self, dropna: bool = True):
1776+
def nunique(self, dropna: bool = True) -> DataFrame:
17821777
"""
17831778
Return DataFrame with counts of unique elements in each position.
17841779
@@ -1844,6 +1839,7 @@ def nunique(self, dropna: bool = True):
18441839
],
18451840
axis=1,
18461841
)
1842+
results = cast(DataFrame, results)
18471843

18481844
if axis_number == 1:
18491845
results = results.T

pandas/core/groupby/groupby.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -459,7 +459,7 @@ def f(self):
459459

460460

461461
@contextmanager
462-
def _group_selection_context(groupby):
462+
def _group_selection_context(groupby: "_GroupBy"):
463463
"""
464464
Set / reset the _group_selection_context.
465465
"""
@@ -489,7 +489,7 @@ def __init__(
489489
keys: Optional[_KeysArgType] = None,
490490
axis: int = 0,
491491
level=None,
492-
grouper: "Optional[ops.BaseGrouper]" = None,
492+
grouper: Optional["ops.BaseGrouper"] = None,
493493
exclusions=None,
494494
selection=None,
495495
as_index: bool = True,
@@ -734,7 +734,7 @@ def pipe(self, func, *args, **kwargs):
734734

735735
plot = property(GroupByPlot)
736736

737-
def _make_wrapper(self, name):
737+
def _make_wrapper(self, name: str) -> Callable:
738738
assert name in self._apply_allowlist
739739

740740
with _group_selection_context(self):

pandas/core/groupby/grouper.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -568,7 +568,9 @@ def codes(self) -> np.ndarray:
568568
@cache_readonly
569569
def result_index(self) -> Index:
570570
if self.all_grouper is not None:
571-
return recode_from_groupby(self.all_grouper, self.sort, self.group_index)
571+
group_idx = self.group_index
572+
assert isinstance(group_idx, CategoricalIndex) # set in __init__
573+
return recode_from_groupby(self.all_grouper, self.sort, group_idx)
572574
return self.group_index
573575

574576
@property
@@ -607,7 +609,7 @@ def get_grouper(
607609
mutated: bool = False,
608610
validate: bool = True,
609611
dropna: bool = True,
610-
) -> "Tuple[ops.BaseGrouper, List[Hashable], FrameOrSeries]":
612+
) -> Tuple["ops.BaseGrouper", List[Hashable], FrameOrSeries]:
611613
"""
612614
Create and return a BaseGrouper, which is an internal
613615
mapping of how to create the grouper indexers.

pandas/core/groupby/ops.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ class BaseGrouper:
8282
def __init__(
8383
self,
8484
axis: Index,
85-
groupings: "Sequence[grouper.Grouping]",
85+
groupings: Sequence["grouper.Grouping"],
8686
sort: bool = True,
8787
group_keys: bool = True,
8888
mutated: bool = False,

0 commit comments

Comments
 (0)