-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TYP: annotations in core.groupby #35939
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
4c5eddd
c632c9f
9e64be3
42649fb
47121dd
1decb3e
57c5dd3
a358463
ffa7ad7
e5e98d4
408db5a
d3493cf
75a805a
9f61070
2d10f6e
3e20187
e27d07f
c52bed4
5f71a05
f832baa
bde6dac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,7 +83,7 @@ | |
from pandas.plotting import boxplot_frame_groupby | ||
|
||
if TYPE_CHECKING: | ||
from pandas.core.internals import Block | ||
from pandas.core.internals import Block # noqa:F401 | ||
|
||
|
||
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) | ||
|
@@ -1595,7 +1595,7 @@ def _gotitem(self, key, ndim: int, subset=None): | |
Parameters | ||
---------- | ||
key : string / list of selections | ||
ndim : 1,2 | ||
ndim : {1, 2} | ||
requested ndim of result | ||
subset : object, default None | ||
subset to act on | ||
|
@@ -1621,7 +1621,7 @@ def _gotitem(self, key, ndim: int, subset=None): | |
|
||
raise AssertionError("invalid ndim for _gotitem") | ||
|
||
def _wrap_frame_output(self, result, obj) -> DataFrame: | ||
def _wrap_frame_output(self, result, obj: DataFrame) -> DataFrame: | ||
result_index = self.grouper.levels[0] | ||
|
||
if self.axis == 0: | ||
|
@@ -1638,7 +1638,7 @@ def _get_data_to_aggregate(self) -> BlockManager: | |
else: | ||
return obj._mgr | ||
|
||
def _insert_inaxis_grouper_inplace(self, result): | ||
def _insert_inaxis_grouper_inplace(self, result: DataFrame): | ||
# zip in reverse so we can always insert at loc 0 | ||
izip = zip( | ||
*map( | ||
|
@@ -1716,7 +1716,7 @@ def _wrap_transformed_output( | |
|
||
return result | ||
|
||
def _wrap_agged_blocks(self, blocks: "Sequence[Block]", items: Index) -> DataFrame: | ||
def _wrap_agged_blocks(self, blocks: Sequence["Block"], items: Index) -> DataFrame: | ||
if not self.as_index: | ||
index = np.arange(blocks[0].values.shape[-1]) | ||
mgr = BlockManager(blocks, axes=[items, index]) | ||
|
@@ -1743,7 +1743,7 @@ def _iterate_column_groupbys(self): | |
exclusions=self.exclusions, | ||
) | ||
|
||
def _apply_to_column_groupbys(self, func): | ||
def _apply_to_column_groupbys(self, func) -> DataFrame: | ||
from pandas.core.reshape.concat import concat | ||
|
||
return concat( | ||
|
@@ -1752,7 +1752,7 @@ def _apply_to_column_groupbys(self, func): | |
axis=1, | ||
) | ||
|
||
def count(self): | ||
def count(self) -> DataFrame: | ||
""" | ||
Compute count of group, excluding missing values. | ||
|
||
|
@@ -1782,7 +1782,7 @@ def count(self): | |
|
||
return self._reindex_output(result, fill_value=0) | ||
|
||
def nunique(self, dropna: bool = True): | ||
def nunique(self, dropna: bool = True) -> DataFrame: | ||
""" | ||
Return DataFrame with counts of unique elements in each position. | ||
|
||
|
@@ -1848,6 +1848,7 @@ def nunique(self, dropna: bool = True): | |
], | ||
axis=1, | ||
) | ||
assert isinstance(results, DataFrame) # for mypy | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you use `cast` here, then once we adopt `Literal` and `concat` can be overloaded on the `axis` value, then with [inline code lost in extraction] … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Neat, will do. |
||
|
||
if axis_number == 1: | ||
results = results.T | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -459,7 +459,7 @@ def f(self): | |
|
||
|
||
@contextmanager | ||
def _group_selection_context(groupby): | ||
def _group_selection_context(groupby: "_GroupBy"): | ||
""" | ||
Set / reset the _group_selection_context. | ||
""" | ||
|
@@ -489,7 +489,7 @@ def __init__( | |
keys: Optional[_KeysArgType] = None, | ||
axis: int = 0, | ||
level=None, | ||
grouper: "Optional[ops.BaseGrouper]" = None, | ||
grouper: Optional["ops.BaseGrouper"] = None, | ||
exclusions=None, | ||
selection=None, | ||
as_index: bool = True, | ||
|
@@ -734,7 +734,7 @@ def pipe(self, func, *args, **kwargs): | |
|
||
plot = property(GroupByPlot) | ||
|
||
def _make_wrapper(self, name): | ||
def _make_wrapper(self, name: str) -> Callable: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add type parameters for `Callable` (if you can). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's going to end up being [inline code lost in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure. |
||
assert name in self._apply_allowlist | ||
|
||
with _group_selection_context(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -568,7 +568,9 @@ def codes(self) -> np.ndarray: | |
@cache_readonly | ||
def result_index(self) -> Index: | ||
if self.all_grouper is not None: | ||
return recode_from_groupby(self.all_grouper, self.sort, self.group_index) | ||
group_idx = self.group_index | ||
assert isinstance(group_idx, CategoricalIndex) # set in __init__ | ||
return recode_from_groupby(self.all_grouper, self.sort, group_idx) | ||
Comment on lines
+571
to
+573
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this needed? Are the type annotations in `__init__` [inline code lost in extraction]
or [inline code lost in extraction],
so that `self._group_index` can only be `CategoricalIndex` or `None`, and `group_index` can only be `CategoricalIndex`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IIRC it's the `self.all_grouper` check a few lines up that ensures we have a `CategoricalIndex` here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `self.all_grouper` is not `None` when `is_categorical_dtype(self.grouper)`, so I don't think that narrows it (but I may need to trace through further). |
||
return self.group_index | ||
|
||
@property | ||
|
@@ -607,7 +609,7 @@ def get_grouper( | |
mutated: bool = False, | ||
validate: bool = True, | ||
dropna: bool = True, | ||
) -> "Tuple[ops.BaseGrouper, List[Hashable], FrameOrSeries]": | ||
) -> Tuple["ops.BaseGrouper", List[Hashable], FrameOrSeries]: | ||
""" | ||
Create and return a BaseGrouper, which is an internal | ||
mapping of how to create the grouper indexers. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For functions other than `__init__`, you can add `None` as the return type.