Skip to content

TYP: annotations in core.groupby #35939

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Aug 31, 2020
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4c5eddd
REF: remove unnecesary try/except
jbrockmendel Aug 21, 2020
c632c9f
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Aug 21, 2020
9e64be3
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Aug 21, 2020
42649fb
TST: add test for agg on ordered categorical cols (#35630)
mathurk1 Aug 21, 2020
47121dd
TST: resample does not yield empty groups (#10603) (#35799)
tkmz-n Aug 21, 2020
1decb3e
revert accidental rebase
jbrockmendel Aug 22, 2020
57c5dd3
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 22, 2020
a358463
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 23, 2020
ffa7ad7
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 23, 2020
e5e98d4
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 24, 2020
408db5a
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 24, 2020
d3493cf
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 25, 2020
75a805a
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 25, 2020
9f61070
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 25, 2020
2d10f6e
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 26, 2020
3e20187
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 26, 2020
e27d07f
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 27, 2020
c52bed4
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 27, 2020
5f71a05
TYP: annotations in core.groupby
jbrockmendel Aug 28, 2020
f832baa
Merge branch 'master' of https://github.com/pandas-dev/pandas into an…
jbrockmendel Aug 29, 2020
bde6dac
update per comments
jbrockmendel Aug 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pandas/core/groupby/categorical.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional, Tuple

import numpy as np

from pandas.core.algorithms import unique1d
Expand All @@ -6,9 +8,12 @@
CategoricalDtype,
recode_for_categories,
)
from pandas.core.indexes.api import CategoricalIndex


def recode_for_groupby(c: Categorical, sort: bool, observed: bool):
def recode_for_groupby(
c: Categorical, sort: bool, observed: bool
) -> Tuple[Categorical, Optional[Categorical]]:
"""
Code the categories to ensure we can groupby for categoricals.

Expand Down Expand Up @@ -73,7 +78,9 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool):
return c.reorder_categories(cat.categories), None


def recode_from_groupby(c: Categorical, sort: bool, ci):
def recode_from_groupby(
c: Categorical, sort: bool, ci: CategoricalIndex
) -> CategoricalIndex:
"""
Reverse the codes_to_groupby to account for sort / observed.

Expand Down
17 changes: 9 additions & 8 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
from pandas.plotting import boxplot_frame_groupby

if TYPE_CHECKING:
from pandas.core.internals import Block
from pandas.core.internals import Block # noqa:F401


NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
Expand Down Expand Up @@ -1595,7 +1595,7 @@ def _gotitem(self, key, ndim: int, subset=None):
Parameters
----------
key : string / list of selections
ndim : 1,2
ndim : {1, 2}
requested ndim of result
subset : object, default None
subset to act on
Expand All @@ -1621,7 +1621,7 @@ def _gotitem(self, key, ndim: int, subset=None):

raise AssertionError("invalid ndim for _gotitem")

def _wrap_frame_output(self, result, obj) -> DataFrame:
def _wrap_frame_output(self, result, obj: DataFrame) -> DataFrame:
result_index = self.grouper.levels[0]

if self.axis == 0:
Expand All @@ -1638,7 +1638,7 @@ def _get_data_to_aggregate(self) -> BlockManager:
else:
return obj._mgr

def _insert_inaxis_grouper_inplace(self, result):
def _insert_inaxis_grouper_inplace(self, result: DataFrame):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for functions other than __init__ you can add None as the return type.

# zip in reverse so we can always insert at loc 0
izip = zip(
*map(
Expand Down Expand Up @@ -1716,7 +1716,7 @@ def _wrap_transformed_output(

return result

def _wrap_agged_blocks(self, blocks: "Sequence[Block]", items: Index) -> DataFrame:
def _wrap_agged_blocks(self, blocks: Sequence["Block"], items: Index) -> DataFrame:
if not self.as_index:
index = np.arange(blocks[0].values.shape[-1])
mgr = BlockManager(blocks, axes=[items, index])
Expand All @@ -1743,7 +1743,7 @@ def _iterate_column_groupbys(self):
exclusions=self.exclusions,
)

def _apply_to_column_groupbys(self, func):
def _apply_to_column_groupbys(self, func) -> DataFrame:
from pandas.core.reshape.concat import concat

return concat(
Expand All @@ -1752,7 +1752,7 @@ def _apply_to_column_groupbys(self, func):
axis=1,
)

def count(self):
def count(self) -> DataFrame:
"""
Compute count of group, excluding missing values.

Expand Down Expand Up @@ -1782,7 +1782,7 @@ def count(self):

return self._reindex_output(result, fill_value=0)

def nunique(self, dropna: bool = True):
def nunique(self, dropna: bool = True) -> DataFrame:
"""
Return DataFrame with counts of unique elements in each position.

Expand Down Expand Up @@ -1848,6 +1848,7 @@ def nunique(self, dropna: bool = True):
],
axis=1,
)
assert isinstance(results, DataFrame) # for mypy
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you use cast here, then once we adopt Literal and concat can be overloaded on axis value, then with warn_redundant_casts we can remove

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

neat, will do


if axis_number == 1:
results = results.T
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def f(self):


@contextmanager
def _group_selection_context(groupby):
def _group_selection_context(groupby: "_GroupBy"):
"""
Set / reset the _group_selection_context.
"""
Expand Down Expand Up @@ -489,7 +489,7 @@ def __init__(
keys: Optional[_KeysArgType] = None,
axis: int = 0,
level=None,
grouper: "Optional[ops.BaseGrouper]" = None,
grouper: Optional["ops.BaseGrouper"] = None,
exclusions=None,
selection=None,
as_index: bool = True,
Expand Down Expand Up @@ -734,7 +734,7 @@ def pipe(self, func, *args, **kwargs):

plot = property(GroupByPlot)

def _make_wrapper(self, name):
def _make_wrapper(self, name: str) -> Callable:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add type parameters for Callable (if you can)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

its going to end up being wrapper(self, *args, **kwargs) -> FrameOrSeriesUnion. Is that Callable[..., FrameOrSeriesUnion]?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

assert name in self._apply_allowlist

with _group_selection_context(self):
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,9 @@ def codes(self) -> np.ndarray:
@cache_readonly
def result_index(self) -> Index:
if self.all_grouper is not None:
return recode_from_groupby(self.all_grouper, self.sort, self.group_index)
group_idx = self.group_index
assert isinstance(group_idx, CategoricalIndex) # set in __init__
return recode_from_groupby(self.all_grouper, self.sort, group_idx)
Comment on lines +571 to +573
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this needed.

are the type annotations for def group_index(self) -> Index and _group_index: Optional[Index] = None correct.

in __init__

self._group_index = CategoricalIndex(..

or

(
                self.grouper,
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(self.grouper, level)

so self._group_index can only be CategoricalIndex or None and group_index can only be CategoricalIndex ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC its the self.all_grouper check a few lines up that ensures we have a CategoricalIndex here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.all_grouper is not None when is_categorical_dtype(self.grouper) so I don't think that narrows it. (but I may need to trace through further)

return self.group_index

@property
Expand Down Expand Up @@ -607,7 +609,7 @@ def get_grouper(
mutated: bool = False,
validate: bool = True,
dropna: bool = True,
) -> "Tuple[ops.BaseGrouper, List[Hashable], FrameOrSeries]":
) -> Tuple["ops.BaseGrouper", List[Hashable], FrameOrSeries]:
"""
Create and return a BaseGrouper, which is an internal
mapping of how to create the grouper indexers.
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class BaseGrouper:
def __init__(
self,
axis: Index,
groupings: "Sequence[grouper.Grouping]",
groupings: Sequence["grouper.Grouping"],
sort: bool = True,
group_keys: bool = True,
mutated: bool = False,
Expand Down