Skip to content

TYP: selection and groups type-hinting in groupby #36643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 7, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np

import pandas._libs.lib as lib
from pandas._typing import IndexLabel
from pandas.compat import PYPY
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
Expand Down Expand Up @@ -135,7 +136,7 @@ class SelectionMixin:
object sub-classes need to define: obj, exclusions
"""

_selection = None
_selection: Optional[IndexLabel] = None
_internal_names = ["_cache", "__setstate__"]
_internal_names_set = set(_internal_names)

Expand Down
20 changes: 14 additions & 6 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,14 @@ class providing the base-class of operations.

from pandas._libs import Timestamp, lib
import pandas._libs.groupby as libgroupby
from pandas._typing import F, FrameOrSeries, FrameOrSeriesUnion, Label, Scalar
from pandas._typing import (
F,
FrameOrSeries,
FrameOrSeriesUnion,
IndexLabel,
Label,
Scalar,
)
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution, cache_readonly, doc
Expand Down Expand Up @@ -68,6 +75,8 @@ class providing the base-class of operations.
from pandas.core.sorting import get_group_index_sorter
from pandas.core.util.numba_ import NUMBA_FUNC_CACHE

from pandas.io.formats.printing import PrettyDict

_common_see_also = """
See Also
--------
Expand Down Expand Up @@ -487,10 +496,10 @@ def __init__(
obj: FrameOrSeries,
keys: Optional[_KeysArgType] = None,
axis: int = 0,
level=None,
level: Optional[IndexLabel] = None,
grouper: Optional["ops.BaseGrouper"] = None,
exclusions: Optional[Set[Label]] = None,
selection=None,
selection: Optional[IndexLabel] = None,
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
Expand All @@ -499,7 +508,6 @@ def __init__(
mutated: bool = False,
dropna: bool = True,
):

self._selection = selection

assert isinstance(obj, NDFrame), type(obj)
Expand Down Expand Up @@ -547,15 +555,15 @@ def __repr__(self) -> str:
# TODO: Better repr for GroupBy object
return object.__repr__(self)

def _assure_grouper(self):
def _assure_grouper(self) -> None:
"""
We create the grouper on instantiation sub-classes may have a
different policy.
"""
pass

@property
def groups(self):
def groups(self) -> PrettyDict[Hashable, np.ndarray]:
"""
Dict {group name -> group labels}.
"""
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Provide user facing operators for doing the split part of the
split-apply-combine paradigm.
"""
from typing import Dict, Hashable, List, Optional, Set, Tuple
from typing import Hashable, List, Optional, Set, Tuple
import warnings

import numpy as np
Expand All @@ -29,7 +29,7 @@
from pandas.core.indexes.api import CategoricalIndex, Index, MultiIndex
from pandas.core.series import Series

from pandas.io.formats.printing import pprint_thing
from pandas.io.formats.printing import PrettyDict, pprint_thing


class Grouper:
Expand Down Expand Up @@ -600,7 +600,7 @@ def _make_codes(self) -> None:
self._group_index = uniques

@cache_readonly
def groups(self) -> Dict[Hashable, np.ndarray]:
def groups(self) -> PrettyDict[Hashable, np.ndarray]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should just be Dict. While PrettyDict is literally correct, we type our API with as permissive a type as possible for arguments, so I think we should do the same for return values as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed on arguments, especially for user-facing methods. But does that also apply to a return, where the result is known to be a PrettyDict? In general I try to be as specific with types as possible; the more specific one can be the more issues mypy can check. I also find the specificity helps with readability.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem arises when users start using the return type in their own type checking, particularly when things are invariant. For all practical purposes (outside of the repr) this returns a Dict, so I think it is better for downstream code to label it as such.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation, makes sense. To add to your reasons, PrettyDict is also not part of the public API, so it shouldn't be leaked out via type hints.

return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))


Expand Down
6 changes: 4 additions & 2 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"""

import collections
from typing import List, Optional, Sequence, Tuple, Type
from typing import Hashable, List, Optional, Sequence, Tuple, Type

import numpy as np

Expand Down Expand Up @@ -56,6 +56,8 @@
get_indexer_dict,
)

from pandas.io.formats.printing import PrettyDict


class BaseGrouper:
"""
Expand Down Expand Up @@ -246,7 +248,7 @@ def size(self) -> Series:
return Series(out, index=self.result_index, dtype="int64")

@cache_readonly
def groups(self):
def groups(self) -> PrettyDict[Hashable, np.ndarray]:
""" dict {group name -> group labels} """
if len(self.groupings) == 1:
return self.groupings[0].groups
Expand Down