Skip to content

CLN: type annotations in groupby.grouper, groupby.ops #29456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Nov 13, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
def ax(self):
return self.grouper

def _get_grouper(self, obj, validate=True):
def _get_grouper(self, obj, validate: bool = True):
"""
Parameters
----------
Expand All @@ -144,17 +144,18 @@ def _get_grouper(self, obj, validate=True):
)
return self.binner, self.grouper, self.obj

def _set_grouper(self, obj, sort=False):
def _set_grouper(self, obj: NDFrame, sort: bool = False):
"""
given an object and the specifications, setup the internal grouper
for this particular specification

Parameters
----------
obj : the subject object
obj : Series or DataFrame
sort : bool, default False
whether the resulting grouper should be sorted
"""
assert obj is not None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for my own benefit in trying to reason about this code.


if self.key is not None and self.level is not None:
raise ValueError("The Grouper cannot specify both a key and a level!")
Expand Down Expand Up @@ -210,15 +211,15 @@ def _set_grouper(self, obj, sort=False):
def groups(self):
return self.grouper.groups

def __repr__(self):
def __repr__(self) -> str:
attrs_list = (
"{}={!r}".format(attr_name, getattr(self, attr_name))
"{name}={val!r}".format(name=attr_name, val=getattr(self, attr_name))
for attr_name in self._attributes
if getattr(self, attr_name) is not None
)
attrs = ", ".join(attrs_list)
cls_name = self.__class__.__name__
return "{}({})".format(cls_name, attrs)
return "{cls}({attrs})".format(cls=cls_name, attrs=attrs)


class Grouping:
Expand Down Expand Up @@ -372,8 +373,8 @@ def __init__(

self.grouper = self.grouper.astype("timedelta64[ns]")

def __repr__(self):
return "Grouping({0})".format(self.name)
def __repr__(self) -> str:
return "Grouping({name})".format(name=self.name)

def __iter__(self):
return iter(self.indices)
Expand Down Expand Up @@ -434,10 +435,10 @@ def _get_grouper(
key=None,
axis: int = 0,
level=None,
sort=True,
observed=False,
mutated=False,
validate=True,
sort: bool = True,
observed: bool = False,
mutated: bool = False,
validate: bool = True,
):
"""
create and return a BaseGrouper, which is an internal
Expand Down Expand Up @@ -672,7 +673,7 @@ def is_in_obj(gpr) -> bool:
return grouper, exclusions, obj


def _is_label_like(val):
def _is_label_like(val) -> bool:
return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val))


Expand Down
46 changes: 25 additions & 21 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def __init__(

self._filter_empty_groups = self.compressed = len(groupings) != 1
self.axis = axis
self.groupings = groupings # type: Sequence[grouper.Grouping]
self.groupings = list(groupings) # type: List[grouper.Grouping]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this necessary? I think Sequence should have been allowable

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was an attempt to troubleshoot a (still-failing, mentioned in OP) complaint about a mismatch in the groupings annotations.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@simonjayhawkins any thoughts on why mypy is still complaining about groupings?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right about the property and the attribute.

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index cbe012012..46a427434 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -90,12 +90,16 @@ class BaseGrouper:
 
         self._filter_empty_groups = self.compressed = len(groupings) != 1
         self.axis = axis
-        self.groupings = list(groupings)  # type: List[grouper.Grouping]
+        self._groupings = list(groupings)  # type: List[grouper.Grouping]
         self.sort = sort
         self.group_keys = group_keys
         self.mutated = mutated
         self.indexer = indexer
 
+    @property
+    def groupings(self):
+        return self._groupings
+
     @property
     def shape(self):
         return tuple(ping.ngroups for ping in self.groupings)

silences mypy if you're happy with this approach.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems to work, updated

self.sort = sort
self.group_keys = group_keys
self.mutated = mutated
Expand All @@ -106,7 +106,7 @@ def __iter__(self):
def nkeys(self) -> int:
return len(self.groupings)

def get_iterator(self, data, axis=0):
def get_iterator(self, data: NDFrame, axis: int = 0):
"""
Groupby iterator

Expand All @@ -120,7 +120,7 @@ def get_iterator(self, data, axis=0):
for key, (i, group) in zip(keys, splitter):
yield key, group

def _get_splitter(self, data, axis=0):
def _get_splitter(self, data: NDFrame, axis: int = 0) -> "DataSplitter":
comp_ids, _, ngroups = self.group_info
return get_splitter(data, comp_ids, ngroups, axis=axis)

Expand All @@ -142,7 +142,7 @@ def _get_group_keys(self):
# provide "flattened" iterator for multi-group setting
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes)

def apply(self, f, data, axis: int = 0):
def apply(self, f, data: NDFrame, axis: int = 0):
mutated = self.mutated
splitter = self._get_splitter(data, axis=axis)
group_keys = self._get_group_keys()
Expand All @@ -157,7 +157,7 @@ def apply(self, f, data, axis: int = 0):

elif (
com.get_callable_name(f) not in base.plotting_methods
and hasattr(splitter, "fast_apply")
and splitter.fast_apply is not None
and axis == 0
# with MultiIndex, apply_frame_axis0 would raise InvalidApply
# TODO: can we make this check prettier?
Expand Down Expand Up @@ -229,8 +229,7 @@ def names(self):

def size(self) -> Series:
"""
Compute group sizes

Compute group sizes.
"""
ids, _, ngroup = self.group_info
ids = ensure_platform_int(ids)
Expand Down Expand Up @@ -292,7 +291,7 @@ def recons_codes(self):
return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True)

@cache_readonly
def result_index(self):
def result_index(self) -> Index:
if not self.compressed and len(self.groupings) == 1:
return self.groupings[0].result_index.rename(self.names[0])

Expand Down Expand Up @@ -607,7 +606,7 @@ def agg_series(self, obj: Series, func):
raise
return self._aggregate_series_pure_python(obj, func)

def _aggregate_series_fast(self, obj, func):
def _aggregate_series_fast(self, obj: Series, func):
# At this point we have already checked that obj.index is not a MultiIndex
# and that obj is backed by an ndarray, not ExtensionArray
func = self._is_builtin_func(func)
Expand All @@ -623,7 +622,7 @@ def _aggregate_series_fast(self, obj, func):
result, counts = grouper.get_result()
return result, counts

def _aggregate_series_pure_python(self, obj, func):
def _aggregate_series_pure_python(self, obj: Series, func):

group_index, _, ngroups = self.group_info

Expand Down Expand Up @@ -682,7 +681,12 @@ class BinGrouper(BaseGrouper):
"""

def __init__(
self, bins, binlabels, filter_empty=False, mutated=False, indexer=None
self,
bins,
binlabels,
filter_empty: bool = False,
mutated: bool = False,
indexer=None,
):
self.bins = ensure_int64(bins)
self.binlabels = ensure_index(binlabels)
Expand Down Expand Up @@ -783,11 +787,9 @@ def names(self):
return [self.binlabels.name]

@property
def groupings(self):
from pandas.core.groupby.grouper import Grouping

def groupings(self) -> "List[grouper.Grouping]":
return [
Grouping(lvl, lvl, in_axis=False, level=None, name=name)
grouper.Grouping(lvl, lvl, in_axis=False, level=None, name=name)
for lvl, name in zip(self.levels, self.names)
]

Expand Down Expand Up @@ -825,7 +827,9 @@ def _is_indexed_like(obj, axes) -> bool:


class DataSplitter:
def __init__(self, data, labels, ngroups, axis: int = 0):
fast_apply = None

def __init__(self, data: NDFrame, labels, ngroups: int, axis: int = 0):
self.data = data
self.labels = ensure_int64(labels)
self.ngroups = ngroups
Expand Down Expand Up @@ -856,15 +860,15 @@ def __iter__(self):
for i, (start, end) in enumerate(zip(starts, ends)):
yield i, self._chop(sdata, slice(start, end))

def _get_sorted_data(self):
def _get_sorted_data(self) -> NDFrame:
return self.data.take(self.sort_idx, axis=self.axis)

def _chop(self, sdata, slice_obj: slice):
def _chop(self, sdata: NDFrame, slice_obj: slice) -> NDFrame:
raise AbstractMethodError(self)


class SeriesSplitter(DataSplitter):
def _chop(self, sdata, slice_obj: slice):
def _chop(self, sdata: Series, slice_obj: slice) -> Series:
return sdata._get_values(slice_obj)


Expand All @@ -876,14 +880,14 @@ def fast_apply(self, f, names):
sdata = self._get_sorted_data()
return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)

def _chop(self, sdata, slice_obj: slice):
def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
if self.axis == 0:
return sdata.iloc[slice_obj]
else:
return sdata._slice(slice_obj, axis=1)


def get_splitter(data: NDFrame, *args, **kwargs):
def get_splitter(data: NDFrame, *args, **kwargs) -> DataSplitter:
if isinstance(data, Series):
klass = SeriesSplitter # type: Type[DataSplitter]
else:
Expand Down