Skip to content

REF: remove name arg from Grouping #41426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 38 additions & 24 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,15 +446,14 @@ def __init__(
index: Index,
grouper=None,
obj: FrameOrSeries | None = None,
name: Hashable = None,
level=None,
sort: bool = True,
observed: bool = False,
in_axis: bool = False,
dropna: bool = True,
):
self.name = name
self.level = level
self._orig_grouper = grouper
self.grouper = _convert_grouper(index, grouper)
self.all_grouper = None
self.index = index
Expand All @@ -466,18 +465,11 @@ def __init__(

self._passed_categorical = False

# right place for this?
if isinstance(grouper, (Series, Index)) and name is None:
self.name = grouper.name

# we have a single grouper which may be a myriad of things,
# some of which are dependent on the passing in level

ilevel = self._ilevel
if ilevel is not None:
if self.name is None:
self.name = index.names[ilevel]

(
self.grouper, # Index
self._codes,
Expand All @@ -491,16 +483,22 @@ def __init__(
# what key/level refer to exactly, don't need to
# check again as we have by this point converted these
# to an actual value (rather than a pd.Grouper)
_, grouper, _ = self.grouper._get_grouper(
_, newgrouper, newobj = self.grouper._get_grouper(
# error: Value of type variable "FrameOrSeries" of "_get_grouper"
# of "Grouper" cannot be "Optional[FrameOrSeries]"
self.obj, # type: ignore[type-var]
validate=False,
)
if self.name is None:
self.name = grouper.result_index.name
self.obj = self.grouper.obj
self.grouper = grouper._get_grouper()
self.obj = newobj

ng = newgrouper._get_grouper()
if isinstance(newgrouper, ops.BinGrouper):
# in this case we have `ng is newgrouper`
self.grouper = ng
else:
# ops.BaseGrouper
# use Index instead of ndarray so we can recover the name
self.grouper = Index(ng, name=newgrouper.result_index.name)

else:
# a passed Categorical
Expand All @@ -511,10 +509,6 @@ def __init__(
self.grouper, self.sort, observed
)

# we are done
elif isinstance(self.grouper, Grouping):
self.grouper = self.grouper.grouper

# no level passed
elif not isinstance(
self.grouper, (Series, Index, ExtensionArray, np.ndarray)
Expand Down Expand Up @@ -546,6 +540,23 @@ def __repr__(self) -> str:
def __iter__(self):
return iter(self.indices)

@cache_readonly
def name(self) -> Hashable:
ilevel = self._ilevel
if ilevel is not None:
return self.index.names[ilevel]

if isinstance(self._orig_grouper, (Index, Series)):
return self._orig_grouper.name

elif isinstance(self.grouper, ops.BaseGrouper):
return self.grouper.result_index.name

elif isinstance(self.grouper, Index):
return self.grouper.name

return None

@cache_readonly
def _ilevel(self) -> int | None:
"""
Expand Down Expand Up @@ -814,25 +825,29 @@ def is_in_obj(gpr) -> bool:
for gpr, level in zip(keys, levels):

if is_in_obj(gpr): # df.groupby(df['name'])
in_axis, name = True, gpr.name
exclusions.add(name)
in_axis = True
exclusions.add(gpr.name)

elif is_in_axis(gpr): # df.groupby('name')
if gpr in obj:
if validate:
obj._check_label_or_level_ambiguity(gpr, axis=axis)
in_axis, name, gpr = True, gpr, obj[gpr]
if gpr.ndim != 1:
# non-unique columns; raise here to get the name in the
# exception message
raise ValueError(f"Grouper for '{name}' not 1-dimensional")
exclusions.add(name)
elif obj._is_level_reference(gpr, axis=axis):
in_axis, name, level, gpr = False, None, gpr, None
in_axis, level, gpr = False, gpr, None
else:
raise KeyError(gpr)
elif isinstance(gpr, Grouper) and gpr.key is not None:
# Add key to exclusions
exclusions.add(gpr.key)
in_axis, name = False, None
in_axis = False
else:
in_axis, name = False, None
in_axis = False

if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
raise ValueError(
Expand All @@ -847,7 +862,6 @@ def is_in_obj(gpr) -> bool:
group_axis,
gpr,
obj=obj,
name=name,
level=level,
sort=sort,
observed=observed,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1200,7 +1200,7 @@ def names(self) -> list[Hashable]:
@property
def groupings(self) -> list[grouper.Grouping]:
lev = self.binlabels
ping = grouper.Grouping(lev, lev, in_axis=False, level=None, name=lev.name)
ping = grouper.Grouping(lev, lev, in_axis=False, level=None)
return [ping]

def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray:
Expand Down