REF: Grouping.grouper -> Grouping.grouping_vector #41532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged 2 commits on May 19, 2021
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
@@ -3064,7 +3064,7 @@ def _reindex_output(

# reindexing only applies to a Categorical grouper
elif not any(
- isinstance(ping.grouper, (Categorical, CategoricalIndex))
+ isinstance(ping.grouping_vector, (Categorical, CategoricalIndex))
for ping in groupings
):
return output
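For context, this check gates whether `_reindex_output` pads the result with unobserved categories. A minimal sketch of the user-visible behavior being preserved here (not part of this diff; plain public API, illustrative data):

```python
import pandas as pd

# A categorical key with an unobserved category "b".
cat = pd.Categorical(["a", "a"], categories=["a", "b"])
df = pd.DataFrame({"key": cat, "val": [1, 2]})

# With observed=False (the default), the output is reindexed to include "b";
# with observed=True it is not. This is the path guarded by the check above.
print(df.groupby("key", observed=False).sum())  # rows for "a" and "b"
print(df.groupby("key", observed=True).sum())   # row for "a" only
```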
66 changes: 34 additions & 32 deletions pandas/core/groupby/grouper.py
@@ -455,7 +455,7 @@ def __init__(
):
self.level = level
self._orig_grouper = grouper
- self.grouper = _convert_grouper(index, grouper)
+ self.grouping_vector = _convert_grouper(index, grouper)
self.all_grouper = None
self.index = index
self.sort = sort
@@ -472,19 +472,19 @@ def __init__(
ilevel = self._ilevel
if ilevel is not None:
(
- self.grouper, # Index
+ self.grouping_vector, # Index
self._codes,
self._group_index,
- ) = index._get_grouper_for_level(self.grouper, ilevel)
+ ) = index._get_grouper_for_level(self.grouping_vector, ilevel)

# a passed Grouper like, directly get the grouper in the same way
# as single grouper groupby, use the group_info to get codes
- elif isinstance(self.grouper, Grouper):
+ elif isinstance(self.grouping_vector, Grouper):
# get the new grouper; we already have disambiguated
# what key/level refer to exactly, don't need to
# check again as we have by this point converted these
# to an actual value (rather than a pd.Grouper)
- _, newgrouper, newobj = self.grouper._get_grouper(
+ _, newgrouper, newobj = self.grouping_vector._get_grouper(
# error: Value of type variable "FrameOrSeries" of "_get_grouper"
# of "Grouper" cannot be "Optional[FrameOrSeries]"
self.obj, # type: ignore[type-var]
@@ -495,44 +495,46 @@ def __init__(
ng = newgrouper._get_grouper()
if isinstance(newgrouper, ops.BinGrouper):
# in this case we have `ng is newgrouper`
- self.grouper = ng
+ self.grouping_vector = ng
else:
# ops.BaseGrouper
# use Index instead of ndarray so we can recover the name
- self.grouper = Index(ng, name=newgrouper.result_index.name)
+ self.grouping_vector = Index(ng, name=newgrouper.result_index.name)

- elif is_categorical_dtype(self.grouper):
+ elif is_categorical_dtype(self.grouping_vector):
# a passed Categorical
self._passed_categorical = True

- self.grouper, self.all_grouper = recode_for_groupby(
-     self.grouper, self.sort, observed
+ self.grouping_vector, self.all_grouper = recode_for_groupby(
+     self.grouping_vector, self.sort, observed
)

- elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)):
+ elif not isinstance(
+     self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray)
+ ):
# no level passed
- if getattr(self.grouper, "ndim", 1) != 1:
-     t = self.name or str(type(self.grouper))
+ if getattr(self.grouping_vector, "ndim", 1) != 1:
+     t = self.name or str(type(self.grouping_vector))
raise ValueError(f"Grouper for '{t}' not 1-dimensional")

- self.grouper = self.index.map(self.grouper)
+ self.grouping_vector = self.index.map(self.grouping_vector)

if not (
- hasattr(self.grouper, "__len__")
- and len(self.grouper) == len(self.index)
+ hasattr(self.grouping_vector, "__len__")
+ and len(self.grouping_vector) == len(self.index)
):
- grper = pprint_thing(self.grouper)
+ grper = pprint_thing(self.grouping_vector)
errmsg = (
"Grouper result violates len(labels) == "
f"len(data)\nresult: {grper}"
)
- self.grouper = None # Try for sanity
+ self.grouping_vector = None # Try for sanity
raise AssertionError(errmsg)

- if isinstance(self.grouper, np.ndarray):
+ if isinstance(self.grouping_vector, np.ndarray):
# if we have a date/time-like grouper, make sure that we have
# Timestamps like
- self.grouper = sanitize_to_nanoseconds(self.grouper)
+ self.grouping_vector = sanitize_to_nanoseconds(self.grouping_vector)

def __repr__(self) -> str:
return f"Grouping({self.name})"
@@ -549,11 +551,11 @@ def name(self) -> Hashable:
if isinstance(self._orig_grouper, (Index, Series)):
return self._orig_grouper.name

- elif isinstance(self.grouper, ops.BaseGrouper):
-     return self.grouper.result_index.name
+ elif isinstance(self.grouping_vector, ops.BaseGrouper):
+     return self.grouping_vector.result_index.name

- elif isinstance(self.grouper, Index):
-     return self.grouper.name
+ elif isinstance(self.grouping_vector, Index):
+     return self.grouping_vector.name

# otherwise we have ndarray or ExtensionArray -> no name
return None
@@ -580,10 +582,10 @@ def ngroups(self) -> int:
@cache_readonly
def indices(self):
# we have a list of groupers
- if isinstance(self.grouper, ops.BaseGrouper):
-     return self.grouper.indices
+ if isinstance(self.grouping_vector, ops.BaseGrouper):
+     return self.grouping_vector.indices

- values = Categorical(self.grouper)
+ values = Categorical(self.grouping_vector)
return values._reverse_indexer()

@property
@@ -624,7 +626,7 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
if self._passed_categorical:
# we make a CategoricalIndex out of the cat grouper
# preserving the categories / ordered attributes
- cat = self.grouper
+ cat = self.grouping_vector
categories = cat.categories

if self.observed:
@@ -640,18 +642,18 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
)
return cat.codes, uniques

- elif isinstance(self.grouper, ops.BaseGrouper):
+ elif isinstance(self.grouping_vector, ops.BaseGrouper):
# we have a list of groupers
- codes = self.grouper.codes_info
- uniques = self.grouper.result_arraylike
+ codes = self.grouping_vector.codes_info
+ uniques = self.grouping_vector.result_arraylike
else:
# GH35667, replace dropna=False with na_sentinel=None
if not self.dropna:
na_sentinel = None
else:
na_sentinel = -1
codes, uniques = algorithms.factorize(
- self.grouper, sort=self.sort, na_sentinel=na_sentinel
+ self.grouping_vector, sort=self.sort, na_sentinel=na_sentinel
)
return codes, uniques

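The `_codes_and_uniques` hunk above keeps the GH35667 convention of expressing `dropna=False` as `na_sentinel=None`. A small sketch with the public `pd.factorize`, which accepted the same keyword in pandas of this era (illustrative values, not from the PR):

```python
import numpy as np
import pandas as pd

values = np.array([1.0, np.nan, 2.0, 1.0])

# dropna=True path: NaN is marked with the sentinel -1 and excluded from uniques.
codes, uniques = pd.factorize(values, na_sentinel=-1)
print(codes, uniques)   # approximately: [ 0 -1  1  0] [1. 2.]

# dropna=False path: na_sentinel=None keeps NaN as a real group.
codes, uniques = pd.factorize(values, na_sentinel=None)
print(codes, uniques)   # approximately: [0 1 2 0] [ 1. nan  2.]
```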
4 changes: 2 additions & 2 deletions pandas/core/groupby/ops.py
@@ -734,7 +734,7 @@ def _get_grouper(self):
We have a specific method of grouping, so cannot
convert to a Index for our grouper.
"""
- return self.groupings[0].grouper
+ return self.groupings[0].grouping_vector

@final
def _get_group_keys(self):
@@ -858,7 +858,7 @@ def groups(self) -> dict[Hashable, np.ndarray]:
if len(self.groupings) == 1:
return self.groupings[0].groups
else:
- to_groupby = zip(*(ping.grouper for ping in self.groupings))
+ to_groupby = zip(*(ping.grouping_vector for ping in self.groupings))
index = Index(to_groupby)
return self.axis.groupby(index)

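The `groups` change above zips one grouping vector per key into tuple labels before building the lookup index. A minimal sketch of the resulting mapping (hypothetical frame; behavior as documented for multi-key groupby):

```python
import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 1], "val": [10, 20, 30]})

# With two keys, BaseGrouper.groups is keyed by (A, B) tuples built from the
# zipped grouping vectors.
print(df.groupby(["A", "B"]).groups)
# e.g. {('x', 1): [0], ('x', 2): [1], ('y', 1): [2]}
```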
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/groupby.py
@@ -15,8 +15,8 @@ def test_grouping_grouper(self, data_for_grouping):
gr1 = df.groupby("A").grouper.groupings[0]
gr2 = df.groupby("B").grouper.groupings[0]

- tm.assert_numpy_array_equal(gr1.grouper, df.A.values)
- tm.assert_extension_array_equal(gr2.grouper, data_for_grouping)
+ tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values)
+ tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping)

@pytest.mark.parametrize("as_index", [True, False])
def test_groupby_extension_agg(self, as_index, data_for_grouping):
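For reference, the assertion pattern used in these extension tests also works as a standalone check against the renamed attribute; a sketch against internal API (attribute name per this diff, frame contents hypothetical):

```python
import pandas as pd
import pandas._testing as tm

df = pd.DataFrame({"A": [1, 1, 2], "B": ["a", "b", "a"]})
ping = df.groupby("A").grouper.groupings[0]

# The raw values backing the grouping now live on .grouping_vector.
tm.assert_numpy_array_equal(ping.grouping_vector, df.A.values)
```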
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_boolean.py
@@ -262,8 +262,8 @@ def test_grouping_grouper(self, data_for_grouping):
gr1 = df.groupby("A").grouper.groupings[0]
gr2 = df.groupby("B").grouper.groupings[0]

- tm.assert_numpy_array_equal(gr1.grouper, df.A.values)
- tm.assert_extension_array_equal(gr2.grouper, data_for_grouping)
+ tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values)
+ tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping)

@pytest.mark.parametrize("as_index", [True, False])
def test_groupby_extension_agg(self, as_index, data_for_grouping):
2 changes: 1 addition & 1 deletion pandas/tests/groupby/aggregate/test_aggregate.py
@@ -161,7 +161,7 @@ def test_agg_grouping_is_list_tuple(ts):
df = tm.makeTimeDataFrame()

grouped = df.groupby(lambda x: x.year)
- grouper = grouped.grouper.groupings[0].grouper
+ grouper = grouped.grouper.groupings[0].grouping_vector
grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper))

result = grouped.agg(np.mean)
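The test above rebuilds a `Grouping` from a plain list; a minimal sketch of that same internal constructor call in isolation (internal API, hypothetical series):

```python
import numpy as np
import pandas as pd
from pandas.core.groupby.grouper import Grouping

ts = pd.Series(np.arange(6.0), index=pd.date_range("2020-01-31", periods=6, freq="M"))

# As in the test: a Grouping can be built from a list-like of labels aligned
# with the index; the list is converted into the .grouping_vector ndarray.
ping = Grouping(ts.index, list(ts.index.year))
print(type(ping.grouping_vector), ping.grouping_vector[:3])
```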