REF: privatize Grouping attrs #41534

Merged: 5 commits, May 21, 2021
45 changes: 26 additions & 19 deletions pandas/core/groupby/grouper.py
@@ -441,6 +441,9 @@ class Grouping:

_codes: np.ndarray | None = None
_group_index: Index | None = None
_passed_categorical: bool
_all_grouper: Categorical | None
_index: Index

def __init__(
self,
@@ -456,13 +459,13 @@ def __init__(
self.level = level
self._orig_grouper = grouper
self.grouping_vector = _convert_grouper(index, grouper)
self.all_grouper = None
self.index = index
self.sort = sort
self._all_grouper = None
self._index = index
self._sort = sort
self.obj = obj
self.observed = observed
self._observed = observed
self.in_axis = in_axis
self.dropna = dropna
self._dropna = dropna

self._passed_categorical = False

@@ -471,11 +474,15 @@ def __init__(

ilevel = self._ilevel
if ilevel is not None:
mapper = self.grouping_vector
# In extant tests, the new self.grouping_vector matches
# `index.get_level_values(ilevel)` whenever
# mapper is None and isinstance(index, MultiIndex)
(
self.grouping_vector, # Index
self._codes,
self._group_index,
) = index._get_grouper_for_level(self.grouping_vector, ilevel)
) = index._get_grouper_for_level(mapper, ilevel)

# a passed Grouper like, directly get the grouper in the same way
# as single grouper groupby, use the group_info to get codes
@@ -505,8 +512,8 @@ def __init__(
# a passed Categorical
self._passed_categorical = True

self.grouping_vector, self.all_grouper = recode_for_groupby(
self.grouping_vector, self.sort, observed
self.grouping_vector, self._all_grouper = recode_for_groupby(
self.grouping_vector, sort, observed
)

elif not isinstance(
@@ -517,11 +524,11 @@ def __init__(
t = self.name or str(type(self.grouping_vector))
raise ValueError(f"Grouper for '{t}' not 1-dimensional")

self.grouping_vector = self.index.map(self.grouping_vector)
self.grouping_vector = index.map(self.grouping_vector)

if not (
hasattr(self.grouping_vector, "__len__")
and len(self.grouping_vector) == len(self.index)
and len(self.grouping_vector) == len(index)
):
grper = pprint_thing(self.grouping_vector)
errmsg = (
@@ -546,7 +553,7 @@ def __iter__(self):
def name(self) -> Hashable:
ilevel = self._ilevel
if ilevel is not None:
return self.index.names[ilevel]
return self._index.names[ilevel]

if isinstance(self._orig_grouper, (Index, Series)):
return self._orig_grouper.name
@@ -569,7 +576,7 @@ def _ilevel(self) -> int | None:
if level is None:
return None
if not isinstance(level, int):
index = self.index
index = self._index
if level not in index.names:
raise AssertionError(f"Level {level} not in index")
return index.names.index(level)
@@ -607,10 +614,10 @@ def group_arraylike(self) -> ArrayLike:
@cache_readonly
def result_index(self) -> Index:
# TODO: what's the difference between result_index vs group_index?
if self.all_grouper is not None:
if self._all_grouper is not None:
group_idx = self.group_index
assert isinstance(group_idx, CategoricalIndex)
return recode_from_groupby(self.all_grouper, self.sort, group_idx)
return recode_from_groupby(self._all_grouper, self._sort, group_idx)
return self.group_index

@cache_readonly
@@ -629,10 +636,10 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
cat = self.grouping_vector
categories = cat.categories

if self.observed:
if self._observed:
ucodes = algorithms.unique1d(cat.codes)
ucodes = ucodes[ucodes != -1]
if self.sort or cat.ordered:
if self._sort or cat.ordered:
ucodes = np.sort(ucodes)
else:
ucodes = np.arange(len(categories))
@@ -648,18 +655,18 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
uniques = self.grouping_vector.result_arraylike
else:
# GH35667, replace dropna=False with na_sentinel=None
if not self.dropna:
if not self._dropna:
na_sentinel = None
else:
na_sentinel = -1
codes, uniques = algorithms.factorize(
self.grouping_vector, sort=self.sort, na_sentinel=na_sentinel
self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
)
return codes, uniques

@cache_readonly
def groups(self) -> dict[Hashable, np.ndarray]:
return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
return self._index.groupby(Categorical.from_codes(self.codes, self.group_index))


def get_grouper(
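As a side note on the `_codes_and_uniques` hunk above: the GH35667 comment maps the `dropna` flag onto factorize's `na_sentinel`. A minimal sketch of that mapping using the public `pd.factorize` (the same machinery as the internal `algorithms.factorize` call; the sample values are made up, and `na_sentinel` is the pandas 1.x spelling of the parameter):

```python
import numpy as np
import pandas as pd

values = np.array(["a", "b", np.nan, "a"], dtype=object)

# dropna=True -> na_sentinel=-1: the missing value is coded -1 and
# excluded from the uniques, so it forms no group.
codes, uniques = pd.factorize(values, sort=True, na_sentinel=-1)
# codes -> [ 0  1 -1  0], uniques -> ['a' 'b']

# dropna=False -> na_sentinel=None (GH35667): the missing value gets a
# regular non-negative code and appears in the uniques, i.e. NaN
# becomes its own group.
codes, uniques = pd.factorize(values, sort=True, na_sentinel=None)
```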
4 changes: 2 additions & 2 deletions pandas/core/groupby/ops.py
@@ -678,7 +678,7 @@ def __init__(

self.axis = axis
self._groupings: list[grouper.Grouping] = list(groupings)
self.sort = sort
self._sort = sort
self.group_keys = group_keys
self.mutated = mutated
self.indexer = indexer
@@ -891,7 +891,7 @@ def codes_info(self) -> np.ndarray:
def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]:
if len(self.groupings) > 1:
group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
return compress_group_index(group_index, sort=self.sort)
return compress_group_index(group_index, sort=self._sort)

ping = self.groupings[0]
return ping.codes, np.arange(len(ping.group_index))
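For context on `_get_compressed_codes` in the `ops.py` hunk: `get_group_index` flattens the per-grouping code arrays into one group index, and `compress_group_index` keeps only the group ids that actually occur. A rough sketch with made-up codes, assuming the helpers' usual home in `pandas.core.sorting`:

```python
import numpy as np
from pandas.core.sorting import compress_group_index, get_group_index

# Codes from two hypothetical groupings over five rows.
codes_a = np.array([0, 0, 1, 1, 2], dtype=np.intp)
codes_b = np.array([0, 1, 0, 1, 0], dtype=np.intp)
shape = (3, 2)  # number of possible labels per grouping

# Combine the two code arrays into a single flat group index.
group_index = get_group_index([codes_a, codes_b], shape, sort=True, xnull=True)

# Compress to the observed group ids; the diff above now passes
# sort=self._sort here instead of the old self.sort.
comp_ids, obs_group_ids = compress_group_index(group_index, sort=True)
```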