Skip to content

Commit ffe5bfd

Browse files
topper-123 authored and Mateusz Górski committed
API: rename labels to codes in core/groupby (pandas-dev#29402)
1 parent 893a33b commit ffe5bfd

File tree

6 files changed: +92 -89 lines changed

pandas/core/groupby/generic.py

+14 -10
Original file line number | Diff line number | Diff line change
@@ -655,16 +655,17 @@ def value_counts(
655655
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
656656

657657
# multi-index components
658-
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
658+
codes = self.grouper.recons_codes
659+
codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
659660
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
660661
names = self.grouper.names + [self._selection_name]
661662

662663
if dropna:
663-
mask = labels[-1] != -1
664+
mask = codes[-1] != -1
664665
if mask.all():
665666
dropna = False
666667
else:
667-
out, labels = out[mask], [label[mask] for label in labels]
668+
out, codes = out[mask], [level_codes[mask] for level_codes in codes]
668669

669670
if normalize:
670671
out = out.astype("float")
@@ -680,11 +681,11 @@ def value_counts(
680681
if sort and bins is None:
681682
cat = ids[inc][mask] if dropna else ids[inc]
682683
sorter = np.lexsort((out if ascending else -out, cat))
683-
out, labels[-1] = out[sorter], labels[-1][sorter]
684+
out, codes[-1] = out[sorter], codes[-1][sorter]
684685

685686
if bins is None:
686687
mi = MultiIndex(
687-
levels=levels, codes=labels, names=names, verify_integrity=False
688+
levels=levels, codes=codes, names=names, verify_integrity=False
688689
)
689690

690691
if is_integer_dtype(out):
@@ -694,14 +695,14 @@ def value_counts(
694695
# for compat. with libgroupby.value_counts need to ensure every
695696
# bin is present at every index level, null filled with zeros
696697
diff = np.zeros(len(out), dtype="bool")
697-
for lab in labels[:-1]:
698-
diff |= np.r_[True, lab[1:] != lab[:-1]]
698+
for level_codes in codes[:-1]:
699+
diff |= np.r_[True, level_codes[1:] != level_codes[:-1]]
699700

700701
ncat, nbin = diff.sum(), len(levels[-1])
701702

702703
left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]
703704

704-
right = [diff.cumsum() - 1, labels[-1]]
705+
right = [diff.cumsum() - 1, codes[-1]]
705706

706707
_, idx = _get_join_indexers(left, right, sort=False, how="left")
707708
out = np.where(idx != -1, out[idx], 0)
@@ -711,7 +712,10 @@ def value_counts(
711712
out, left[-1] = out[sorter], left[-1][sorter]
712713

713714
# build the multi-index w/ full levels
714-
codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
715+
def build_codes(lev_codes: np.ndarray) -> np.ndarray:
716+
return np.repeat(lev_codes[diff], nbin)
717+
718+
codes = [build_codes(lev_codes) for lev_codes in codes[:-1]]
715719
codes.append(left[-1])
716720

717721
mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
@@ -758,7 +762,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
758762
)
759763
)
760764
filled = getattr(self, fill_method)(limit=limit)
761-
fill_grp = filled.groupby(self.grouper.labels)
765+
fill_grp = filled.groupby(self.grouper.codes)
762766
shifted = fill_grp.shift(periods=periods, freq=freq)
763767

764768
return (filled / shifted) - 1

pandas/core/groupby/groupby.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -2349,7 +2349,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0
23492349
)
23502350
)
23512351
filled = getattr(self, fill_method)(limit=limit)
2352-
fill_grp = filled.groupby(self.grouper.labels)
2352+
fill_grp = filled.groupby(self.grouper.codes)
23532353
shifted = fill_grp.shift(periods=periods, freq=freq)
23542354
return (filled / shifted) - 1
23552355

pandas/core/groupby/grouper.py

+32 -34
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
split-apply-combine paradigm.
44
"""
55

6-
from typing import Tuple
6+
from typing import Optional, Tuple
77
import warnings
88

99
import numpy as np
@@ -21,6 +21,7 @@
2121
)
2222
from pandas.core.dtypes.generic import ABCSeries
2323

24+
from pandas._typing import FrameOrSeries
2425
import pandas.core.algorithms as algorithms
2526
from pandas.core.arrays import Categorical, ExtensionArray
2627
import pandas.core.common as com
@@ -228,10 +229,10 @@ class Grouping:
228229
----------
229230
index : Index
230231
grouper :
231-
obj :
232+
obj Union[DataFrame, Series]:
232233
name :
233234
level :
234-
observed : boolean, default False
235+
observed : bool, default False
235236
If we are a Categorical, use the observed values
236237
in_axis : if the Grouping is a column in self.obj and hence among
237238
Groupby.exclusions list
@@ -240,25 +241,22 @@ class Grouping:
240241
-------
241242
**Attributes**:
242243
* indices : dict of {group -> index_list}
243-
* labels : ndarray, group labels
244-
* ids : mapping of label -> group
245-
* counts : array of group counts
244+
* codes : ndarray, group codes
246245
* group_index : unique groups
247246
* groups : dict of {group -> label_list}
248247
"""
249248

250249
def __init__(
251250
self,
252-
index,
251+
index: Index,
253252
grouper=None,
254-
obj=None,
253+
obj: Optional[FrameOrSeries] = None,
255254
name=None,
256255
level=None,
257-
sort=True,
258-
observed=False,
259-
in_axis=False,
256+
sort: bool = True,
257+
observed: bool = False,
258+
in_axis: bool = False,
260259
):
261-
262260
self.name = name
263261
self.level = level
264262
self.grouper = _convert_grouper(index, grouper)
@@ -290,12 +288,12 @@ def __init__(
290288
if self.name is None:
291289
self.name = index.names[level]
292290

293-
self.grouper, self._labels, self._group_index = index._get_grouper_for_level( # noqa: E501
291+
self.grouper, self._codes, self._group_index = index._get_grouper_for_level( # noqa: E501
294292
self.grouper, level
295293
)
296294

297295
# a passed Grouper like, directly get the grouper in the same way
298-
# as single grouper groupby, use the group_info to get labels
296+
# as single grouper groupby, use the group_info to get codes
299297
elif isinstance(self.grouper, Grouper):
300298
# get the new grouper; we already have disambiguated
301299
# what key/level refer to exactly, don't need to
@@ -308,7 +306,7 @@ def __init__(
308306
self.grouper = grouper._get_grouper()
309307

310308
else:
311-
if self.grouper is None and self.name is not None:
309+
if self.grouper is None and self.name is not None and self.obj is not None:
312310
self.grouper = self.obj[self.name]
313311

314312
elif isinstance(self.grouper, (list, tuple)):
@@ -324,7 +322,7 @@ def __init__(
324322

325323
# we make a CategoricalIndex out of the cat grouper
326324
# preserving the categories / ordered attributes
327-
self._labels = self.grouper.codes
325+
self._codes = self.grouper.codes
328326
if observed:
329327
codes = algorithms.unique1d(self.grouper.codes)
330328
codes = codes[codes != -1]
@@ -380,11 +378,11 @@ def __repr__(self):
380378
def __iter__(self):
381379
return iter(self.indices)
382380

383-
_labels = None
384-
_group_index = None
381+
_codes = None # type: np.ndarray
382+
_group_index = None # type: Index
385383

386384
@property
387-
def ngroups(self):
385+
def ngroups(self) -> int:
388386
return len(self.group_index)
389387

390388
@cache_readonly
@@ -397,38 +395,38 @@ def indices(self):
397395
return values._reverse_indexer()
398396

399397
@property
400-
def labels(self):
401-
if self._labels is None:
402-
self._make_labels()
403-
return self._labels
398+
def codes(self) -> np.ndarray:
399+
if self._codes is None:
400+
self._make_codes()
401+
return self._codes
404402

405403
@cache_readonly
406-
def result_index(self):
404+
def result_index(self) -> Index:
407405
if self.all_grouper is not None:
408406
return recode_from_groupby(self.all_grouper, self.sort, self.group_index)
409407
return self.group_index
410408

411409
@property
412-
def group_index(self):
410+
def group_index(self) -> Index:
413411
if self._group_index is None:
414-
self._make_labels()
412+
self._make_codes()
415413
return self._group_index
416414

417-
def _make_labels(self):
418-
if self._labels is None or self._group_index is None:
415+
def _make_codes(self) -> None:
416+
if self._codes is None or self._group_index is None:
419417
# we have a list of groupers
420418
if isinstance(self.grouper, BaseGrouper):
421-
labels = self.grouper.label_info
419+
codes = self.grouper.codes_info
422420
uniques = self.grouper.result_index
423421
else:
424-
labels, uniques = algorithms.factorize(self.grouper, sort=self.sort)
422+
codes, uniques = algorithms.factorize(self.grouper, sort=self.sort)
425423
uniques = Index(uniques, name=self.name)
426-
self._labels = labels
424+
self._codes = codes
427425
self._group_index = uniques
428426

429427
@cache_readonly
430-
def groups(self):
431-
return self.index.groupby(Categorical.from_codes(self.labels, self.group_index))
428+
def groups(self) -> dict:
429+
return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
432430

433431

434432
def _get_grouper(
@@ -678,7 +676,7 @@ def _is_label_like(val):
678676
return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val))
679677

680678

681-
def _convert_grouper(axis, grouper):
679+
def _convert_grouper(axis: Index, grouper):
682680
if isinstance(grouper, dict):
683681
return grouper.get
684682
elif isinstance(grouper, Series):

0 commit comments

Comments
 (0)