3
3
split-apply-combine paradigm.
4
4
"""
5
5
6
- from typing import Tuple
6
+ from typing import Optional , Tuple
7
7
import warnings
8
8
9
9
import numpy as np
21
21
)
22
22
from pandas .core .dtypes .generic import ABCSeries
23
23
24
+ from pandas ._typing import FrameOrSeries
24
25
import pandas .core .algorithms as algorithms
25
26
from pandas .core .arrays import Categorical , ExtensionArray
26
27
import pandas .core .common as com
@@ -228,10 +229,10 @@ class Grouping:
228
229
----------
229
230
index : Index
230
231
grouper :
231
- obj :
232
+ obj : Union[DataFrame, Series]
232
233
name :
233
234
level :
234
- observed : boolean , default False
235
+ observed : bool , default False
235
236
If we are a Categorical, use the observed values
236
237
in_axis : if the Grouping is a column in self.obj and hence among
237
238
Groupby.exclusions list
@@ -240,25 +241,22 @@ class Grouping:
240
241
-------
241
242
**Attributes**:
242
243
* indices : dict of {group -> index_list}
243
- * labels : ndarray, group labels
244
- * ids : mapping of label -> group
245
- * counts : array of group counts
244
+ * codes : ndarray, group codes
246
245
* group_index : unique groups
247
246
* groups : dict of {group -> label_list}
248
247
"""
249
248
250
249
def __init__ (
251
250
self ,
252
- index ,
251
+ index : Index ,
253
252
grouper = None ,
254
- obj = None ,
253
+ obj : Optional [ FrameOrSeries ] = None ,
255
254
name = None ,
256
255
level = None ,
257
- sort = True ,
258
- observed = False ,
259
- in_axis = False ,
256
+ sort : bool = True ,
257
+ observed : bool = False ,
258
+ in_axis : bool = False ,
260
259
):
261
-
262
260
self .name = name
263
261
self .level = level
264
262
self .grouper = _convert_grouper (index , grouper )
@@ -290,12 +288,12 @@ def __init__(
290
288
if self .name is None :
291
289
self .name = index .names [level ]
292
290
293
- self .grouper , self ._labels , self ._group_index = index ._get_grouper_for_level ( # noqa: E501
291
+ self .grouper , self ._codes , self ._group_index = index ._get_grouper_for_level ( # noqa: E501
294
292
self .grouper , level
295
293
)
296
294
297
295
# a Grouper-like object was passed: directly get the grouper in the same way
298
- # as single grouper groupby, use the group_info to get labels
296
+ # as single grouper groupby, use the group_info to get codes
299
297
elif isinstance (self .grouper , Grouper ):
300
298
# get the new grouper; we already have disambiguated
301
299
# what key/level refer to exactly, don't need to
@@ -308,7 +306,7 @@ def __init__(
308
306
self .grouper = grouper ._get_grouper ()
309
307
310
308
else :
311
- if self .grouper is None and self .name is not None :
309
+ if self .grouper is None and self .name is not None and self . obj is not None :
312
310
self .grouper = self .obj [self .name ]
313
311
314
312
elif isinstance (self .grouper , (list , tuple )):
@@ -324,7 +322,7 @@ def __init__(
324
322
325
323
# we make a CategoricalIndex out of the cat grouper
326
324
# preserving the categories / ordered attributes
327
- self ._labels = self .grouper .codes
325
+ self ._codes = self .grouper .codes
328
326
if observed :
329
327
codes = algorithms .unique1d (self .grouper .codes )
330
328
codes = codes [codes != - 1 ]
@@ -380,11 +378,11 @@ def __repr__(self):
380
378
def __iter__ (self ):
381
379
return iter (self .indices )
382
380
383
- _labels = None
384
- _group_index = None
381
+ _codes = None # type: np.ndarray
382
+ _group_index = None # type: Index
385
383
386
384
@property
387
- def ngroups (self ):
385
+ def ngroups (self ) -> int :
388
386
return len (self .group_index )
389
387
390
388
@cache_readonly
@@ -397,38 +395,38 @@ def indices(self):
397
395
return values ._reverse_indexer ()
398
396
399
397
@property
400
- def labels (self ):
401
- if self ._labels is None :
402
- self ._make_labels ()
403
- return self ._labels
398
+ def codes (self ) -> np . ndarray :
399
+ if self ._codes is None :
400
+ self ._make_codes ()
401
+ return self ._codes
404
402
405
403
@cache_readonly
406
- def result_index (self ):
404
+ def result_index (self ) -> Index :
407
405
if self .all_grouper is not None :
408
406
return recode_from_groupby (self .all_grouper , self .sort , self .group_index )
409
407
return self .group_index
410
408
411
409
@property
412
- def group_index (self ):
410
+ def group_index (self ) -> Index :
413
411
if self ._group_index is None :
414
- self ._make_labels ()
412
+ self ._make_codes ()
415
413
return self ._group_index
416
414
417
- def _make_labels (self ):
418
- if self ._labels is None or self ._group_index is None :
415
+ def _make_codes (self ) -> None :
416
+ if self ._codes is None or self ._group_index is None :
419
417
# we have a list of groupers
420
418
if isinstance (self .grouper , BaseGrouper ):
421
- labels = self .grouper .label_info
419
+ codes = self .grouper .codes_info
422
420
uniques = self .grouper .result_index
423
421
else :
424
- labels , uniques = algorithms .factorize (self .grouper , sort = self .sort )
422
+ codes , uniques = algorithms .factorize (self .grouper , sort = self .sort )
425
423
uniques = Index (uniques , name = self .name )
426
- self ._labels = labels
424
+ self ._codes = codes
427
425
self ._group_index = uniques
428
426
429
427
@cache_readonly
430
- def groups (self ):
431
- return self .index .groupby (Categorical .from_codes (self .labels , self .group_index ))
428
+ def groups (self ) -> dict :
429
+ return self .index .groupby (Categorical .from_codes (self .codes , self .group_index ))
432
430
433
431
434
432
def _get_grouper (
@@ -678,7 +676,7 @@ def _is_label_like(val):
678
676
return isinstance (val , (str , tuple )) or (val is not None and is_scalar (val ))
679
677
680
678
681
- def _convert_grouper (axis , grouper ):
679
+ def _convert_grouper (axis : Index , grouper ):
682
680
if isinstance (grouper , dict ):
683
681
return grouper .get
684
682
elif isinstance (grouper , Series ):
0 commit comments