Skip to content

Commit 3c038e0

Browse files
jbrockmendel authored and JulianWgs committed
CLN: standardize naming in core.groupby (pandas-dev#41247)
1 parent 1ac3aad commit 3c038e0

File tree

5 files changed

+50
-53
lines changed

5 files changed

+50
-53
lines changed

pandas/core/groupby/categorical.py

+2 -5
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,4 @@
1-
from typing import (
2-
Optional,
3-
Tuple,
4-
)
1+
from __future__ import annotations
52

63
import numpy as np
74

@@ -16,7 +13,7 @@
1613

1714
def recode_for_groupby(
1815
c: Categorical, sort: bool, observed: bool
19-
) -> Tuple[Categorical, Optional[Categorical]]:
16+
) -> tuple[Categorical, Categorical | None]:
2017
"""
2118
Code the categories to ensure we can groupby for categoricals.
2219

pandas/core/groupby/generic.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -366,7 +366,7 @@ def array_func(values: ArrayLike) -> ArrayLike:
366366
)
367367
except NotImplementedError:
368368
ser = Series(values) # equiv 'obj' from outer frame
369-
if self.grouper.ngroups > 0:
369+
if self.ngroups > 0:
370370
res_values, _ = self.grouper.agg_series(ser, alt)
371371
else:
372372
# equiv: res_values = self._python_agg_general(alt)
@@ -604,12 +604,12 @@ def _transform_fast(self, result) -> Series:
604604
fast version of transform, only applicable to
605605
builtin/cythonizable functions
606606
"""
607-
ids, _, ngroup = self.grouper.group_info
607+
ids, _, _ = self.grouper.group_info
608608
result = result.reindex(self.grouper.result_index, copy=False)
609609
out = algorithms.take_nd(result._values, ids)
610610
return self.obj._constructor(out, index=self.obj.index, name=self.obj.name)
611611

612-
def filter(self, func, dropna=True, *args, **kwargs):
612+
def filter(self, func, dropna: bool = True, *args, **kwargs):
613613
"""
614614
Return a copy of a Series excluding elements from groups that
615615
do not satisfy the boolean criterion specified by func.
@@ -1445,7 +1445,7 @@ def _transform_fast(self, result: DataFrame) -> DataFrame:
14451445
obj = self._obj_with_exclusions
14461446

14471447
# for each col, reshape to size of original frame by take operation
1448-
ids, _, ngroup = self.grouper.group_info
1448+
ids, _, _ = self.grouper.group_info
14491449
result = result.reindex(self.grouper.result_index, copy=False)
14501450
output = [
14511451
algorithms.take_nd(result.iloc[:, i].values, ids)

pandas/core/groupby/groupby.py

+13 -12
Original file line number | Diff line number | Diff line change
@@ -1108,13 +1108,13 @@ def _numba_prep(self, func, data):
11081108
raise NotImplementedError(
11091109
"Numba engine can only be used with a single function."
11101110
)
1111-
labels, _, n_groups = self.grouper.group_info
1112-
sorted_index = get_group_index_sorter(labels, n_groups)
1113-
sorted_labels = algorithms.take_nd(labels, sorted_index, allow_fill=False)
1111+
ids, _, ngroups = self.grouper.group_info
1112+
sorted_index = get_group_index_sorter(ids, ngroups)
1113+
sorted_ids = algorithms.take_nd(ids, sorted_index, allow_fill=False)
11141114

11151115
sorted_data = data.take(sorted_index, axis=self.axis).to_numpy()
11161116

1117-
starts, ends = lib.generate_slices(sorted_labels, n_groups)
1117+
starts, ends = lib.generate_slices(sorted_ids, ngroups)
11181118
return starts, ends, sorted_index, sorted_data
11191119

11201120
@final
@@ -1258,11 +1258,12 @@ def _python_agg_general(self, func, *args, **kwargs):
12581258
# iterate through "columns" ex exclusions to populate output dict
12591259
output: dict[base.OutputKey, ArrayLike] = {}
12601260

1261+
if self.ngroups == 0:
1262+
# agg_series below assumes ngroups > 0
1263+
return self._python_apply_general(f, self._selected_obj)
1264+
12611265
for idx, obj in enumerate(self._iterate_slices()):
12621266
name = obj.name
1263-
if self.grouper.ngroups == 0:
1264-
# agg_series below assumes ngroups > 0
1265-
continue
12661267

12671268
try:
12681269
# if this function is invalid for this dtype, we will ignore it.
@@ -1373,7 +1374,7 @@ def _apply_filter(self, indices, dropna):
13731374
return filtered
13741375

13751376
@final
1376-
def _cumcount_array(self, ascending: bool = True):
1377+
def _cumcount_array(self, ascending: bool = True) -> np.ndarray:
13771378
"""
13781379
Parameters
13791380
----------
@@ -2726,7 +2727,7 @@ def _get_cythonized_result(
27262727

27272728
grouper = self.grouper
27282729

2729-
labels, _, ngroups = grouper.group_info
2730+
ids, _, ngroups = grouper.group_info
27302731
output: dict[base.OutputKey, np.ndarray] = {}
27312732
base_func = getattr(libgroupby, how)
27322733

@@ -2759,15 +2760,15 @@ def _get_cythonized_result(
27592760
if pre_processing:
27602761
try:
27612762
vals, inferences = pre_processing(vals)
2762-
except TypeError as e:
2763-
error_msg = str(e)
2763+
except TypeError as err:
2764+
error_msg = str(err)
27642765
continue
27652766
vals = vals.astype(cython_dtype, copy=False)
27662767
if needs_2d:
27672768
vals = vals.reshape((-1, 1))
27682769
func = partial(func, vals)
27692770

2770-
func = partial(func, labels)
2771+
func = partial(func, ids)
27712772

27722773
if min_count is not None:
27732774
func = partial(func, min_count)

pandas/core/groupby/numba_.py

+8 -9
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
11
"""Common utilities for Numba operations with groupby ops"""
2+
from __future__ import annotations
3+
24
import inspect
35
from typing import (
46
Any,
57
Callable,
6-
Dict,
7-
Optional,
8-
Tuple,
98
)
109

1110
import numpy as np
@@ -57,10 +56,10 @@ def f(values, index, ...):
5756

5857

5958
def generate_numba_agg_func(
60-
args: Tuple,
61-
kwargs: Dict[str, Any],
59+
args: tuple,
60+
kwargs: dict[str, Any],
6261
func: Callable[..., Scalar],
63-
engine_kwargs: Optional[Dict[str, bool]],
62+
engine_kwargs: dict[str, bool] | None,
6463
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int], np.ndarray]:
6564
"""
6665
Generate a numba jitted agg function specified by values from engine_kwargs.
@@ -117,10 +116,10 @@ def group_agg(
117116

118117

119118
def generate_numba_transform_func(
120-
args: Tuple,
121-
kwargs: Dict[str, Any],
119+
args: tuple,
120+
kwargs: dict[str, Any],
122121
func: Callable[..., np.ndarray],
123-
engine_kwargs: Optional[Dict[str, bool]],
122+
engine_kwargs: dict[str, bool] | None,
124123
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int], np.ndarray]:
125124
"""
126125
Generate a numba jitted transform function specified by values from engine_kwargs.

pandas/core/groupby/ops.py

+23 -23
Original file line number | Diff line number | Diff line change
@@ -723,8 +723,8 @@ def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> DataSplitter:
723723
724724
__finalize__ has not been called for the subsetted objects returned.
725725
"""
726-
comp_ids, _, ngroups = self.group_info
727-
return get_splitter(data, comp_ids, ngroups, axis=axis)
726+
ids, _, ngroups = self.group_info
727+
return get_splitter(data, ids, ngroups, axis=axis)
728728

729729
def _get_grouper(self):
730730
"""
@@ -740,10 +740,10 @@ def _get_group_keys(self):
740740
if len(self.groupings) == 1:
741741
return self.levels[0]
742742
else:
743-
comp_ids, _, ngroups = self.group_info
743+
ids, _, ngroups = self.group_info
744744

745745
# provide "flattened" iterator for multi-group setting
746-
return get_flattened_list(comp_ids, ngroups, self.levels, self.codes)
746+
return get_flattened_list(ids, ngroups, self.levels, self.codes)
747747

748748
@final
749749
def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
@@ -846,9 +846,9 @@ def size(self) -> Series:
846846
"""
847847
Compute group sizes.
848848
"""
849-
ids, _, ngroup = self.group_info
850-
if ngroup:
851-
out = np.bincount(ids[ids != -1], minlength=ngroup)
849+
ids, _, ngroups = self.group_info
850+
if ngroups:
851+
out = np.bincount(ids[ids != -1], minlength=ngroups)
852852
else:
853853
out = []
854854
return Series(out, index=self.result_index, dtype="int64")
@@ -882,11 +882,11 @@ def group_info(self):
882882
@cache_readonly
883883
def codes_info(self) -> np.ndarray:
884884
# return the codes of items in original grouped axis
885-
codes, _, _ = self.group_info
885+
ids, _, _ = self.group_info
886886
if self.indexer is not None:
887-
sorter = np.lexsort((codes, self.indexer))
888-
codes = codes[sorter]
889-
return codes
887+
sorter = np.lexsort((ids, self.indexer))
888+
ids = ids[sorter]
889+
return ids
890890

891891
@final
892892
def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]:
@@ -906,8 +906,8 @@ def ngroups(self) -> int:
906906
@property
907907
def reconstructed_codes(self) -> list[np.ndarray]:
908908
codes = self.codes
909-
comp_ids, obs_ids, _ = self.group_info
910-
return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True)
909+
ids, obs_ids, _ = self.group_info
910+
return decons_obs_group_ids(ids, obs_ids, self.shape, codes, xnull=True)
911911

912912
@cache_readonly
913913
def result_index(self) -> Index:
@@ -954,13 +954,13 @@ def _cython_operation(
954954

955955
cy_op = WrappedCythonOp(kind=kind, how=how)
956956

957-
comp_ids, _, _ = self.group_info
957+
ids, _, _ = self.group_info
958958
ngroups = self.ngroups
959959
return cy_op.cython_operation(
960960
values=values,
961961
axis=axis,
962962
min_count=min_count,
963-
comp_ids=comp_ids,
963+
comp_ids=ids,
964964
ngroups=ngroups,
965965
**kwargs,
966966
)
@@ -997,26 +997,26 @@ def _aggregate_series_fast(
997997
# - ngroups != 0
998998
func = com.is_builtin_func(func)
999999

1000-
group_index, _, ngroups = self.group_info
1000+
ids, _, ngroups = self.group_info
10011001

10021002
# avoids object / Series creation overhead
1003-
indexer = get_group_index_sorter(group_index, ngroups)
1003+
indexer = get_group_index_sorter(ids, ngroups)
10041004
obj = obj.take(indexer)
1005-
group_index = group_index.take(indexer)
1006-
sgrouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups)
1005+
ids = ids.take(indexer)
1006+
sgrouper = libreduction.SeriesGrouper(obj, func, ids, ngroups)
10071007
result, counts = sgrouper.get_result()
10081008
return result, counts
10091009

10101010
@final
10111011
def _aggregate_series_pure_python(self, obj: Series, func: F):
1012-
group_index, _, ngroups = self.group_info
1012+
ids, _, ngroups = self.group_info
10131013

10141014
counts = np.zeros(ngroups, dtype=int)
10151015
result = np.empty(ngroups, dtype="O")
10161016
initialized = False
10171017

10181018
# equiv: splitter = self._get_splitter(obj, axis=0)
1019-
splitter = get_splitter(obj, group_index, ngroups, axis=0)
1019+
splitter = get_splitter(obj, ids, ngroups, axis=0)
10201020

10211021
for i, group in enumerate(splitter):
10221022

@@ -1152,7 +1152,7 @@ def indices(self):
11521152
@cache_readonly
11531153
def group_info(self):
11541154
ngroups = self.ngroups
1155-
obs_group_ids = np.arange(ngroups)
1155+
obs_group_ids = np.arange(ngroups, dtype=np.int64)
11561156
rep = np.diff(np.r_[0, self.bins])
11571157

11581158
rep = ensure_platform_int(rep)
@@ -1163,7 +1163,7 @@ def group_info(self):
11631163

11641164
return (
11651165
ensure_platform_int(comp_ids),
1166-
obs_group_ids.astype("int64", copy=False),
1166+
obs_group_ids,
11671167
ngroups,
11681168
)
11691169

0 commit comments

Comments (0)