Skip to content

Commit 77ea98e

Browse files
authored
PERF: avoid doing check at each step in loop (pandas-dev#40780)
1 parent 17c7000 commit 77ea98e

File tree

1 file changed

+18
-12
lines changed

1 file changed

+18
-12
lines changed

pandas/core/groupby/ops.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ def get_iterator(
336336
"""
337337
splitter = self._get_splitter(data, axis=axis)
338338
keys = self._get_group_keys()
339-
for key, (i, group) in zip(keys, splitter):
339+
for key, group in zip(keys, splitter):
340340
yield key, group.__finalize__(data, method="groupby")
341341

342342
@final
@@ -411,21 +411,27 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
411411
if len(result_values) == len(group_keys):
412412
return group_keys, result_values, mutated
413413

414-
for key, (i, group) in zip(group_keys, splitter):
415-
object.__setattr__(group, "name", key)
416-
414+
if result_values is None:
417415
# result_values is None if fast apply path wasn't taken
418416
# or fast apply aborted with an unexpected exception.
419417
# In either case, initialize the result list and perform
420418
# the slow iteration.
421-
if result_values is None:
422-
result_values = []
423-
419+
result_values = []
420+
skip_first = False
421+
else:
424422
# If result_values is not None we're in the case that the
425423
# fast apply loop was broken prematurely but we already
426424
# have the result for the first group, which we can reuse.
427-
elif i == 0:
428-
continue
425+
skip_first = True
426+
427+
# This calls DataSplitter.__iter__
428+
zipped = zip(group_keys, splitter)
429+
if skip_first:
430+
# pop the first item from the front of the iterator
431+
next(zipped)
432+
433+
for key, group in zipped:
434+
object.__setattr__(group, "name", key)
429435

430436
# group might be modified
431437
group_axes = group.axes
@@ -779,7 +785,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
779785

780786
splitter = get_splitter(obj, group_index, ngroups, axis=0)
781787

782-
for label, group in splitter:
788+
for label, group in enumerate(splitter):
783789

784790
# Each step of this loop corresponds to
785791
# libreduction._BaseGrouper._apply_to_group
@@ -1012,8 +1018,8 @@ def __iter__(self):
10121018

10131019
starts, ends = lib.generate_slices(self.slabels, self.ngroups)
10141020

1015-
for i, (start, end) in enumerate(zip(starts, ends)):
1016-
yield i, self._chop(sdata, slice(start, end))
1021+
for start, end in zip(starts, ends):
1022+
yield self._chop(sdata, slice(start, end))
10171023

10181024
@cache_readonly
10191025
def sorted_data(self) -> FrameOrSeries:

0 commit comments

Comments
 (0)