Skip to content

Commit fa1c6eb

Browse files
jbrockmendel authored and yeshsurya committed
REF: make libreduction behavior match _aggregate_series_pure_python (pandas-dev#41242)
1 parent ccaa262 commit fa1c6eb

File tree

3 files changed: 13 additions (+13) and 20 deletions (-20).

pandas/_libs/reduction.pyx

+4-6
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ from pandas._libs.lib import (
2727
)
2828

2929

30-
cpdef check_result_array(object obj, Py_ssize_t cnt):
30+
cpdef check_result_array(object obj):
3131

3232
if (is_array(obj) or
33-
(isinstance(obj, list) and len(obj) == cnt) or
34-
getattr(obj, 'shape', None) == (cnt,)):
33+
(isinstance(obj, list) and len(obj) == 0) or
34+
getattr(obj, 'shape', None) == (0,)):
3535
raise ValueError('Must produce aggregated value')
3636

3737

@@ -89,9 +89,7 @@ cdef class _BaseGrouper:
8989
# On the first pass, we check the output shape to see
9090
# if this looks like a reduction.
9191
initialized = True
92-
# In all tests other than test_series_grouper and
93-
# test_series_bin_grouper, we have len(self.dummy_arr) == 0
94-
check_result_array(res, len(self.dummy_arr))
92+
check_result_array(res)
9593

9694
return res, initialized
9795

pandas/core/groupby/generic.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,8 @@ def _get_index() -> Index:
510510
return self._reindex_output(result)
511511

512512
def _aggregate_named(self, func, *args, **kwargs):
513+
# Note: this is very similar to _aggregate_series_pure_python,
514+
# but that does not pin group.name
513515
result = {}
514516
initialized = False
515517

@@ -522,7 +524,7 @@ def _aggregate_named(self, func, *args, **kwargs):
522524
output = libreduction.extract_result(output)
523525
if not initialized:
524526
# We only do this validation on the first iteration
525-
libreduction.check_result_array(output, 0)
527+
libreduction.check_result_array(output)
526528
initialized = True
527529
result[name] = output
528530

pandas/core/groupby/ops.py

+6-13
Original file line numberDiff line numberDiff line change
@@ -985,15 +985,7 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]:
985985
# Preempt TypeError in _aggregate_series_fast
986986
return self._aggregate_series_pure_python(obj, func)
987987

988-
try:
989-
return self._aggregate_series_fast(obj, func)
990-
except ValueError as err:
991-
if "Must produce aggregated value" in str(err):
992-
# raised in libreduction
993-
pass
994-
else:
995-
raise
996-
return self._aggregate_series_pure_python(obj, func)
988+
return self._aggregate_series_fast(obj, func)
997989

998990
def _aggregate_series_fast(
999991
self, obj: Series, func: F
@@ -1023,9 +1015,10 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
10231015
result = np.empty(ngroups, dtype="O")
10241016
initialized = False
10251017

1018+
# equiv: splitter = self._get_splitter(obj, axis=0)
10261019
splitter = get_splitter(obj, group_index, ngroups, axis=0)
10271020

1028-
for label, group in enumerate(splitter):
1021+
for i, group in enumerate(splitter):
10291022

10301023
# Each step of this loop corresponds to
10311024
# libreduction._BaseGrouper._apply_to_group
@@ -1034,11 +1027,11 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
10341027

10351028
if not initialized:
10361029
# We only do this validation on the first iteration
1037-
libreduction.check_result_array(res, 0)
1030+
libreduction.check_result_array(res)
10381031
initialized = True
10391032

1040-
counts[label] = group.shape[0]
1041-
result[label] = res
1033+
counts[i] = group.shape[0]
1034+
result[i] = res
10421035

10431036
npvalues = lib.maybe_convert_objects(result, try_float=False)
10441037
out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)

0 commit comments

Comments
 (0)