Skip to content

Commit 1f92c47

Browse files
jbrockmendel authored and yeshsurya committed
REF: hide ArrayManager implementation details from GroupBy (pandas-dev#41086)
1 parent 864060f commit 1f92c47

File tree

2 files changed

+9
-14
lines changed

2 files changed

+9
-14
lines changed

pandas/core/groupby/generic.py

+1 -14
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,6 @@
9797
all_indexes_same,
9898
)
9999
import pandas.core.indexes.base as ibase
100-
from pandas.core.internals import ArrayManager
101100
from pandas.core.series import Series
102101
from pandas.core.util.numba_ import maybe_use_numba
103102

@@ -1103,17 +1102,11 @@ def _cython_agg_general(
11031102
if numeric_only:
11041103
data = data.get_numeric_data(copy=False)
11051104

1106-
using_array_manager = isinstance(data, ArrayManager)
1107-
11081105
def cast_agg_result(result: ArrayLike, values: ArrayLike) -> ArrayLike:
11091106
# see if we can cast the values to the desired dtype
11101107
# this may not be the original dtype
11111108

1112-
if (
1113-
not using_array_manager
1114-
and isinstance(result.dtype, np.dtype)
1115-
and result.ndim == 1
1116-
):
1109+
if isinstance(result.dtype, np.dtype) and result.ndim == 1:
11171110
# We went through a SeriesGroupByPath and need to reshape
11181111
# GH#32223 includes case with IntegerArray values
11191112
# We only get here with values.dtype == object
@@ -1794,8 +1787,6 @@ def count(self) -> DataFrame:
17941787
ids, _, ngroups = self.grouper.group_info
17951788
mask = ids != -1
17961789

1797-
using_array_manager = isinstance(data, ArrayManager)
1798-
17991790
def hfunc(bvalues: ArrayLike) -> ArrayLike:
18001791
# TODO(2DEA): reshape would not be necessary with 2D EAs
18011792
if bvalues.ndim == 1:
@@ -1805,10 +1796,6 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
18051796
masked = mask & ~isna(bvalues)
18061797

18071798
counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1)
1808-
if using_array_manager:
1809-
# count_level_2d return (1, N) array for single column
1810-
# -> extract 1D array
1811-
counted = counted[0, :]
18121799
return counted
18131800

18141801
new_mgr = data.grouped_reduce(hfunc)

pandas/core/internals/array_manager.py

+8
Original file line number | Diff line number | Diff line change
@@ -932,12 +932,20 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
932932
result_indices: list[int] = []
933933

934934
for i, arr in enumerate(self.arrays):
935+
# grouped_reduce functions all expect 2D arrays
936+
arr = ensure_block_shape(arr, ndim=2)
935937
try:
936938
res = func(arr)
937939
except (TypeError, NotImplementedError):
938940
if not ignore_failures:
939941
raise
940942
continue
943+
944+
if res.ndim == 2:
945+
# reverse of ensure_block_shape
946+
assert res.shape[0] == 1
947+
res = res[0]
948+
941949
result_arrays.append(res)
942950
result_indices.append(i)
943951

0 commit comments

Comments
 (0)