Skip to content

Commit d0f4646

Browse files
jbrockmendel authored and
AlexeyGy committed
REF: Groupby._get_cythonized_result operate blockwise in axis==1 case (#43435)
1 parent d2f80f0 commit d0f4646

File tree

1 file changed

+42
-39
lines changed

1 file changed

+42
-39
lines changed

pandas/core/groupby/groupby.py

+42-39
Original file line number | Diff line number | Diff line change
@@ -3077,7 +3077,6 @@ def _get_cythonized_result(
30773077
grouper = self.grouper
30783078

30793079
ids, _, ngroups = grouper.group_info
3080-
output: dict[base.OutputKey, ArrayLike] = {}
30813080

30823081
how = base_func.__name__
30833082
base_func = partial(base_func, labels=ids)
@@ -3133,48 +3132,52 @@ def blk_func(values: ArrayLike) -> ArrayLike:
31333132
return result.T
31343133

31353134
obj = self._obj_with_exclusions
3136-
if obj.ndim == 2 and self.axis == 0:
3137-
# Operate block-wise instead of column-by-column
3138-
mgr = obj._mgr
3139-
if numeric_only:
3140-
mgr = mgr.get_numeric_data()
3141-
3142-
# setting ignore_failures=False for troubleshooting
3143-
res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=False)
3144-
output = type(obj)(res_mgr)
3145-
return self._wrap_aggregated_output(output)
3146-
3147-
error_msg = ""
3148-
for idx, obj in enumerate(self._iterate_slices()):
3149-
values = obj._values
3150-
3151-
if numeric_only and not is_numeric_dtype(values.dtype):
3152-
continue
31533135

3154-
try:
3155-
result = blk_func(values)
3156-
except TypeError as err:
3157-
error_msg = str(err)
3158-
howstr = how.replace("group_", "")
3159-
warnings.warn(
3160-
"Dropping invalid columns in "
3161-
f"{type(self).__name__}.{howstr} is deprecated. "
3162-
"In a future version, a TypeError will be raised. "
3163-
f"Before calling .{howstr}, select only columns which "
3164-
"should be valid for the function.",
3165-
FutureWarning,
3166-
stacklevel=find_stack_level(),
3167-
)
3168-
continue
3136+
# Operate block-wise instead of column-by-column
3137+
orig_ndim = obj.ndim
3138+
if orig_ndim == 1:
3139+
# Operate on DataFrame, then squeeze below
3140+
obj = obj.to_frame()
31693141

3170-
key = base.OutputKey(label=obj.name, position=idx)
3171-
output[key] = result
3142+
mgr = obj._mgr
3143+
if self.axis == 1:
3144+
mgr = obj.T._mgr
31723145

3173-
# error_msg is "" on an frame/series with no rows or columns
3174-
if not output and error_msg != "":
3175-
raise TypeError(error_msg)
3146+
if numeric_only:
3147+
mgr = mgr.get_numeric_data()
31763148

3177-
return self._wrap_aggregated_output(output)
3149+
res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)
3150+
if len(res_mgr.items) != len(mgr.items):
3151+
howstr = how.replace("group_", "")
3152+
warnings.warn(
3153+
"Dropping invalid columns in "
3154+
f"{type(self).__name__}.{howstr} is deprecated. "
3155+
"In a future version, a TypeError will be raised. "
3156+
f"Before calling .{howstr}, select only columns which "
3157+
"should be valid for the function.",
3158+
FutureWarning,
3159+
stacklevel=3,
3160+
)
3161+
if len(res_mgr.items) == 0:
3162+
# We re-call grouped_reduce to get the right exception message
3163+
try:
3164+
mgr.grouped_reduce(blk_func, ignore_failures=False)
3165+
except Exception as err:
3166+
error_msg = str(err)
3167+
raise TypeError(error_msg)
3168+
# We should never get here
3169+
raise TypeError("All columns were dropped in grouped_reduce")
3170+
3171+
out = type(obj)(res_mgr)
3172+
3173+
if orig_ndim == 1:
3174+
assert out.ndim == 2
3175+
assert out.shape[1] == 1
3176+
out = out.iloc[:, 0]
3177+
# restore name=None in case to_frame set columns to [0]
3178+
out.name = self.obj.name
3179+
3180+
return self._wrap_aggregated_output(out)
31783181

31793182
@final
31803183
@Substitution(name="groupby")

0 commit comments

Comments
 (0)