Skip to content

Commit 0fde208

Browse files
authored
REF: implement Block.reduce for DataFrame._reduce (#35867)
1 parent a3e94de commit 0fde208

File tree

3 files changed

+27
-27
lines changed

3 files changed

+27
-27
lines changed

pandas/core/frame.py

+4 -6
Original file line number | Diff line number | Diff line change
@@ -8647,13 +8647,11 @@ def blk_func(values):
86478647
return op(values, axis=1, skipna=skipna, **kwds)
86488648

86498649
# After possibly _get_data and transposing, we are now in the
8650-
# simple case where we can use BlockManager._reduce
8650+
# simple case where we can use BlockManager.reduce
86518651
res = df._mgr.reduce(blk_func)
8652-
assert isinstance(res, dict)
8653-
if len(res):
8654-
assert len(res) == max(list(res.keys())) + 1, res.keys()
8655-
out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype)
8656-
out.index = df.columns
8652+
out = df._constructor(res,).iloc[0].rename(None)
8653+
if out_dtype is not None:
8654+
out = out.astype(out_dtype)
86578655
if axis == 0 and is_object_dtype(out.dtype):
86588656
out[:] = coerce_to_dtypes(out.values, df.dtypes)
86598657
return out

pandas/core/internals/blocks.py

+15
Original file line number | Diff line number | Diff line change
@@ -346,6 +346,21 @@ def apply(self, func, **kwargs) -> List["Block"]:
346346

347347
return self._split_op_result(result)
348348

349+
def reduce(self, func) -> List["Block"]:
350+
# We will apply the function and reshape the result into a single-row
351+
# Block with the same mgr_locs; squeezing will be done at a higher level
352+
assert self.ndim == 2
353+
354+
result = func(self.values)
355+
if np.ndim(result) == 0:
356+
# TODO(EA2D): special case not needed with 2D EAs
357+
res_values = np.array([[result]])
358+
else:
359+
res_values = result.reshape(-1, 1)
360+
361+
nb = self.make_block(res_values)
362+
return [nb]
363+
349364
def _split_op_result(self, result) -> List["Block"]:
350365
# See also: split_and_operate
351366
if is_extension_array_dtype(result) and result.ndim > 1:

pandas/core/internals/managers.py

+8 -21
Original file line number | Diff line number | Diff line change
@@ -330,31 +330,18 @@ def _verify_integrity(self) -> None:
330330
f"tot_items: {tot_items}"
331331
)
332332

333-
def reduce(self, func):
333+
def reduce(self: T, func) -> T:
334334
# If 2D, we assume that we're operating column-wise
335-
if self.ndim == 1:
336-
# we'll be returning a scalar
337-
blk = self.blocks[0]
338-
return func(blk.values)
335+
assert self.ndim == 2
339336

340-
res = {}
337+
res_blocks = []
341338
for blk in self.blocks:
342-
bres = func(blk.values)
343-
344-
if np.ndim(bres) == 0:
345-
# EA
346-
assert blk.shape[0] == 1
347-
new_res = zip(blk.mgr_locs.as_array, [bres])
348-
else:
349-
assert bres.ndim == 1, bres.shape
350-
assert blk.shape[0] == len(bres), (blk.shape, bres.shape)
351-
new_res = zip(blk.mgr_locs.as_array, bres)
352-
353-
nr = dict(new_res)
354-
assert not any(key in res for key in nr)
355-
res.update(nr)
339+
nbs = blk.reduce(func)
340+
res_blocks.extend(nbs)
356341

357-
return res
342+
index = Index([0]) # placeholder
343+
new_mgr = BlockManager.from_blocks(res_blocks, [self.items, index])
344+
return new_mgr
358345

359346
def operate_blockwise(self, other: "BlockManager", array_op) -> "BlockManager":
360347
"""

0 commit comments

Comments
 (0)