Skip to content

Commit 7d257c6

Browse files
authored
REF: ignore_failures in BlockManager.reduce (#35881)
1 parent d26a630 commit 7d257c6

File tree

3 files changed

+92
-12
lines changed

3 files changed

+92
-12
lines changed

pandas/core/frame.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -8630,6 +8630,7 @@ def _reduce(
86308630
cols = self.columns[~dtype_is_dt]
86318631
self = self[cols]
86328632

8633+
any_object = self.dtypes.apply(is_object_dtype).any()
86338634
# TODO: Make other agg func handle axis=None properly GH#21597
86348635
axis = self._get_axis_number(axis)
86358636
labels = self._get_agg_axis(axis)
@@ -8656,22 +8657,36 @@ def _get_data() -> DataFrame:
86568657
data = self._get_bool_data()
86578658
return data
86588659

8659-
if numeric_only is not None:
8660+
if numeric_only is not None or (
8661+
numeric_only is None
8662+
and axis == 0
8663+
and not any_object
8664+
and not self._mgr.any_extension_types
8665+
):
8666+
# For numeric_only non-None and axis non-None, we know
8667+
# which blocks to use and no try/except is needed.
8668+
# For numeric_only=None, only the case with axis==0 and no object
8669+
# dtypes is unambiguous and can be handled with BlockManager.reduce.
8670+
# For the case with EAs, see GH#35881
86608671
df = self
86618672
if numeric_only is True:
86628673
df = _get_data()
86638674
if axis == 1:
86648675
df = df.T
86658676
axis = 0
86668677

8678+
ignore_failures = numeric_only is None
8679+
86678680
# After possibly _get_data and transposing, we are now in the
86688681
# simple case where we can use BlockManager.reduce
8669-
res = df._mgr.reduce(blk_func)
8670-
out = df._constructor(res).iloc[0].rename(None)
8682+
res, indexer = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
8683+
out = df._constructor(res).iloc[0]
86718684
if out_dtype is not None:
86728685
out = out.astype(out_dtype)
86738686
if axis == 0 and is_object_dtype(out.dtype):
8674-
out[:] = coerce_to_dtypes(out.values, df.dtypes)
8687+
# GH#35865 careful to cast explicitly to object
8688+
nvs = coerce_to_dtypes(out.values, df.dtypes.iloc[np.sort(indexer)])
8689+
out[:] = np.array(nvs, dtype=object)
86758690
return out
86768691

86778692
assert numeric_only is None

pandas/core/internals/blocks.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -365,12 +365,18 @@ def apply(self, func, **kwargs) -> List["Block"]:
365365

366366
return self._split_op_result(result)
367367

368-
def reduce(self, func) -> List["Block"]:
368+
def reduce(self, func, ignore_failures: bool = False) -> List["Block"]:
369369
# We will apply the function and reshape the result into a single-row
370370
# Block with the same mgr_locs; squeezing will be done at a higher level
371371
assert self.ndim == 2
372372

373-
result = func(self.values)
373+
try:
374+
result = func(self.values)
375+
except (TypeError, NotImplementedError):
376+
if ignore_failures:
377+
return []
378+
raise
379+
374380
if np.ndim(result) == 0:
375381
# TODO(EA2D): special case not needed with 2D EAs
376382
res_values = np.array([[result]])
@@ -2427,6 +2433,34 @@ def is_bool(self):
24272433
"""
24282434
return lib.is_bool_array(self.values.ravel("K"))
24292435

2436+
def reduce(self, func, ignore_failures: bool = False) -> List[Block]:
2437+
"""
2438+
For object-dtype, we operate column-wise.
2439+
"""
2440+
assert self.ndim == 2
2441+
2442+
values = self.values
2443+
if len(values) > 1:
2444+
# split_and_operate expects func with signature (mask, values, inplace)
2445+
def mask_func(mask, values, inplace):
2446+
if values.ndim == 1:
2447+
values = values.reshape(1, -1)
2448+
return func(values)
2449+
2450+
return self.split_and_operate(None, mask_func, False)
2451+
2452+
try:
2453+
res = func(values)
2454+
except TypeError:
2455+
if not ignore_failures:
2456+
raise
2457+
return []
2458+
2459+
assert isinstance(res, np.ndarray)
2460+
assert res.ndim == 1
2461+
res = res.reshape(1, -1)
2462+
return [self.make_block_same_class(res)]
2463+
24302464
def convert(
24312465
self,
24322466
copy: bool = True,

pandas/core/internals/managers.py

+37-6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import itertools
33
from typing import (
44
Any,
5+
Callable,
56
DefaultDict,
67
Dict,
78
List,
@@ -324,18 +325,44 @@ def _verify_integrity(self) -> None:
324325
f"tot_items: {tot_items}"
325326
)
326327

327-
def reduce(self: T, func) -> T:
328+
def reduce(
329+
self: T, func: Callable, ignore_failures: bool = False
330+
) -> Tuple[T, np.ndarray]:
331+
"""
332+
Apply reduction function blockwise, returning a single-row BlockManager.
333+
334+
Parameters
335+
----------
336+
func : reduction function
337+
ignore_failures : bool, default False
338+
Whether to drop blocks where func raises TypeError (Block.reduce also treats NotImplementedError as a failure).
339+
340+
Returns
341+
-------
342+
BlockManager
343+
np.ndarray
344+
Indexer of mgr_locs that are retained.
345+
"""
328346
# If 2D, we assume that we're operating column-wise
329347
assert self.ndim == 2
330348

331349
res_blocks: List[Block] = []
332350
for blk in self.blocks:
333-
nbs = blk.reduce(func)
351+
nbs = blk.reduce(func, ignore_failures)
334352
res_blocks.extend(nbs)
335353

336-
index = Index([0]) # placeholder
337-
new_mgr = BlockManager.from_blocks(res_blocks, [self.items, index])
338-
return new_mgr
354+
index = Index([None]) # placeholder
355+
if ignore_failures:
356+
if res_blocks:
357+
indexer = np.concatenate([blk.mgr_locs.as_array for blk in res_blocks])
358+
new_mgr = self._combine(res_blocks, copy=False, index=index)
359+
else:
360+
indexer = []
361+
new_mgr = type(self).from_blocks([], [Index([]), index])
362+
else:
363+
indexer = np.arange(self.shape[0])
364+
new_mgr = type(self).from_blocks(res_blocks, [self.items, index])
365+
return new_mgr, indexer
339366

340367
def operate_blockwise(self, other: "BlockManager", array_op) -> "BlockManager":
341368
"""
@@ -698,7 +725,9 @@ def get_numeric_data(self, copy: bool = False) -> "BlockManager":
698725
"""
699726
return self._combine([b for b in self.blocks if b.is_numeric], copy)
700727

701-
def _combine(self: T, blocks: List[Block], copy: bool = True) -> T:
728+
def _combine(
729+
self: T, blocks: List[Block], copy: bool = True, index: Optional[Index] = None
730+
) -> T:
702731
""" return a new manager with the blocks """
703732
if len(blocks) == 0:
704733
return self.make_empty()
@@ -714,6 +743,8 @@ def _combine(self: T, blocks: List[Block], copy: bool = True) -> T:
714743
new_blocks.append(b)
715744

716745
axes = list(self.axes)
746+
if index is not None:
747+
axes[-1] = index
717748
axes[0] = self.items.take(indexer)
718749

719750
return type(self).from_blocks(new_blocks, axes)

0 commit comments

Comments
 (0)