pandas-dev · jreback · Mar 16, 2021 · Mar 5, 2021 · Mar 5, 2021 · Mar 5, 2021
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from functools import wraps
 import re
 from typing import (
     TYPE_CHECKING,
@@ -30,6 +31,7 @@
     ArrayLike,
     Dtype,
     DtypeObj,
+    F,
     Shape,
     final,
 )
@@ -121,6 +123,24 @@
 _dtype_obj = np.dtype("object")
 
 
+def maybe_split(meth: F) -> F:
+    """
+    If we have a multi-column block, split and operate block-wise.  Otherwise
+    use the original method.
+    """
+
+    @wraps(meth)
+    def newfunc(self, *args, **kwargs) -> List[Block]:
+
+        if self.ndim == 1 or self.shape[0] == 1:
+            return meth(self, *args, **kwargs)
+        else:
+            # Split and operate column-by-column
+            return self.split_and_operate(meth, *args, **kwargs)
+
+    return cast(F, newfunc)
+
+
 class Block(PandasObject):
     """
     Canonical n-dimensional unit of homogeneous dtype contained in a pandas
@@ -441,17 +461,16 @@ def fillna(
             # we can't process the value, but nothing to do
             return [self] if inplace else [self.copy()]
 
-        # operate column-by-column
-        def f(mask, val, idx):
-            block = self.coerce_to_target_dtype(value)
-
-            # slice out our block
-            if idx is not None:
-                # i.e. self.ndim == 2
-                block = block.getitem_block(slice(idx, idx + 1))
-            return block.fillna(value, limit=limit, inplace=inplace, downcast=None)
+        elif self.ndim == 1 or self.shape[0] == 1:
+            blk = self.coerce_to_target_dtype(value)
+            # bc we have already cast, inplace=True may avoid an extra copy
+            return blk.fillna(value, limit=limit, inplace=True, downcast=None)
 
-        return self.split_and_operate(None, f, inplace)
+        else:
+            # operate column-by-column
+            return self.split_and_operate(
+                type(self).fillna, value, limit=limit, inplace=inplace, downcast=None
+            )
 
     @final
     def _split(self) -> List[Block]:
@@ -469,75 +488,27 @@ def _split(self) -> List[Block]:
         return new_blocks
 
     @final
-    def split_and_operate(
-        self, mask, f, inplace: bool, ignore_failures: bool = False
-    ) -> List[Block]:
+    def split_and_operate(self, func, *args, **kwargs) -> List[Block]:
         """
-        split the block per-column, and apply the callable f
-        per-column, return a new block for each. Handle
-        masking which will not change a block unless needed.
+        Split the block and apply func column-by-column.
 
         Parameters
         ----------
-        mask : 2-d boolean mask
-        f : callable accepting (1d-mask, 1d values, indexer)
-        inplace : bool
-        ignore_failures : bool, default False
+        func : Block method
+        *args
+        **kwargs
 
         Returns
         -------
-        list of blocks
+        List[Block]
         """
-        if mask is None:
-            mask = np.broadcast_to(True, shape=self.shape)
-
-        new_values = self.values
-
-        def make_a_block(nv, ref_loc):
-            if isinstance(nv, list):
-                assert len(nv) == 1, nv
-                assert isinstance(nv[0], Block)
-                block = nv[0]
-            else:
-                # Put back the dimension that was taken from it and make
-                # a block out of the result.
-                nv = ensure_block_shape(nv, ndim=self.ndim)
-                block = self.make_block(values=nv, placement=ref_loc)
-            return block
-
-        # ndim == 1
-        if self.ndim == 1:
-            if mask.any():
-                nv = f(mask, new_values, None)
-            else:
-                nv = new_values if inplace else new_values.copy()
-            block = make_a_block(nv, self._mgr_locs)
-            return [block]
-
-        # ndim > 1
-        new_blocks = []
-        for i, ref_loc in enumerate(self._mgr_locs):
-            m = mask[i]
-            v = new_values[i]
-
-            # need a new block
-            if m.any() or m.size == 0:
-                # Apply our function; we may ignore_failures if this is a
-                #  reduction that is dropping nuisance columns GH#37827
-                try:
-                    nv = f(m, v, i)
-                except TypeError:
-                    if ignore_failures:
-                        continue
-                    else:
-                        raise
-            else:
-                nv = v if inplace else v.copy()
-
-            block = make_a_block(nv, [ref_loc])
-            new_blocks.append(block)
+        assert self.ndim == 2 and self.shape[0] != 1
 
-        return new_blocks
+        res_blocks = []
+        for nb in self._split():
+            rbs = func(nb, *args, **kwargs)
+            res_blocks.extend(rbs)
+        return res_blocks
 
     def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]:
 
@@ -577,13 +548,17 @@ def downcast(self, dtypes=None) -> List[Block]:
         elif dtypes != "infer":
             raise AssertionError("dtypes as dict is not supported yet")
 
-        # operate column-by-column
-        # this is expensive as it splits the blocks items-by-item
-        def f(mask, val, idx):
-            val = maybe_downcast_to_dtype(val, dtype="infer")
-            return val
+        return self._downcast_2d()
 
-        return self.split_and_operate(None, f, False)
+    @maybe_split
+    def _downcast_2d(self) -> List[Block]:
+        """
+        downcast specialized to 2D case post-validation.
+
+        Refactored to allow use of maybe_split.
+        """
+        new_values = maybe_downcast_to_dtype(self.values, dtype="infer")
+        return [self.make_block(new_values)]
 
     @final
     def astype(self, dtype, copy: bool = False, errors: str = "raise"):
@@ -712,18 +687,13 @@ def replace(
             #  bc _can_hold_element is incorrect.
             return [self] if inplace else [self.copy()]
 
-        if not self._can_hold_element(value):
-            if self.ndim == 2 and self.shape[0] > 1:
-                # split so that we only upcast where necessary
-                nbs = self._split()
-                res_blocks = extend_blocks(
-                    [
-                        blk.replace(to_replace, value, inplace=inplace, regex=regex)
-                        for blk in nbs
-                    ]
-                )
-                return res_blocks
+        elif self._can_hold_element(value):
+            blk = self if inplace else self.copy()
+            putmask_inplace(blk.values, mask, value)
+            blocks = blk.convert(numeric=False, copy=False)
+            return blocks
 
+        elif self.ndim == 1 or self.shape[0] == 1:
             blk = self.coerce_to_target_dtype(value)
             return blk.replace(
                 to_replace=to_replace,
@@ -732,10 +702,11 @@ def replace(
                 regex=regex,
             )
 
-        blk = self if inplace else self.copy()
-        putmask_inplace(blk.values, mask, value)
-        blocks = blk.convert(numeric=False, copy=False)
-        return blocks
+        else:
+            # split so that we only upcast where necessary
+            return self.split_and_operate(
+                type(self).replace, to_replace, value, inplace=inplace, regex=regex
+            )
 
     @final
     def _replace_regex(
@@ -2025,6 +1996,8 @@ class ObjectBlock(Block):
     is_object = True
     _can_hold_na = True
 
+    values: np.ndarray
+
     @property
     def is_bool(self):
         """
@@ -2033,26 +2006,15 @@ def is_bool(self):
         """
         return lib.is_bool_array(self.values.ravel("K"))
 
+    @maybe_split
     def reduce(self, func, ignore_failures: bool = False) -> List[Block]:
         """
         For object-dtype, we operate column-wise.
         """
         assert self.ndim == 2
 
-        values = self.values
-        if len(values) > 1:
-            # split_and_operate expects func with signature (mask, values, inplace)
-            def mask_func(mask, values, inplace):
-                if values.ndim == 1:
-                    values = values.reshape(1, -1)
-                return func(values)
-
-            return self.split_and_operate(
-                None, mask_func, False, ignore_failures=ignore_failures
-            )
-
         try:
-            res = func(values)
+            res = func(self.values)
         except TypeError:
             if not ignore_failures:
                 raise
@@ -2063,6 +2025,7 @@ def mask_func(mask, values, inplace):
         res = res.reshape(1, -1)
         return [self.make_block_same_class(res)]
 
+    @maybe_split
     def convert(
         self,
         copy: bool = True,
@@ -2074,30 +2037,15 @@ def convert(
         attempt to cast any object types to better types return a copy of
         the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
         """
-
-        # operate column-by-column
-        def f(mask, val, idx):
-            shape = val.shape
-            values = soft_convert_objects(
-                val.ravel(),
-                datetime=datetime,
-                numeric=numeric,
-                timedelta=timedelta,
-                copy=copy,
-            )
-            if isinstance(values, np.ndarray):
-                # TODO(EA2D): allow EA once reshape is supported
-                values = values.reshape(shape)
-
-            return values
-
-        if self.ndim == 2:
-            blocks = self.split_and_operate(None, f, False)
-        else:
-            values = f(None, self.values.ravel(), None)
-            blocks = [self.make_block(values)]
-
-        return blocks
+        res_values = soft_convert_objects(
+            self.values.ravel(),
+            datetime=datetime,
+            numeric=numeric,
+            timedelta=timedelta,
+            copy=copy,
+        )
+        res_values = ensure_block_shape(res_values, self.ndim)
+        return [self.make_block(res_values)]
 
     def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: