PERF: further improve take (reindex/unstack) for ArrayManager (#40300)

jorisvandenbossche · web-flow · commit 1ced878e3d7a · 2021-03-14T20:30:45.000-04:00
diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py
@@ -67,6 +67,9 @@ def take_nd(
     This dispatches to ``take`` defined on ExtensionArrays. It does not
     currently dispatch to ``SparseArray.take`` for sparse ``arr``.
 
+    Note: this function assumes that the indexer is a valid(ated) indexer with
+    no out of bound indices.
+
     Parameters
     ----------
     arr : np.ndarray or ExtensionArray
@@ -113,8 +116,13 @@ def _take_nd_ndarray(
     allow_fill: bool,
 ) -> np.ndarray:
 
+    if indexer is None:
+        indexer = np.arange(arr.shape[axis], dtype=np.int64)
+        dtype, fill_value = arr.dtype, arr.dtype.type()
+    else:
+        indexer = ensure_int64(indexer, copy=False)
     indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
-        arr, indexer, axis, out, fill_value, allow_fill
+        arr, indexer, out, fill_value, allow_fill
     )
 
     flip_order = False
@@ -159,13 +167,17 @@ def take_1d(
     allow_fill: bool = True,
 ) -> ArrayLike:
     """
-    Specialized version for 1D arrays. Differences compared to take_nd:
+    Specialized version for 1D arrays. Differences compared to `take_nd`:
 
-    - Assumes input (arr, indexer) has already been converted to numpy array / EA
+    - Assumes input array has already been converted to numpy array / EA
+    - Assumes indexer is already guaranteed to be int64 dtype ndarray
     - Only works for 1D arrays
 
     To ensure the lowest possible overhead.
 
+    Note: similarly to `take_nd`, this function assumes that the indexer is
+    a valid(ated) indexer with no out of bound indices.
+
     TODO(ArrayManager): mainly useful for ArrayManager, otherwise can potentially
     be removed again if we don't end up with ArrayManager.
     """
@@ -180,8 +192,11 @@ def take_1d(
             allow_fill=allow_fill,
         )
 
+    if not allow_fill:
+        return arr.take(indexer)
+
     indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
-        arr, indexer, 0, None, fill_value, allow_fill
+        arr, indexer, None, fill_value, allow_fill
     )
 
     # at this point, it's guaranteed that dtype can hold both the arr values
@@ -502,43 +517,32 @@ def _take_2d_multi_object(
 
 def _take_preprocess_indexer_and_fill_value(
     arr: np.ndarray,
-    indexer: Optional[np.ndarray],
-    axis: int,
+    indexer: np.ndarray,
     out: Optional[np.ndarray],
     fill_value,
     allow_fill: bool,
 ):
     mask_info = None
 
-    if indexer is None:
-        indexer = np.arange(arr.shape[axis], dtype=np.int64)
+    if not allow_fill:
         dtype, fill_value = arr.dtype, arr.dtype.type()
+        mask_info = None, False
     else:
-        indexer = ensure_int64(indexer, copy=False)
-        if not allow_fill:
-            dtype, fill_value = arr.dtype, arr.dtype.type()
-            mask_info = None, False
-        else:
-            # check for promotion based on types only (do this first because
-            # it's faster than computing a mask)
-            dtype, fill_value = maybe_promote(arr.dtype, fill_value)
-            if dtype != arr.dtype and (out is None or out.dtype != dtype):
-                # check if promotion is actually required based on indexer
-                mask = indexer == -1
-                # error: Item "bool" of "Union[Any, bool]" has no attribute "any"
-                # [union-attr]
-                needs_masking = mask.any()  # type: ignore[union-attr]
-                # error: Incompatible types in assignment (expression has type
-                # "Tuple[Union[Any, bool], Any]", variable has type
-                # "Optional[Tuple[None, bool]]")
-                mask_info = mask, needs_masking  # type: ignore[assignment]
-                if needs_masking:
-                    if out is not None and out.dtype != dtype:
-                        raise TypeError("Incompatible type for fill_value")
-                else:
-                    # if not, then depromote, set fill_value to dummy
-                    # (it won't be used but we don't want the cython code
-                    # to crash when trying to cast it to dtype)
-                    dtype, fill_value = arr.dtype, arr.dtype.type()
+        # check for promotion based on types only (do this first because
+        # it's faster than computing a mask)
+        dtype, fill_value = maybe_promote(arr.dtype, fill_value)
+        if dtype != arr.dtype and (out is None or out.dtype != dtype):
+            # check if promotion is actually required based on indexer
+            mask = indexer == -1
+            needs_masking = mask.any()
+            mask_info = mask, needs_masking
+            if needs_masking:
+                if out is not None and out.dtype != dtype:
+                    raise TypeError("Incompatible type for fill_value")
+            else:
+                # if not, then depromote, set fill_value to dummy
+                # (it won't be used but we don't want the cython code
+                # to crash when trying to cast it to dtype)
+                dtype, fill_value = arr.dtype, arr.dtype.type()
 
     return indexer, dtype, fill_value, mask_info
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -32,6 +32,7 @@
     soft_convert_objects,
 )
 from pandas.core.dtypes.common import (
+    ensure_int64,
     is_bool_dtype,
     is_datetime64_ns_dtype,
     is_dtype_equal,
@@ -1005,11 +1006,16 @@ def _reindex_indexer(
 
         else:
             validate_indices(indexer, len(self._axes[0]))
+            indexer = ensure_int64(indexer)
+            if (indexer == -1).any():
+                allow_fill = True
+            else:
+                allow_fill = False
             new_arrays = [
                 take_1d(
                     arr,
                     indexer,
-                    allow_fill=True,
+                    allow_fill=allow_fill,
                     fill_value=fill_value,
                     # if fill_value is not None else blk.fill_value
                 )
@@ -1080,15 +1086,24 @@ def unstack(self, unstacker, fill_value) -> ArrayManager:
         unstacked : BlockManager
         """
         indexer, _ = unstacker._indexer_and_to_sort
-        new_indexer = np.full(unstacker.mask.shape, -1)
-        new_indexer[unstacker.mask] = indexer
+        if unstacker.mask.all():
+            new_indexer = indexer
+            allow_fill = False
+        else:
+            new_indexer = np.full(unstacker.mask.shape, -1)
+            new_indexer[unstacker.mask] = indexer
+            allow_fill = True
         new_indexer2D = new_indexer.reshape(*unstacker.full_shape)
+        new_indexer2D = ensure_int64(new_indexer2D)
 
         new_arrays = []
         for arr in self.arrays:
             for i in range(unstacker.full_shape[1]):
                 new_arr = take_1d(
-                    arr, new_indexer2D[:, i], allow_fill=True, fill_value=fill_value
+                    arr,
+                    new_indexer2D[:, i],
+                    allow_fill=allow_fill,
+                    fill_value=fill_value,
                 )
                 new_arrays.append(new_arr)