PERF/CLN: avoid potential copies in ravel (#45002)

jbrockmendel · web-flow · commit a3c0e7bcfb8b · 2021-12-22T11:06:15.000-05:00
diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
@@ -12,4 +12,5 @@ def isposinf_scalar(val: object) -> bool: ...
 def isneginf_scalar(val: object) -> bool: ...
 def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
 def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
+def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -1,6 +1,8 @@
 """
 missing types & inference
 """
+from __future__ import annotations
+
 from decimal import Decimal
 from functools import partial
 
@@ -17,6 +19,7 @@
 from pandas._typing import (
     ArrayLike,
     DtypeObj,
+    npt,
 )
 
 from pandas.core.dtypes.common import (
@@ -259,18 +262,22 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
     return result
 
 
-def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray:
+def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]:
     # Working around NumPy ticket 1542
     dtype = values.dtype
-    shape = values.shape
 
     if dtype.kind in ("S", "U"):
         result = np.zeros(values.shape, dtype=bool)
     else:
-        result = np.empty(shape, dtype=bool)
-        vec = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
 
-        result[...] = vec.reshape(shape)
+        if values.ndim == 1:
+            result = libmissing.isnaobj(values, inf_as_na=inf_as_na)
+        elif values.ndim == 2:
+            result = libmissing.isnaobj2d(values, inf_as_na=inf_as_na)
+        else:
+            # 0-D (reached via e.g. mask_missing) or ndim > 2
+            result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
+            result = result.reshape(values.shape)
 
     return result
 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1862,8 +1862,14 @@ def convert(
         attempt to cast any object types to better types return a copy of
         the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
         """
+        values = self.values
+        if values.ndim == 2:
+            # maybe_split ensures we only get here with values.shape[0] == 1, so
+            # take that single row directly; .ravel might make a copy
+            values = values[0]
+
         res_values = soft_convert_objects(
-            self.values.ravel(),
+            values,
             datetime=datetime,
             numeric=numeric,
             timedelta=timedelta,
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -757,13 +757,10 @@ def get_median(x):
         if mask is not None:
             values[mask] = np.nan
 
-    if axis is None:
-        values = values.ravel("K")
-
     notempty = values.size
 
     # an array from a frame
-    if values.ndim > 1:
+    if values.ndim > 1 and axis is not None:
 
         # there's a non-empty array to apply over otherwise numpy raises
         if notempty:
diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py
@@ -47,7 +47,7 @@ def _fill_zeros(result, x, y):
     if is_float_dtype(result.dtype):
         return result
 
-    is_variable_type = hasattr(y, "dtype") or hasattr(y, "type")
+    is_variable_type = hasattr(y, "dtype")
     is_scalar_type = is_scalar(y)
 
     if not is_variable_type and not is_scalar_type:
@@ -58,19 +58,18 @@ def _fill_zeros(result, x, y):
 
     if is_integer_dtype(y.dtype):
 
-        if (y == 0).any():
+        ymask = y == 0
+        if ymask.any():
 
-            # GH#7325, mask and nans must be broadcastable (also: GH#9308)
-            # Raveling and then reshaping makes np.putmask faster
-            mask = ((y == 0) & ~np.isnan(result)).ravel()
+            # GH#7325, mask and nans must be broadcastable
+            mask = ymask & ~np.isnan(result)
 
-            shape = result.shape
-            result = result.astype("float64", copy=False).ravel()
+            # GH#9308 doing ravel on result and mask can improve putmask perf,
+            #  but can also make unwanted copies.
+            result = result.astype("float64", copy=False)
 
             np.putmask(result, mask, np.nan)
 
-            result = result.reshape(shape)
-
     return result