From f6ddc6bad63139fcff2ea3ebd447ef81d2323d57 Mon Sep 17 00:00:00 2001
From: Brock
Date: Mon, 20 Dec 2021 12:32:46 -0800
Subject: [PATCH 1/5] PERF: avoid ravel in isna with object dtype

---
Note: _isna_string_dtype now passes 1-D input straight to libmissing.isnaobj
and 2-D input to libmissing.isnaobj2d; only the remaining case still ravels
and reshapes.
NOTE(review): the line structure of this series was restored from a
whitespace-collapsed copy. Hunk line counts were re-checked against the @@
headers, but the leading whitespace of context lines is reconstructed --
confirm with `git apply --check` before applying.

 pandas/core/dtypes/missing.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
index 4e3306e84c1a1..5e810e7464d51 100644
--- a/pandas/core/dtypes/missing.py
+++ b/pandas/core/dtypes/missing.py
@@ -1,6 +1,8 @@
 """
 missing types & inference
 """
+from __future__ import annotations
+
 from decimal import Decimal
 from functools import partial
 
@@ -17,6 +19,7 @@
 from pandas._typing import (
     ArrayLike,
     DtypeObj,
+    npt,
 )
 
 from pandas.core.dtypes.common import (
@@ -259,18 +262,22 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
     return result
 
 
-def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray:
+def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]:
     # Working around NumPy ticket 1542
     dtype = values.dtype
-    shape = values.shape
 
     if dtype.kind in ("S", "U"):
         result = np.zeros(values.shape, dtype=bool)
     else:
-        result = np.empty(shape, dtype=bool)
-        vec = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
-        result[...] = vec.reshape(shape)
+        if values.ndim == 1:
+            result = libmissing.isnaobj(values, inf_as_na=inf_as_na)
+        elif values.ndim == 2:
+            result = libmissing.isnaobj2d(values, inf_as_na=inf_as_na)
+        else:
+            # 0-D, reached via e.g. mask_missing
+            result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
+            result = result.reshape(values.shape)
 
     return result
 
 

From b941333fb0e665405175850d5e311f3ed37f55d4 Mon Sep 17 00:00:00 2001
From: Brock
Date: Mon, 20 Dec 2021 18:13:35 -0800
Subject: [PATCH 2/5] PERF: avoid potential copy in Block.convert

---
Note: for 2-D values, convert now hands the single row values[0] to
soft_convert_objects instead of self.values.ravel().

 pandas/core/internals/blocks.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index abbebcefc7a87..6129cceff1f8f 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1860,8 +1860,14 @@ def convert(
         attempt to cast any object types to better types return a copy of
         the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
         """
+        values = self.values
+        if values.ndim == 2:
+            # maybe_split ensures we only get here with values.shape[0] == 1,
+            # avoid doing .ravel as that might make a copy
+            values = values[0]
+
         res_values = soft_convert_objects(
-            self.values.ravel(),
+            values,
             datetime=datetime,
             numeric=numeric,
             timedelta=timedelta,

From a0429e055227366af9038b828b1c522cc964e5e9 Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 21 Dec 2021 11:03:40 -0800
Subject: [PATCH 3/5] PERF: avoid ravel

---
Note: touches _fill_zeros in pandas/core/ops/missing.py. `y == 0` is computed
once as ymask, and result/mask are no longer raveled before np.putmask and
reshaped afterwards.

 pandas/core/ops/missing.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py
index 65725cbcf5a0c..8d5f7fb8de758 100644
--- a/pandas/core/ops/missing.py
+++ b/pandas/core/ops/missing.py
@@ -47,7 +47,7 @@ def _fill_zeros(result, x, y):
     if is_float_dtype(result.dtype):
         return result
 
-    is_variable_type = hasattr(y, "dtype") or hasattr(y, "type")
+    is_variable_type = hasattr(y, "dtype")
     is_scalar_type = is_scalar(y)
 
     if not is_variable_type and not is_scalar_type:
@@ -58,19 +58,18 @@ def _fill_zeros(result, x, y):
 
     if is_integer_dtype(y.dtype):
 
-        if (y == 0).any():
+        ymask = y == 0
+        if ymask.any():
 
-            # GH#7325, mask and nans must be broadcastable (also: GH#9308)
-            # Raveling and then reshaping makes np.putmask faster
-            mask = ((y == 0) & ~np.isnan(result)).ravel()
+            # GH#7325, mask and nans must be broadcastable
+            mask = ymask & ~np.isnan(result)
 
-            shape = result.shape
-            result = result.astype("float64", copy=False).ravel()
+            # GH#9308 doing ravel on result and mask can improve putmask perf,
+            # but can also make unwanted copies.
+            result = result.astype("float64", copy=False)
 
             np.putmask(result, mask, np.nan)
 
-            result = result.reshape(shape)
-
     return result
 
 

From cdffaf6db2e2a1476ef4df24660e1b1e73d5ea8d Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 21 Dec 2021 11:28:52 -0800
Subject: [PATCH 4/5] PERF: avoid ravel

---
Note: touches pandas/core/nanops.py (hunk located after `def get_median(x)`).
values is no longer raveled when axis is None; instead the `values.ndim > 1`
branch additionally requires `axis is not None`.

 pandas/core/nanops.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 52d2322b11f42..61a81386b54a6 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -757,13 +757,10 @@ def get_median(x):
         if mask is not None:
             values[mask] = np.nan
 
-    if axis is None:
-        values = values.ravel("K")
-
     notempty = values.size
 
     # an array from a frame
-    if values.ndim > 1:
+    if values.ndim > 1 and axis is not None:
 
         # there's a non-empty array to apply over otherwise numpy raises
         if notempty:

From ce8362b83fbb3820ef034b058073250dbd17a38a Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 21 Dec 2021 18:45:50 -0800
Subject: [PATCH 5/5] add isnaobj2d to missing.pyi

---
Note: the new isnaobj2d stub has the same parameters and return annotation as
the isnaobj stub directly above it.

 pandas/_libs/missing.pyi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
index 1177e82906190..ab6841e7ddf35 100644
--- a/pandas/_libs/missing.pyi
+++ b/pandas/_libs/missing.pyi
@@ -12,4 +12,5 @@ def isposinf_scalar(val: object) -> bool: ...
 def isneginf_scalar(val: object) -> bool: ...
 def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
 def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
+def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...