From 37f588ff749e9617955e5e5f824f094b0b63eb1e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 30 Aug 2021 13:48:40 -0700 Subject: [PATCH 1/2] PERF: nanops --- pandas/core/nanops.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index a80bd8ba76dac..c10efc7f379dd 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -449,6 +449,25 @@ def _na_for_min_count(values: np.ndarray, axis: int | None) -> Scalar | np.ndarr return np.full(result_shape, fill_value, dtype=values.dtype) +def maybe_operate_rowwise(func): + """ + NumPy operations on C-contiguous ndarrays with axis=1 can be + very slow. Operate row-by-row and concatenate the results. + """ + + @functools.wraps(func) + def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): + if axis == 1: + if values.ndim == 2 and values.flags["C_CONTIGUOUS"]: + arrs = list(values) + results = [func(x, **kwargs) for x in arrs] + return np.array(results) + + return func(values, axis=axis, **kwargs) + + return newfunc + + def nanany( values: np.ndarray, *, @@ -543,6 +562,7 @@ def nanall( @disallow("M8") @_datetimelike_compat +@maybe_operate_rowwise def nansum( values: np.ndarray, *, @@ -1111,6 +1131,7 @@ def nanargmin( @disallow("M8", "m8") +@maybe_operate_rowwise def nanskew( values: np.ndarray, *, @@ -1198,6 +1219,7 @@ def nanskew( @disallow("M8", "m8") +@maybe_operate_rowwise def nankurt( values: np.ndarray, *, @@ -1294,6 +1316,7 @@ def nankurt( @disallow("M8", "m8") +@maybe_operate_rowwise def nanprod( values: np.ndarray, *, From 5074138ef279e4c48922a53c466ef8e674cb4101 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 30 Aug 2021 16:46:26 -0700 Subject: [PATCH 2/2] corner cases --- pandas/core/nanops.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index c10efc7f379dd..5d96e9bb6cd19 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -457,11 +457,21 @@ def maybe_operate_rowwise(func): @functools.wraps(func) def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): - if axis == 1: - if values.ndim == 2 and values.flags["C_CONTIGUOUS"]: - arrs = list(values) + if ( + axis == 1 + and values.ndim == 2 + and values.flags["C_CONTIGUOUS"] + and values.dtype != object + ): + arrs = list(values) + if kwargs.get("mask") is not None: + mask = kwargs.pop("mask") + results = [ + func(arrs[i], mask=mask[i], **kwargs) for i in range(len(arrs)) + ] + else: results = [func(x, **kwargs) for x in arrs] - return np.array(results) + return np.array(results) return func(values, axis=axis, **kwargs)