From 57493d1941428af2ab3779568b124697afe944e0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 18 Jan 2023 19:07:22 +0100 Subject: [PATCH] ENH: Get rid of float cast in masked reduction ops --- pandas/core/arrays/masked.py | 7 +------ pandas/core/nanops.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 77735add89bf7..d45fe05d52937 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1081,12 +1081,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): data = self._data mask = self._mask - # coerce to a nan-aware float if needed - # (we explicitly use NaN within reductions) - if self._hasna: - data = self.to_numpy("float64", na_value=np.nan) - - # median, skew, kurt, idxmin, idxmax + # median, skew, kurt, sem op = getattr(nanops, f"nan{name}") result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 02372356d3fe4..c22af960927f6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -758,15 +758,15 @@ def get_median(x): res = np.nanmedian(x[mask]) return res - values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask) + values, mask, dtype, _, _ = _get_values(values, skipna, mask=mask, fill_value=0) if not is_float_dtype(values.dtype): try: values = values.astype("f8") except ValueError as err: # e.g. "could not convert string to float: 'a'" raise TypeError(str(err)) from err - if mask is not None: - values[mask] = np.nan + if mask is not None: + values[mask] = np.nan notempty = values.size @@ -1040,8 +1040,11 @@ def nansem( if not is_float_dtype(values.dtype): values = values.astype("f8") + if not skipna and mask is not None and mask.any(): + return np.nan + count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) - var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof) + var = nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask) return np.sqrt(var) / np.sqrt(count) @@ -1222,6 +1225,8 @@ def nanskew( if skipna and mask is not None: values = values.copy() np.putmask(values, mask, 0) + elif not skipna and mask is not None and mask.any(): + return np.nan mean = values.sum(axis, dtype=np.float64) / count if axis is not None: @@ -1310,6 +1315,8 @@ def nankurt( if skipna and mask is not None: values = values.copy() np.putmask(values, mask, 0) + elif not skipna and mask is not None and mask.any(): + return np.nan mean = values.sum(axis, dtype=np.float64) / count if axis is not None: