Skip to content

REF: implement nanops.na_accum_func #32597

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 8 additions & 64 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

from pandas._config import config

from pandas._libs import Timestamp, iNaT, lib
from pandas._libs import Timestamp, lib
from pandas._typing import (
Axis,
FilePathOrBuffer,
Expand Down Expand Up @@ -10102,8 +10102,6 @@ def mad(self, axis=None, skipna=None, level=None):
desc="minimum",
accum_func=np.minimum.accumulate,
accum_func_name="min",
mask_a=np.inf,
mask_b=np.nan,
examples=_cummin_examples,
)
cls.cumsum = _make_cum_function(
Expand All @@ -10115,8 +10113,6 @@ def mad(self, axis=None, skipna=None, level=None):
desc="sum",
accum_func=np.cumsum,
accum_func_name="sum",
mask_a=0.0,
mask_b=np.nan,
examples=_cumsum_examples,
)
cls.cumprod = _make_cum_function(
Expand All @@ -10128,8 +10124,6 @@ def mad(self, axis=None, skipna=None, level=None):
desc="product",
accum_func=np.cumprod,
accum_func_name="prod",
mask_a=1.0,
mask_b=np.nan,
examples=_cumprod_examples,
)
cls.cummax = _make_cum_function(
Expand All @@ -10141,8 +10135,6 @@ def mad(self, axis=None, skipna=None, level=None):
desc="maximum",
accum_func=np.maximum.accumulate,
accum_func_name="max",
mask_a=-np.inf,
mask_b=np.nan,
examples=_cummax_examples,
)

Expand Down Expand Up @@ -11182,8 +11174,6 @@ def _make_cum_function(
desc: str,
accum_func: Callable,
accum_func_name: str,
mask_a: float,
mask_b: float,
examples: str,
) -> Callable:
@Substitution(
Expand All @@ -11205,61 +11195,15 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
if axis == 1:
return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T

def na_accum_func(blk_values):
# We will be applying this function to block values
if blk_values.dtype.kind in ["m", "M"]:
# GH#30460, GH#29058
# numpy 1.18 started sorting NaTs at the end instead of beginning,
# so we need to work around to maintain backwards-consistency.
orig_dtype = blk_values.dtype

# We need to define mask before masking NaTs
mask = isna(blk_values)

if accum_func == np.minimum.accumulate:
# Note: the accum_func comparison fails as an "is" comparison
y = blk_values.view("i8")
y[mask] = np.iinfo(np.int64).max
changed = True
else:
y = blk_values
changed = False

result = accum_func(y.view("i8"), axis)
if skipna:
np.putmask(result, mask, iNaT)
elif accum_func == np.minimum.accumulate:
# Restore NaTs that we masked previously
nz = (~np.asarray(mask)).nonzero()[0]
if len(nz):
# everything up to the first non-na entry stays NaT
result[: nz[0]] = iNaT

if changed:
# restore NaT elements
y[mask] = iNaT # TODO: could try/finally for this?

if isinstance(blk_values, np.ndarray):
result = result.view(orig_dtype)
else:
# DatetimeArray
result = type(blk_values)._from_sequence(result, dtype=orig_dtype)

elif skipna and not issubclass(
blk_values.dtype.type, (np.integer, np.bool_)
):
vals = blk_values.copy().T
mask = isna(vals)
np.putmask(vals, mask, mask_a)
result = accum_func(vals, axis)
np.putmask(result, mask, mask_b)
else:
result = accum_func(blk_values.T, axis)
def block_accum_func(blk_values):
values = blk_values.T if hasattr(blk_values, "T") else blk_values

# transpose back for ndarray, not for EA
return result.T if hasattr(result, "T") else result
result = nanops.na_accum_func(values, accum_func, skipna=skipna)

result = result.T if hasattr(result, "T") else result
return result

result = self._data.apply(na_accum_func)
result = self._data.apply(block_accum_func)

d = self._construct_axes_dict()
d["copy"] = False
Expand Down
74 changes: 73 additions & 1 deletion pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pandas._config import get_option

from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib
from pandas._typing import Dtype, Scalar
from pandas._typing import ArrayLike, Dtype, Scalar
from pandas.compat._optional import import_optional_dependency

from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
Expand Down Expand Up @@ -1500,3 +1500,75 @@ def nanpercentile(
return result
else:
return np.percentile(values, q, axis=axis, interpolation=interpolation)


def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    The accumulation always runs along axis 0. The data passed in is never
    modified, so read-only inputs are accepted.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool
        If True, NA entries do not take part in the accumulation and are
        written back as NA at their original positions in the result.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    KeyError
        If ``accum_func`` is not one of the four supported functions.
    """
    # mask_a: value substituted for NAs before accumulating so they do not
    # influence the result; mask_b: value written back at the NA positions.
    mask_a, mask_b = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind in ["m", "M"]:
        # GH#30460, GH#29058
        # numpy 1.18 started sorting NaTs at the end instead of beginning,
        # so we need to work around to maintain backwards-consistency.
        orig_dtype = values.dtype

        # The mask must be computed before NaTs are replaced below.
        mask = np.asarray(isna(values))

        # Note: the accum_func comparison fails as an "is" comparison
        is_cummin = accum_func == np.minimum.accumulate

        i8values = values.view("i8")
        if is_cummin:
            # Replace NaTs with the largest int64 so they can never win the
            # running minimum. np.where builds a new array rather than
            # writing through the view, so the caller's data stays intact
            # even for read-only input or if accum_func raises.
            i8values = np.where(mask, np.iinfo(np.int64).max, i8values)

        result = accum_func(i8values, axis=0)
        if skipna:
            result[mask] = iNaT
        elif is_cummin:
            # Everything up to the first non-NA entry stays NaT. The running
            # logical-and of the mask marks exactly those positions, column
            # by column, and covers the all-NaT case as well.
            leading_na = np.logical_and.accumulate(mask, axis=0)
            result[leading_na] = iNaT

        if isinstance(values, np.ndarray):
            result = result.view(orig_dtype)
        else:
            # DatetimeArray
            result = type(values)._from_sequence(result, dtype=orig_dtype)

    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        # Work on a copy so that filling NAs does not touch the input.
        vals = values.copy()
        mask = isna(vals)
        vals[mask] = mask_a
        result = accum_func(vals, axis=0)
        result[mask] = mask_b
    else:
        # Integer/bool data, or skipna=False: accumulate as-is.
        result = accum_func(values, axis=0)

    return result