Skip to content

Commit 2d20b00

Browse files
jbrockmendel and SeeminSyed
authored and committed
REF: implement nanops.na_accum_func (pandas-dev#32597)
1 parent 231f54c commit 2d20b00

File tree

2 files changed

+81
-65
lines changed

2 files changed

+81
-65
lines changed

pandas/core/generic.py

+8-64
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
from pandas._config import config
3232

33-
from pandas._libs import Timestamp, iNaT, lib
33+
from pandas._libs import Timestamp, lib
3434
from pandas._typing import (
3535
Axis,
3636
FilePathOrBuffer,
@@ -10106,8 +10106,6 @@ def mad(self, axis=None, skipna=None, level=None):
1010610106
desc="minimum",
1010710107
accum_func=np.minimum.accumulate,
1010810108
accum_func_name="min",
10109-
mask_a=np.inf,
10110-
mask_b=np.nan,
1011110109
examples=_cummin_examples,
1011210110
)
1011310111
cls.cumsum = _make_cum_function(
@@ -10119,8 +10117,6 @@ def mad(self, axis=None, skipna=None, level=None):
1011910117
desc="sum",
1012010118
accum_func=np.cumsum,
1012110119
accum_func_name="sum",
10122-
mask_a=0.0,
10123-
mask_b=np.nan,
1012410120
examples=_cumsum_examples,
1012510121
)
1012610122
cls.cumprod = _make_cum_function(
@@ -10132,8 +10128,6 @@ def mad(self, axis=None, skipna=None, level=None):
1013210128
desc="product",
1013310129
accum_func=np.cumprod,
1013410130
accum_func_name="prod",
10135-
mask_a=1.0,
10136-
mask_b=np.nan,
1013710131
examples=_cumprod_examples,
1013810132
)
1013910133
cls.cummax = _make_cum_function(
@@ -10145,8 +10139,6 @@ def mad(self, axis=None, skipna=None, level=None):
1014510139
desc="maximum",
1014610140
accum_func=np.maximum.accumulate,
1014710141
accum_func_name="max",
10148-
mask_a=-np.inf,
10149-
mask_b=np.nan,
1015010142
examples=_cummax_examples,
1015110143
)
1015210144

@@ -11186,8 +11178,6 @@ def _make_cum_function(
1118611178
desc: str,
1118711179
accum_func: Callable,
1118811180
accum_func_name: str,
11189-
mask_a: float,
11190-
mask_b: float,
1119111181
examples: str,
1119211182
) -> Callable:
1119311183
@Substitution(
@@ -11209,61 +11199,15 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
1120911199
if axis == 1:
1121011200
return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T
1121111201

11212-
def na_accum_func(blk_values):
11213-
# We will be applying this function to block values
11214-
if blk_values.dtype.kind in ["m", "M"]:
11215-
# GH#30460, GH#29058
11216-
# numpy 1.18 started sorting NaTs at the end instead of beginning,
11217-
# so we need to work around to maintain backwards-consistency.
11218-
orig_dtype = blk_values.dtype
11219-
11220-
# We need to define mask before masking NaTs
11221-
mask = isna(blk_values)
11222-
11223-
if accum_func == np.minimum.accumulate:
11224-
# Note: the accum_func comparison fails as an "is" comparison
11225-
y = blk_values.view("i8")
11226-
y[mask] = np.iinfo(np.int64).max
11227-
changed = True
11228-
else:
11229-
y = blk_values
11230-
changed = False
11231-
11232-
result = accum_func(y.view("i8"), axis)
11233-
if skipna:
11234-
np.putmask(result, mask, iNaT)
11235-
elif accum_func == np.minimum.accumulate:
11236-
# Restore NaTs that we masked previously
11237-
nz = (~np.asarray(mask)).nonzero()[0]
11238-
if len(nz):
11239-
# everything up to the first non-na entry stays NaT
11240-
result[: nz[0]] = iNaT
11241-
11242-
if changed:
11243-
# restore NaT elements
11244-
y[mask] = iNaT # TODO: could try/finally for this?
11245-
11246-
if isinstance(blk_values, np.ndarray):
11247-
result = result.view(orig_dtype)
11248-
else:
11249-
# DatetimeArray
11250-
result = type(blk_values)._from_sequence(result, dtype=orig_dtype)
11251-
11252-
elif skipna and not issubclass(
11253-
blk_values.dtype.type, (np.integer, np.bool_)
11254-
):
11255-
vals = blk_values.copy().T
11256-
mask = isna(vals)
11257-
np.putmask(vals, mask, mask_a)
11258-
result = accum_func(vals, axis)
11259-
np.putmask(result, mask, mask_b)
11260-
else:
11261-
result = accum_func(blk_values.T, axis)
11202+
def block_accum_func(blk_values):
11203+
values = blk_values.T if hasattr(blk_values, "T") else blk_values
1126211204

11263-
# transpose back for ndarray, not for EA
11264-
return result.T if hasattr(result, "T") else result
11205+
result = nanops.na_accum_func(values, accum_func, skipna=skipna)
11206+
11207+
result = result.T if hasattr(result, "T") else result
11208+
return result
1126511209

11266-
result = self._data.apply(na_accum_func)
11210+
result = self._data.apply(block_accum_func)
1126711211

1126811212
d = self._construct_axes_dict()
1126911213
d["copy"] = False

pandas/core/nanops.py

+73-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas._config import get_option
99

1010
from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib
11-
from pandas._typing import Dtype, Scalar
11+
from pandas._typing import ArrayLike, Dtype, Scalar
1212
from pandas.compat._optional import import_optional_dependency
1313

1414
from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
@@ -1500,3 +1500,75 @@ def nanpercentile(
15001500
return result
15011501
else:
15021502
return np.percentile(values, q, axis=axis, interpolation=interpolation)
1503+
1504+
1505+
def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    The accumulation is always performed along axis 0 of ``values``.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    KeyError
        If ``accum_func`` is not one of the four supported functions.
    """
    # mask_a: neutral fill value written over NA entries before accumulating.
    # mask_b: value written back into the NA positions of the result.
    mask_a, mask_b = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind in ["m", "M"]:
        # GH#30460, GH#29058
        # numpy 1.18 started sorting NaTs at the end instead of beginning,
        # so we need to work around to maintain backwards-consistency.
        orig_dtype = values.dtype

        # We need to define mask before masking NaTs
        mask = isna(values)

        if accum_func == np.minimum.accumulate:
            # Note: the accum_func comparison fails as an "is" comparison
            # The i8 view shares memory with ``values``; NaT slots are
            # temporarily set to int64-max so they never win a running min.
            y = values.view("i8")
            y[mask] = np.iinfo(np.int64).max
            changed = True
        else:
            y = values
            changed = False

        try:
            result = accum_func(y.view("i8"), axis=0)
        finally:
            if changed:
                # restore NaT elements in the caller's data, even if the
                # accumulation raised
                y[mask] = iNaT

        if skipna:
            result[mask] = iNaT
        elif accum_func == np.minimum.accumulate:
            # Restore NaTs that we masked previously
            nz = (~np.asarray(mask)).nonzero()[0]
            if len(nz):
                # everything up to the first non-na entry stays NaT
                result[: nz[0]] = iNaT

        if isinstance(values, np.ndarray):
            result = result.view(orig_dtype)
        else:
            # DatetimeArray
            result = type(values)._from_sequence(result, dtype=orig_dtype)

    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        # Work on a copy so the caller's array is not modified.
        vals = values.copy()
        mask = isna(vals)
        vals[mask] = mask_a
        result = accum_func(vals, axis=0)
        result[mask] = mask_b
    else:
        result = accum_func(values, axis=0)

    return result

0 commit comments

Comments
 (0)