Skip to content

REF: implement cumulative ops block-wise #29872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 30, 2019
36 changes: 23 additions & 13 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11326,20 +11326,30 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
else:
axis = self._get_axis_number(axis)

y = com.values_from_object(self).copy()

if skipna and issubclass(y.dtype.type, (np.datetime64, np.timedelta64)):
result = accum_func(y, axis)
mask = isna(self)
np.putmask(result, mask, iNaT)
elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)):
mask = isna(self)
np.putmask(y, mask, mask_a)
result = accum_func(y, axis)
np.putmask(result, mask, mask_b)
else:
result = accum_func(y, axis)
if axis == 1:
return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T

def na_accum_func(blk_values):
    """
    Run the cumulative function over the values of a single block.

    The block values are transposed before being handed to ``accum_func``
    and the accumulated result is transposed back before it is returned.
    When ``skipna`` is set, missing entries are masked around the
    accumulation; otherwise the values are accumulated as they are.
    """
    if not skipna:
        # No NA handling requested: accumulate the values directly.
        return accum_func(blk_values.T, axis).T

    scalar_type = blk_values.dtype.type

    # The datetime-like check must stay ahead of the integer check below:
    # np.timedelta64 is itself a subclass of np.signedinteger.
    if issubclass(scalar_type, (np.datetime64, np.timedelta64)):
        accumulated = accum_func(blk_values.T, axis)
        # Positions that were missing in the input become iNaT in the output.
        np.putmask(accumulated, isna(blk_values.T), iNaT)
        return accumulated.T

    if issubclass(scalar_type, (np.integer, np.bool_)):
        # Integer and boolean blocks get no masking.
        return accum_func(blk_values.T, axis).T

    # Work on a copy so the block's own values are not modified in place.
    working = blk_values.copy().T
    na_positions = isna(working)
    # Fill missing entries with mask_a before accumulating ...
    np.putmask(working, na_positions, mask_a)
    accumulated = accum_func(working, axis)
    # ... and write mask_b into those same positions afterwards.
    np.putmask(accumulated, na_positions, mask_b)
    return accumulated.T

result = self._data.apply(na_accum_func)
d = self._construct_axes_dict()
d["copy"] = False
return self._constructor(result, **d).__finalize__(self)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,10 @@ def apply(
axis = obj._info_axis_number
kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)

applied = getattr(b, f)(**kwargs)
if callable(f):
applied = b.apply(f, **kwargs)
else:
applied = getattr(b, f)(**kwargs)
result_blocks = _extend_blocks(applied, result_blocks)

if len(result_blocks) == 0:
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1329,8 +1329,8 @@ def test_agg_cython_table(self, df, func, expected, axis):
_get_cython_table_params(
DataFrame([[np.nan, 1], [1, 2]]),
[
("cumprod", DataFrame([[np.nan, 1], [1.0, 2.0]])),
("cumsum", DataFrame([[np.nan, 1], [1.0, 3.0]])),
("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
],
),
),
Expand All @@ -1339,6 +1339,10 @@ def test_agg_cython_table_transform(self, df, func, expected, axis):
# GH 21224
# test transforming functions in
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
if axis == "columns" or axis == 1:
# operating blockwise doesn't let us preserve dtypes
expected = expected.astype("float64")

result = df.agg(func, axis=axis)
tm.assert_frame_equal(result, expected)

Expand Down