Skip to content

Commit f38daf3

Browse files
authored
Implement scan operations for decimal columns (#7707)
This adds support for `cummin`, `cummax`, and `cumsum` in cuDF for columns with type `decimal`.

Authors:
- @ChrisJar

Approvers:
- GALI PREM SAGAR (@galipremsagar)

URL: #7707
1 parent 1417297 commit f38daf3

File tree

3 files changed

+57
-2
lines changed

3 files changed

+57
-2
lines changed

python/cudf/cudf/core/column/decimal.py

+3
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,9 @@ def binary_operator(self, op, other, reflect=False):
7272
result.dtype.precision = _binop_precision(self.dtype, other.dtype, op)
7373
return result
7474

75+
def _apply_scan_op(self, op: str) -> ColumnBase:
76+
return libcudf.reduce.scan(op, self, True)
77+
7578
def as_decimal_column(
7679
self, dtype: Dtype, **kwargs
7780
) -> "cudf.core.column.DecimalColumn":

python/cudf/cudf/core/series.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -4725,8 +4725,9 @@ def cumsum(self, axis=0, skipna=True, *args, **kwargs):
47254725
result_col[first_index:] = None
47264726

47274727
# pandas always returns int64 dtype if original dtype is int or `bool`
4728-
if np.issubdtype(result_col.dtype, np.integer) or np.issubdtype(
4729-
result_col.dtype, np.bool_
4728+
if not is_decimal_dtype(result_col.dtype) and (
4729+
np.issubdtype(result_col.dtype, np.integer)
4730+
or np.issubdtype(result_col.dtype, np.bool_)
47304731
):
47314732
return Series(
47324733
result_col.astype(np.int64)._apply_scan_op("sum"),
@@ -4774,6 +4775,11 @@ def cumprod(self, axis=0, skipna=True, *args, **kwargs):
47744775
if axis not in (None, 0):
47754776
raise NotImplementedError("axis parameter is not implemented yet")
47764777

4778+
if is_decimal_dtype(self.dtype):
4779+
raise NotImplementedError(
4780+
"cumprod does not currently support decimal types"
4781+
)
4782+
47774783
skipna = True if skipna is None else skipna
47784784

47794785
if skipna:

python/cudf/cudf/tests/test_scan.py

+46
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import cudf
88
from cudf.tests.utils import INTEGER_TYPES, NUMERIC_TYPES, assert_eq, gen_rand
9+
from cudf.core.dtypes import Decimal64Dtype
910

1011
params_sizes = [0, 1, 2, 5]
1112

@@ -61,6 +62,21 @@ def test_cumsum_masked():
6162
assert_eq(got, expected)
6263

6364

65+
@pytest.mark.parametrize(
66+
"dtype",
67+
[Decimal64Dtype(8, 4), Decimal64Dtype(10, 5), Decimal64Dtype(12, 7)],
68+
)
69+
def test_cumsum_decimal(dtype):
70+
data = ["243.32", "48.245", "-7234.298", np.nan, "-467.2"]
71+
gser = cudf.Series(data).astype(dtype)
72+
pser = pd.Series(data, dtype="float64")
73+
74+
got = gser.cumsum()
75+
expected = cudf.Series.from_pandas(pser.cumsum()).astype(dtype)
76+
77+
assert_eq(got, expected)
78+
79+
6480
@pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
6581
def test_cummin(dtype, nelem):
6682
if dtype == np.int8:
@@ -103,6 +119,21 @@ def test_cummin_masked():
103119
assert_eq(gs.cummin(), expected)
104120

105121

122+
@pytest.mark.parametrize(
123+
"dtype",
124+
[Decimal64Dtype(8, 4), Decimal64Dtype(11, 6), Decimal64Dtype(14, 7)],
125+
)
126+
def test_cummin_decimal(dtype):
127+
data = ["8394.294", np.nan, "-9940.444", np.nan, "-23.928"]
128+
gser = cudf.Series(data).astype(dtype)
129+
pser = pd.Series(data, dtype="float64")
130+
131+
got = gser.cummin()
132+
expected = cudf.Series.from_pandas(pser.cummin()).astype(dtype)
133+
134+
assert_eq(got, expected)
135+
136+
106137
@pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
107138
def test_cummax(dtype, nelem):
108139
if dtype == np.int8:
@@ -145,6 +176,21 @@ def test_cummax_masked():
145176
assert_eq(gs.cummax(), expected)
146177

147178

179+
@pytest.mark.parametrize(
180+
"dtype",
181+
[Decimal64Dtype(8, 4), Decimal64Dtype(11, 6), Decimal64Dtype(14, 7)],
182+
)
183+
def test_cummax_decimal(dtype):
184+
data = [np.nan, "54.203", "8.222", "644.32", "-562.272"]
185+
gser = cudf.Series(data).astype(dtype)
186+
pser = pd.Series(data, dtype="float64")
187+
188+
got = gser.cummax()
189+
expected = cudf.Series.from_pandas(pser.cummax()).astype(dtype)
190+
191+
assert_eq(got, expected)
192+
193+
148194
@pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
149195
def test_cumprod(dtype, nelem):
150196
if dtype == np.int8:

0 commit comments

Comments
 (0)