Skip to content

Commit fbb1f89

Browse files
authored
Deprecate replace with categorical columns (#14988)
Matches pandas 2.2 behavior: pandas-dev/pandas#56385

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- Michael Wang (https://github.com/isVoid)

URL: #14988
1 parent 7294280 commit fbb1f89

File tree

2 files changed

+60
-19
lines changed

2 files changed

+60
-19
lines changed

python/cudf/cudf/core/column/categorical.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import warnings
56
from collections import abc
67
from functools import cached_property
78
from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence, Tuple, cast
@@ -990,14 +991,24 @@ def find_and_replace(
990991
replaced, to_replace_col, replacement_col
991992
)
992993

993-
return column.build_categorical_column(
994+
result = column.build_categorical_column(
994995
categories=new_cats["cats"],
995996
codes=column.build_column(output.base_data, dtype=output.dtype),
996997
mask=output.base_mask,
997998
offset=output.offset,
998999
size=output.size,
9991000
ordered=self.dtype.ordered,
10001001
)
1002+
if result.dtype != self.dtype:
1003+
warnings.warn(
1004+
"The behavior of replace with "
1005+
"CategoricalDtype is deprecated. In a future version, replace "
1006+
"will only be used for cases that preserve the categories. "
1007+
"To change the categories, use ser.cat.rename_categories "
1008+
"instead.",
1009+
FutureWarning,
1010+
)
1011+
return result
10011012

10021013
def isnull(self) -> ColumnBase:
10031014
"""

python/cudf/cudf/tests/test_replace.py

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,24 @@ def test_series_replace_all(gsr, to_replace, value):
5757
else:
5858
pd_value = value
5959

60-
actual = gsr.replace(to_replace=gd_to_replace, value=gd_value)
61-
if pd_value is None:
62-
# TODO: Remove this workaround once cudf
63-
# introduces `no_default` values
64-
expected = psr.replace(to_replace=pd_to_replace)
65-
else:
66-
expected = psr.replace(to_replace=pd_to_replace, value=pd_value)
60+
with expect_warning_if(
61+
isinstance(gsr.dtype, cudf.CategoricalDtype)
62+
and isinstance(gd_to_replace, str)
63+
and gd_to_replace == "one"
64+
):
65+
actual = gsr.replace(to_replace=gd_to_replace, value=gd_value)
66+
with expect_warning_if(
67+
PANDAS_GE_220
68+
and isinstance(gsr.dtype, cudf.CategoricalDtype)
69+
and isinstance(gd_to_replace, str)
70+
and gd_to_replace == "one"
71+
):
72+
if pd_value is None:
73+
# TODO: Remove this workaround once cudf
74+
# introduces `no_default` values
75+
expected = psr.replace(to_replace=pd_to_replace)
76+
else:
77+
expected = psr.replace(to_replace=pd_to_replace, value=pd_value)
6778

6879
assert_eq(
6980
expected.sort_values().reset_index(drop=True),
@@ -82,16 +93,19 @@ def test_series_replace():
8293

8394
# Categorical
8495
psr3 = pd.Series(["one", "two", "three"], dtype="category")
85-
psr4 = psr3.replace("one", "two")
96+
with expect_warning_if(PANDAS_GE_220):
97+
psr4 = psr3.replace("one", "two")
8698
sr3 = cudf.from_pandas(psr3)
87-
sr4 = sr3.replace("one", "two")
99+
with pytest.warns(FutureWarning):
100+
sr4 = sr3.replace("one", "two")
88101
assert_eq(
89102
psr4.sort_values().reset_index(drop=True),
90103
sr4.sort_values().reset_index(drop=True),
91104
)
92-
93-
psr5 = psr3.replace("one", "five")
94-
sr5 = sr3.replace("one", "five")
105+
with expect_warning_if(PANDAS_GE_220):
106+
psr5 = psr3.replace("one", "five")
107+
with pytest.warns(FutureWarning):
108+
sr5 = sr3.replace("one", "five")
95109

96110
assert_eq(psr5, sr5)
97111

@@ -236,11 +250,26 @@ def test_dataframe_replace(df, to_replace, value):
236250
else:
237251
gd_to_replace = to_replace
238252

239-
if pd_value is None:
240-
expected = pdf.replace(to_replace=pd_to_replace)
241-
else:
242-
expected = pdf.replace(to_replace=pd_to_replace, value=pd_value)
243-
actual = gdf.replace(to_replace=gd_to_replace, value=gd_value)
253+
with expect_warning_if(
254+
PANDAS_GE_220
255+
and isinstance(df["a"].dtype, cudf.CategoricalDtype)
256+
and isinstance(to_replace, str)
257+
and to_replace == "two"
258+
and isinstance(value, str)
259+
and value == "three"
260+
):
261+
if pd_value is None:
262+
expected = pdf.replace(to_replace=pd_to_replace)
263+
else:
264+
expected = pdf.replace(to_replace=pd_to_replace, value=pd_value)
265+
with expect_warning_if(
266+
isinstance(df["a"].dtype, cudf.CategoricalDtype)
267+
and isinstance(to_replace, str)
268+
and to_replace == "two"
269+
and isinstance(value, str)
270+
and value == "three"
271+
):
272+
actual = gdf.replace(to_replace=gd_to_replace, value=gd_value)
244273

245274
expected_sorted = expected.sort_values(by=list(expected.columns), axis=0)
246275
actual_sorted = actual.sort_values(by=list(actual.columns), axis=0)
@@ -1342,7 +1371,8 @@ def test_series_replace_errors():
13421371
],
13431372
)
13441373
def test_replace_nulls(gsr, old, new, expected):
1345-
actual = gsr.replace(old, new)
1374+
with expect_warning_if(isinstance(gsr.dtype, cudf.CategoricalDtype)):
1375+
actual = gsr.replace(old, new)
13461376
assert_eq(
13471377
expected.sort_values().reset_index(drop=True),
13481378
actual.sort_values().reset_index(drop=True),

0 commit comments

Comments
 (0)