Skip to content

Commit 536ce30

Browse files
authored
DEPR: Series[categorical].replace special-casing (#56385)
1 parent ebde354 commit 536ce30

File tree

8 files changed

+116
-23
lines changed

8 files changed

+116
-23
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ Other Deprecations
474474
- Deprecated the ``kind`` keyword in :meth:`Series.resample` and :meth:`DataFrame.resample`, explicitly cast the object's ``index`` instead (:issue:`55895`)
475475
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
476476
- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
477+
- Deprecated the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype`; in a future version ``replace`` will change the values while preserving the categories. To change the categories, use ``ser.cat.rename_categories`` instead (:issue:`55147`)
477478
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
478479
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; will be ``True`` in a future version (:issue:`56236`)
479480
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)

pandas/core/arrays/categorical.py

+13
Original file line numberDiff line numberDiff line change
@@ -2626,6 +2626,8 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
26262626
def _replace(self, *, to_replace, value, inplace: bool = False):
26272627
from pandas import Index
26282628

2629+
orig_dtype = self.dtype
2630+
26292631
inplace = validate_bool_kwarg(inplace, "inplace")
26302632
cat = self if inplace else self.copy()
26312633

@@ -2656,6 +2658,17 @@ def _replace(self, *, to_replace, value, inplace: bool = False):
26562658
new_dtype = CategoricalDtype(new_categories, ordered=self.dtype.ordered)
26572659
NDArrayBacked.__init__(cat, new_codes, new_dtype)
26582660

2661+
if new_dtype != orig_dtype:
2662+
warnings.warn(
2663+
# GH#55147
2664+
"The behavior of Series.replace (and DataFrame.replace) with "
2665+
"CategoricalDtype is deprecated. In a future version, replace "
2666+
"will only be used for cases that preserve the categories. "
2667+
"To change the categories, use ser.cat.rename_categories "
2668+
"instead.",
2669+
FutureWarning,
2670+
stacklevel=find_stack_level(),
2671+
)
26592672
if not inplace:
26602673
return cat
26612674

pandas/tests/arrays/categorical/test_replace.py

+24-4
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
([1, 2, "3"], "5", ["5", "5", 3], True),
3232
],
3333
)
34+
@pytest.mark.filterwarnings(
35+
"ignore:.*with CategoricalDtype is deprecated:FutureWarning"
36+
)
3437
def test_replace_categorical_series(to_replace, value, expected, flip_categories):
3538
# GH 31720
3639

@@ -60,7 +63,13 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
6063
# GH#26988
6164
cat = Categorical(["a", "b"])
6265
expected = Categorical(result)
63-
result = pd.Series(cat, copy=False).replace(to_replace, value)._values
66+
msg = (
67+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
68+
"with CategoricalDtype"
69+
)
70+
warn = FutureWarning if expected_error_msg is not None else None
71+
with tm.assert_produces_warning(warn, match=msg):
72+
result = pd.Series(cat, copy=False).replace(to_replace, value)._values
6473

6574
tm.assert_categorical_equal(result, expected)
6675
if to_replace == "b": # the "c" test is supposed to be unchanged
@@ -69,14 +78,20 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
6978
tm.assert_categorical_equal(cat, expected)
7079

7180
ser = pd.Series(cat, copy=False)
72-
ser.replace(to_replace, value, inplace=True)
81+
with tm.assert_produces_warning(warn, match=msg):
82+
ser.replace(to_replace, value, inplace=True)
7383
tm.assert_categorical_equal(cat, expected)
7484

7585

7686
def test_replace_categorical_ea_dtype():
7787
# GH49404
7888
cat = Categorical(pd.array(["a", "b"], dtype="string"))
79-
result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
89+
msg = (
90+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
91+
"with CategoricalDtype"
92+
)
93+
with tm.assert_produces_warning(FutureWarning, match=msg):
94+
result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
8095
expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
8196
tm.assert_categorical_equal(result, expected)
8297

@@ -85,7 +100,12 @@ def test_replace_maintain_ordering():
85100
# GH51016
86101
dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
87102
ser = pd.Series([0, 1, 2], dtype=dtype)
88-
result = ser.replace(0, 2)
103+
msg = (
104+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
105+
"with CategoricalDtype"
106+
)
107+
with tm.assert_produces_warning(FutureWarning, match=msg):
108+
result = ser.replace(0, 2)
89109
expected_dtype = pd.CategoricalDtype([1, 2], ordered=True)
90110
expected = pd.Series([2, 1, 2], dtype=expected_dtype)
91111
tm.assert_series_equal(expected, result, check_category_order=True)

pandas/tests/copy_view/test_replace.py

+35-6
Original file line numberDiff line numberDiff line change
@@ -162,13 +162,19 @@ def test_replace_to_replace_wrong_dtype(using_copy_on_write):
162162
def test_replace_list_categorical(using_copy_on_write):
163163
df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
164164
arr = get_array(df, "a")
165-
df.replace(["c"], value="a", inplace=True)
165+
msg = (
166+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
167+
"with CategoricalDtype"
168+
)
169+
with tm.assert_produces_warning(FutureWarning, match=msg):
170+
df.replace(["c"], value="a", inplace=True)
166171
assert np.shares_memory(arr.codes, get_array(df, "a").codes)
167172
if using_copy_on_write:
168173
assert df._mgr._has_no_reference(0)
169174

170175
df_orig = df.copy()
171-
df2 = df.replace(["b"], value="a")
176+
with tm.assert_produces_warning(FutureWarning, match=msg):
177+
df2 = df.replace(["b"], value="a")
172178
assert not np.shares_memory(arr.codes, get_array(df2, "a").codes)
173179

174180
tm.assert_frame_equal(df, df_orig)
@@ -178,7 +184,12 @@ def test_replace_list_inplace_refs_categorical(using_copy_on_write):
178184
df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
179185
view = df[:]
180186
df_orig = df.copy()
181-
df.replace(["c"], value="a", inplace=True)
187+
msg = (
188+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
189+
"with CategoricalDtype"
190+
)
191+
with tm.assert_produces_warning(FutureWarning, match=msg):
192+
df.replace(["c"], value="a", inplace=True)
182193
if using_copy_on_write:
183194
assert not np.shares_memory(
184195
get_array(view, "a").codes, get_array(df, "a").codes
@@ -238,7 +249,13 @@ def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_repl
238249
df_orig = df.copy()
239250
arr_a = get_array(df, "a")
240251
view = df[:]
241-
df.replace(to_replace=to_replace, value=val, inplace=True)
252+
msg = (
253+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
254+
"with CategoricalDtype"
255+
)
256+
warn = FutureWarning if val == 1.5 else None
257+
with tm.assert_produces_warning(warn, match=msg):
258+
df.replace(to_replace=to_replace, value=val, inplace=True)
242259

243260
if using_copy_on_write:
244261
assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes)
@@ -253,7 +270,13 @@ def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_repl
253270
def test_replace_categorical_inplace(using_copy_on_write, val):
254271
df = DataFrame({"a": Categorical([1, 2, 3])})
255272
arr_a = get_array(df, "a")
256-
df.replace(to_replace=1, value=val, inplace=True)
273+
msg = (
274+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
275+
"with CategoricalDtype"
276+
)
277+
warn = FutureWarning if val == 1.5 else None
278+
with tm.assert_produces_warning(warn, match=msg):
279+
df.replace(to_replace=1, value=val, inplace=True)
257280

258281
assert np.shares_memory(get_array(df, "a").codes, arr_a.codes)
259282
if using_copy_on_write:
@@ -267,7 +290,13 @@ def test_replace_categorical_inplace(using_copy_on_write, val):
267290
def test_replace_categorical(using_copy_on_write, val):
268291
df = DataFrame({"a": Categorical([1, 2, 3])})
269292
df_orig = df.copy()
270-
df2 = df.replace(to_replace=1, value=val)
293+
msg = (
294+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
295+
"with CategoricalDtype"
296+
)
297+
warn = FutureWarning if val == 1.5 else None
298+
with tm.assert_produces_warning(warn, match=msg):
299+
df2 = df.replace(to_replace=1, value=val)
271300

272301
if using_copy_on_write:
273302
assert df._mgr._has_no_reference(0)

pandas/tests/frame/methods/test_replace.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -1279,7 +1279,9 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data):
12791279
b = pd.Categorical(final_data[:, 1], categories=ex_cat)
12801280

12811281
expected = DataFrame({"a": a, "b": b})
1282-
result = df.replace(replace_dict, 3)
1282+
msg2 = "with CategoricalDtype is deprecated"
1283+
with tm.assert_produces_warning(FutureWarning, match=msg2):
1284+
result = df.replace(replace_dict, 3)
12831285
tm.assert_frame_equal(result, expected)
12841286
msg = (
12851287
r"Attributes of DataFrame.iloc\[:, 0\] \(column name=\"a\"\) are "
@@ -1288,7 +1290,8 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data):
12881290
with pytest.raises(AssertionError, match=msg):
12891291
# ensure non-inplace call does not affect original
12901292
tm.assert_frame_equal(df, expected)
1291-
return_value = df.replace(replace_dict, 3, inplace=True)
1293+
with tm.assert_produces_warning(FutureWarning, match=msg2):
1294+
return_value = df.replace(replace_dict, 3, inplace=True)
12921295
assert return_value is None
12931296
tm.assert_frame_equal(df, expected)
12941297

@@ -1438,9 +1441,14 @@ def test_replace_value_category_type(self):
14381441
)
14391442

14401443
# replace values in input dataframe
1441-
input_df = input_df.replace("d", "z")
1442-
input_df = input_df.replace("obj1", "obj9")
1443-
result = input_df.replace("cat2", "catX")
1444+
msg = (
1445+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
1446+
"with CategoricalDtype"
1447+
)
1448+
with tm.assert_produces_warning(FutureWarning, match=msg):
1449+
input_df = input_df.replace("d", "z")
1450+
input_df = input_df.replace("obj1", "obj9")
1451+
result = input_df.replace("cat2", "catX")
14441452

14451453
tm.assert_frame_equal(result, expected)
14461454

@@ -1466,7 +1474,12 @@ def test_replace_dict_category_type(self):
14661474
)
14671475

14681476
# replace values in input dataframe using a dict
1469-
result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})
1477+
msg = (
1478+
r"The behavior of Series\.replace \(and DataFrame.replace\) "
1479+
"with CategoricalDtype"
1480+
)
1481+
with tm.assert_produces_warning(FutureWarning, match=msg):
1482+
result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})
14701483

14711484
tm.assert_frame_equal(result, expected)
14721485

pandas/tests/groupby/test_groupby_dropna.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -546,9 +546,9 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki
546546

547547
gb_filled = df_filled.groupby(keys, observed=observed, sort=sort, as_index=True)
548548
expected = getattr(gb_filled, reduction_func)(*args_filled).reset_index()
549-
expected["x"] = expected["x"].replace(4, None)
549+
expected["x"] = expected["x"].cat.remove_categories([4])
550550
if index_kind == "multi":
551-
expected["x2"] = expected["x2"].replace(4, None)
551+
expected["x2"] = expected["x2"].cat.remove_categories([4])
552552
if as_index:
553553
if index_kind == "multi":
554554
expected = expected.set_index(["x", "x2"])

pandas/tests/io/pytables/test_file_handling.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,15 @@ def test_latin_encoding(tmp_path, setup_path, dtype, val):
341341
ser.to_hdf(store, key=key, format="table", encoding=enc, nan_rep=nan_rep)
342342
retr = read_hdf(store, key)
343343

344-
s_nan = ser.replace(nan_rep, np.nan)
344+
# TODO(3.0): once Categorical replace deprecation is enforced,
345+
# we may be able to re-simplify the construction of s_nan
346+
if dtype == "category":
347+
if nan_rep in ser.cat.categories:
348+
s_nan = ser.cat.remove_categories([nan_rep])
349+
else:
350+
s_nan = ser
351+
else:
352+
s_nan = ser.replace(nan_rep, np.nan)
345353

346354
tm.assert_series_equal(s_nan, retr)
347355

pandas/tests/series/methods/test_replace.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,7 @@ def test_replace_categorical(self, categorical, numeric):
403403
# GH 24971, GH#23305
404404
ser = pd.Series(categorical)
405405
msg = "Downcasting behavior in `replace`"
406+
msg = "with CategoricalDtype is deprecated"
406407
with tm.assert_produces_warning(FutureWarning, match=msg):
407408
result = ser.replace({"A": 1, "B": 2})
408409
expected = pd.Series(numeric).astype("category")
@@ -418,7 +419,9 @@ def test_replace_categorical(self, categorical, numeric):
418419
def test_replace_categorical_inplace(self, data, data_exp):
419420
# GH 53358
420421
result = pd.Series(data, dtype="category")
421-
result.replace(to_replace="a", value="b", inplace=True)
422+
msg = "with CategoricalDtype is deprecated"
423+
with tm.assert_produces_warning(FutureWarning, match=msg):
424+
result.replace(to_replace="a", value="b", inplace=True)
422425
expected = pd.Series(data_exp, dtype="category")
423426
tm.assert_series_equal(result, expected)
424427

@@ -434,16 +437,22 @@ def test_replace_categorical_single(self):
434437
expected = expected.cat.remove_unused_categories()
435438
assert c[2] != "foo"
436439

437-
result = c.replace(c[2], "foo")
440+
msg = "with CategoricalDtype is deprecated"
441+
with tm.assert_produces_warning(FutureWarning, match=msg):
442+
result = c.replace(c[2], "foo")
438443
tm.assert_series_equal(expected, result)
439444
assert c[2] != "foo" # ensure non-inplace call does not alter original
440445

441-
return_value = c.replace(c[2], "foo", inplace=True)
446+
msg = "with CategoricalDtype is deprecated"
447+
with tm.assert_produces_warning(FutureWarning, match=msg):
448+
return_value = c.replace(c[2], "foo", inplace=True)
442449
assert return_value is None
443450
tm.assert_series_equal(expected, c)
444451

445452
first_value = c[0]
446-
return_value = c.replace(c[1], c[0], inplace=True)
453+
msg = "with CategoricalDtype is deprecated"
454+
with tm.assert_produces_warning(FutureWarning, match=msg):
455+
return_value = c.replace(c[1], c[0], inplace=True)
447456
assert return_value is None
448457
assert c[0] == c[1] == first_value # test replacing with existing value
449458

0 commit comments

Comments
 (0)