Skip to content

CLN: enforce deprecation of the Series[categorical].replace special-casing #58270

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ Removal of prior version deprecations/changes
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`)
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`)
- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`)
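- Enforced deprecation of the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype` that would change the categories. Such calls now raise a ``TypeError``; use :meth:`Series.cat.rename_categories` instead (:issue:`58270`)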
- Enforced deprecation of values "pad", "ffill", "bfill", and "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`57869`)
- Enforced deprecation removing :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
Expand Down
10 changes: 2 additions & 8 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2680,15 +2680,9 @@ def _replace(self, *, to_replace, value, inplace: bool = False) -> Self | None:
NDArrayBacked.__init__(cat, new_codes, new_dtype)

if new_dtype != orig_dtype:
warnings.warn(
# GH#55147
raise TypeError(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this method called anywhere? I think it can just be removed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is special-casing in `internals.blocks` that calls this method. That special-casing needs to be removed.

"The behavior of Series.replace (and DataFrame.replace) with "
"CategoricalDtype is deprecated. In a future version, replace "
"will only be used for cases that preserve the categories. "
"To change the categories, use ser.cat.rename_categories "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
"CategoricalDtype is not supported."
)
if not inplace:
return cat
Expand Down
72 changes: 20 additions & 52 deletions pandas/tests/arrays/categorical/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,12 @@
"to_replace,value,expected,flip_categories",
[
# one-to-one
(1, 2, [2, 2, 3], False),
(1, 4, [4, 2, 3], False),
(4, 1, [1, 2, 3], False),
(5, 6, [1, 2, 3], False),
# many-to-one
([1], 2, [2, 2, 3], False),
([1, 2], 3, [3, 3, 3], False),
([1, 2], 4, [4, 4, 3], False),
((1, 2, 4), 5, [5, 5, 3], False),
((5, 6), 2, [1, 2, 3], False),
([1], [2], [2, 2, 3], False),
([1, 4], [5, 2], [5, 2, 3], False),
# GH49404: overlap between to_replace and value
([1, 2, 3], [2, 3, 4], [2, 3, 4], False),
# GH50872, GH46884: replace with null
(1, None, [None, 2, 3], False),
(1, pd.NA, [None, 2, 3], False),
# check_categorical sorts categories, which crashes on mixed dtypes
(3, "4", [1, 2, "4"], False),
([1, 2, "3"], "5", ["5", "5", 3], True),
],
)
@pytest.mark.filterwarnings(
"ignore:.*with CategoricalDtype is deprecated:FutureWarning"
)
def test_replace_categorical_series(to_replace, value, expected, flip_categories):
# GH 31720

Expand Down Expand Up @@ -63,49 +44,36 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
# GH#26988
cat = Categorical(["a", "b"])
expected = Categorical(result)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if expected_error_msg is not None else None
with tm.assert_produces_warning(warn, match=msg):

if expected_error_msg is None:
result = pd.Series(cat, copy=False).replace(to_replace, value)._values
tm.assert_categorical_equal(result, expected)
elif value is not None:
result = (
pd.Series(cat, copy=False)
.cat.rename_categories({to_replace: value})
._values
)
tm.assert_categorical_equal(result, expected)

tm.assert_categorical_equal(result, expected)
if to_replace == "b": # the "c" test is supposed to be unchanged
with pytest.raises(AssertionError, match=expected_error_msg):
# ensure non-inplace call does not affect original
tm.assert_categorical_equal(cat, expected)

ser = pd.Series(cat, copy=False)
with tm.assert_produces_warning(warn, match=msg):
if expected_error_msg is None:
ser.replace(to_replace, value, inplace=True)
tm.assert_categorical_equal(cat, expected)
tm.assert_categorical_equal(cat, expected)
else:
msg2 = "with CategoricalDtype is not supported"
with pytest.raises(TypeError, match=msg2):
ser.replace(to_replace, value, inplace=True)


def test_replace_categorical_ea_dtype():
def test_replace_categorical_ea_dtype_raises():
# GH49404
cat = Categorical(pd.array(["a", "b"], dtype="string"))
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
tm.assert_categorical_equal(result, expected)


def test_replace_maintain_ordering():
# GH51016
dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
ser = pd.Series([0, 1, 2], dtype=dtype)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace(0, 2)
expected_dtype = pd.CategoricalDtype([1, 2], ordered=True)
expected = pd.Series([2, 1, 2], dtype=expected_dtype)
tm.assert_series_equal(expected, result, check_category_order=True)
msg2 = "with CategoricalDtype is not supported"
with pytest.raises(TypeError, match=msg2):
pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
83 changes: 40 additions & 43 deletions pandas/tests/copy_view/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,18 @@ def test_replace_to_replace_wrong_dtype():
def test_replace_list_categorical():
df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
arr = get_array(df, "a")
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):

msg = "with CategoricalDtype is not supported"
with pytest.raises(TypeError, match=msg):
df.replace(["c"], value="a", inplace=True)
df.apply(lambda x: x.cat.rename_categories({"c": "a"}))
assert np.shares_memory(arr.codes, get_array(df, "a").codes)
assert df._mgr._has_no_reference(0)

df_orig = df.copy()
with tm.assert_produces_warning(FutureWarning, match=msg):
df2 = df.replace(["b"], value="a")
with pytest.raises(TypeError, match=msg):
df.replace(["b"], value="a")
df2 = df.apply(lambda x: x.cat.rename_categories({"b": "d"}))
assert not np.shares_memory(arr.codes, get_array(df2, "a").codes)

tm.assert_frame_equal(df, df_orig)
Expand All @@ -150,13 +150,12 @@ def test_replace_list_inplace_refs_categorical():
df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
view = df[:]
df_orig = df.copy()
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
df.replace(["c"], value="a", inplace=True)
assert not np.shares_memory(get_array(view, "a").codes, get_array(df, "a").codes)

msg = "with CategoricalDtype is not supported"
with pytest.raises(TypeError, match=msg):
df.replace(["c"], value="d", inplace=True)
df.apply(lambda x: x.cat.rename_categories({"c": "d"}))

tm.assert_frame_equal(df_orig, view)


Expand Down Expand Up @@ -201,30 +200,29 @@ def test_replace_categorical_inplace_reference(val, to_replace):
df_orig = df.copy()
arr_a = get_array(df, "a")
view = df[:]
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):
if val == 1.5:
msg = "with CategoricalDtype is not supported"
with pytest.raises(TypeError, match=msg):
df.replace(to_replace=to_replace, value=val, inplace=True)
else:
df.replace(to_replace=to_replace, value=val, inplace=True)

assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes)
assert df._mgr._has_no_reference(0)
assert view._mgr._has_no_reference(0)
tm.assert_frame_equal(view, df_orig)
assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes)
assert df._mgr._has_no_reference(0)
assert view._mgr._has_no_reference(0)
tm.assert_frame_equal(view, df_orig)


@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace(val):
df = DataFrame({"a": Categorical([1, 2, 3])})
arr_a = get_array(df, "a")
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):

if val == 1.5:
msg = "with CategoricalDtype is not supported"
with pytest.raises(TypeError, match=msg):
df.replace(to_replace=1, value=val, inplace=True)
else:
df.replace(to_replace=1, value=val, inplace=True)

assert np.shares_memory(get_array(df, "a").codes, arr_a.codes)
Expand All @@ -238,22 +236,21 @@ def test_replace_categorical_inplace(val):
def test_replace_categorical(val):
df = DataFrame({"a": Categorical([1, 2, 3])})
df_orig = df.copy()
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):
if val == 1.5:
msg = "with CategoricalDtype is not supported"
with pytest.raises(TypeError, match=msg):
df.replace(to_replace=1, value=val)
else:
df2 = df.replace(to_replace=1, value=val)

assert df._mgr._has_no_reference(0)
assert df2._mgr._has_no_reference(0)
assert not np.shares_memory(get_array(df, "a").codes, get_array(df2, "a").codes)
tm.assert_frame_equal(df, df_orig)
assert df._mgr._has_no_reference(0)
assert df2._mgr._has_no_reference(0)
assert not np.shares_memory(get_array(df, "a").codes, get_array(df2, "a").codes)
tm.assert_frame_equal(df, df_orig)

arr_a = get_array(df2, "a").codes
df2.iloc[0, 0] = 2.0
assert np.shares_memory(get_array(df2, "a").codes, arr_a)
arr_a = get_array(df2, "a").codes
df2.iloc[0, 0] = 2.0
assert np.shares_memory(get_array(df2, "a").codes, arr_a)


@pytest.mark.parametrize("method", ["where", "mask"])
Expand Down
57 changes: 13 additions & 44 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,38 +1171,6 @@ def test_replace_with_empty_dictlike(self, mix_abc):
tm.assert_frame_equal(df, df.replace({"b": {}}))
tm.assert_frame_equal(df, df.replace(Series({"b": {}})))

@pytest.mark.parametrize(
"replace_dict, final_data",
[({"a": 1, "b": 1}, [[3, 3], [2, 2]]), ({"a": 1, "b": 2}, [[3, 1], [2, 3]])],
)
def test_categorical_replace_with_dict(self, replace_dict, final_data):
# GH 26988
df = DataFrame([[1, 1], [2, 2]], columns=["a", "b"], dtype="category")

final_data = np.array(final_data)

a = pd.Categorical(final_data[:, 0], categories=[3, 2])

ex_cat = [3, 2] if replace_dict["b"] == 1 else [1, 3]
b = pd.Categorical(final_data[:, 1], categories=ex_cat)

expected = DataFrame({"a": a, "b": b})
msg2 = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = df.replace(replace_dict, 3)
tm.assert_frame_equal(result, expected)
msg = (
r"Attributes of DataFrame.iloc\[:, 0\] \(column name=\"a\"\) are "
"different"
)
with pytest.raises(AssertionError, match=msg):
# ensure non-inplace call does not affect original
tm.assert_frame_equal(df, expected)
with tm.assert_produces_warning(FutureWarning, match=msg2):
return_value = df.replace(replace_dict, 3, inplace=True)
assert return_value is None
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize(
"df, to_replace, exp",
[
Expand Down Expand Up @@ -1345,15 +1313,17 @@ def test_replace_value_category_type(self):
)

# replace values in input dataframe
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
input_df = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"d": "z"})
)
input_df = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"obj1": "obj9"})
)
result = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"cat2": "catX"})
)
with tm.assert_produces_warning(FutureWarning, match=msg):
input_df = input_df.replace("d", "z")
input_df = input_df.replace("obj1", "obj9")
result = input_df.replace("cat2", "catX")

result = result.astype({"col1": "int64", "col3": "float64", "col5": "object"})
tm.assert_frame_equal(result, expected)

def test_replace_dict_category_type(self):
Expand All @@ -1378,12 +1348,11 @@ def test_replace_dict_category_type(self):
)

# replace values in input dataframe using a dict
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
result = input_df.apply(
lambda x: x.cat.rename_categories(
{"a": "z", "obj1": "obj9", "cat1": "catX"}
)
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})

tm.assert_frame_equal(result, expected)

Expand Down
29 changes: 5 additions & 24 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,7 @@ def test_replace_mixed_types_with_string(self):
def test_replace_categorical(self, categorical, numeric):
# GH 24971, GH#23305
ser = pd.Series(pd.Categorical(categorical, categories=["A", "B"]))
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace({"A": 1, "B": 2})
result = ser.cat.rename_categories({"A": 1, "B": 2})
expected = pd.Series(numeric).astype("category")
if 2 not in expected.cat.categories:
# i.e. categories should be [1, 2] even if there are no "B"s present
Expand All @@ -383,14 +381,12 @@ def test_replace_categorical(self, categorical, numeric):
@pytest.mark.parametrize(
"data, data_exp", [(["a", "b", "c"], ["b", "b", "c"]), (["a"], ["b"])]
)
def test_replace_categorical_inplace(self, data, data_exp):
def test_replace_categorical_inplace_raises(self, data, data_exp):
# GH 53358
result = pd.Series(data, dtype="category")
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
msg = "with CategoricalDtype is not supported"
with pytest.raises(TypeError, match=msg):
result.replace(to_replace="a", value="b", inplace=True)
expected = pd.Series(data_exp, dtype="category")
tm.assert_series_equal(result, expected)

def test_replace_categorical_single(self):
# GH 26988
Expand All @@ -404,25 +400,10 @@ def test_replace_categorical_single(self):
expected = expected.cat.remove_unused_categories()
assert c[2] != "foo"

msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = c.replace(c[2], "foo")
result = c.cat.rename_categories({c.values[2]: "foo"})
tm.assert_series_equal(expected, result)
assert c[2] != "foo" # ensure non-inplace call does not alter original

msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = c.replace(c[2], "foo", inplace=True)
assert return_value is None
tm.assert_series_equal(expected, c)

first_value = c[0]
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = c.replace(c[1], c[0], inplace=True)
assert return_value is None
assert c[0] == c[1] == first_value # test replacing with existing value

def test_replace_with_no_overflowerror(self):
# GH 25616
# casts to object without Exception from OverflowError
Expand Down