pandas-dev · mroeschke · Sep 18, 2023 · Aug 23, 2023 · Aug 23, 2023 · Aug 23, 2023
diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
@@ -401,7 +401,7 @@ Limit the number of NA values filled
 
    df.ffill(limit=1)
 
-NA values can be replaced with corresponding value from a :class:`Series`` or :class:`DataFrame``
+NA values can be replaced with the corresponding value from a :class:`Series` or :class:`DataFrame`
 where the index and column aligns between the original object and the filled object.
 
 .. ipython:: python
@@ -660,7 +660,7 @@ Pass a list of regular expressions that will replace matches with a scalar.
 
 .. ipython:: python
 
-   df.replace([r"\s*\.\s*", r"a|b"], np.nan, regex=True)
+   df.replace([r"\s*\.\s*", r"a|b"], "placeholder", regex=True)
 
 All of the regular expression examples can also be passed with the
 ``to_replace`` argument as the ``regex`` argument. In this case the ``value``
@@ -669,7 +669,7 @@ dictionary.
 
 .. ipython:: python
 
-   df.replace(regex=[r"\s*\.\s*", r"a|b"], value=np.nan)
+   df.replace(regex=[r"\s*\.\s*", r"a|b"], value="placeholder")
 
 .. note::
 

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -162,7 +162,8 @@ Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
-- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
+- Deprecated automatic downcasting of object-dtype results in :meth:`Series.replace` and :meth:`DataFrame.replace`; explicitly call ``result = result.infer_objects(copy=False)`` instead. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54710`)
+- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
 - Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
 - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
 - Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -761,7 +761,23 @@ def replace(
             if not (self.is_object and value is None):
                 # if the user *explicitly* gave None, we keep None, otherwise
                 #  may downcast to NaN
-                blocks = blk.convert(copy=False, using_cow=using_cow)
+                if get_option("future.no_silent_downcasting") is True:
+                    blocks = [blk]
+                else:
+                    blocks = blk.convert(copy=False, using_cow=using_cow)
+                    if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
+                        warnings.warn(
+                            # GH#54710
+                            "Downcasting behavior in `replace` is deprecated and "
+                            "will be removed in a future version. To retain the old "
+                            "behavior, explicitly call "
+                            "`result.infer_objects(copy=False)`. "
+                            "To opt-in to the future "
+                            "behavior, set "
+                            "`pd.set_option('future.no_silent_downcasting', True)`",
+                            FutureWarning,
+                            stacklevel=find_stack_level(),
+                        )
             else:
                 blocks = [blk]
             return blocks
@@ -836,7 +852,21 @@ def _replace_regex(
 
         replace_regex(block.values, rx, value, mask)
 
-        return block.convert(copy=False, using_cow=using_cow)
+        nbs = block.convert(copy=False, using_cow=using_cow)
+        opt = get_option("future.no_silent_downcasting")
+        if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
+            warnings.warn(
+                # GH#54710
+                "Downcasting behavior in `replace` is deprecated and "
+                "will be removed in a future version. To retain the old "
+                "behavior, explicitly call `result.infer_objects(copy=False)`. "
+                "To opt-in to the future "
+                "behavior, set "
+                "`pd.set_option('future.no_silent_downcasting', True)`",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        return nbs
 
     @final
     def replace_list(
@@ -902,6 +932,7 @@ def replace_list(
         else:
             rb = [self if inplace else self.copy()]
 
+        opt = get_option("future.no_silent_downcasting")
         for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
             convert = i == src_len  # only convert once at the end
             new_rb: list[Block] = []
@@ -939,14 +970,33 @@ def replace_list(
                             b.refs.referenced_blocks.index(ref)
                         )
 
-                if convert and blk.is_object and not all(x is None for x in dest_list):
+                if (
+                    not opt
+                    and convert
+                    and blk.is_object
+                    and not all(x is None for x in dest_list)
+                ):
                     # GH#44498 avoid unwanted cast-back
-                    result = extend_blocks(
-                        [
-                            b.convert(copy=True and not using_cow, using_cow=using_cow)
-                            for b in result
-                        ]
-                    )
+                    nbs = []
+                    for res_blk in result:
+                        converted = res_blk.convert(
+                            copy=True and not using_cow, using_cow=using_cow
+                        )
+                        if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
+                            warnings.warn(
+                                # GH#54710
+                                "Downcasting behavior in `replace` is deprecated "
+                                "and will be removed in a future version. To "
+                                "retain the old behavior, explicitly call "
+                                "`result.infer_objects(copy=False)`. "
+                                "To opt-in to the future "
+                                "behavior, set "
+                                "`pd.set_option('future.no_silent_downcasting', True)`",
+                                FutureWarning,
+                                stacklevel=find_stack_level(),
+                            )
+                        nbs.extend(converted)
+                    result = nbs
                 new_rb.extend(result)
             rb = new_rb
         return rb

diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
@@ -289,7 +289,9 @@ def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
     def test_regex_replace_dict_nested_gh4115(self):
         df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2})
         expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
-        result = df.replace({"Type": {"Q": 0, "T": 1}})
+        msg = "Downcasting behavior in `replace`"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.replace({"Type": {"Q": 0, "T": 1}})
         tm.assert_frame_equal(result, expected)
 
     def test_regex_replace_list_to_scalar(self, mix_abc):
@@ -301,16 +303,20 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
                 "c": [np.nan, np.nan, np.nan, "d"],
             }
         )
-        res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
+        msg = "Downcasting behavior in `replace`"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
         res2 = df.copy()
         res3 = df.copy()
-        return_value = res2.replace(
-            [r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
-        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            return_value = res2.replace(
+                [r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
+            )
         assert return_value is None
-        return_value = res3.replace(
-            regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
-        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            return_value = res3.replace(
+                regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
+            )
         assert return_value is None
         tm.assert_frame_equal(res, expec)
         tm.assert_frame_equal(res2, expec)
@@ -520,7 +526,9 @@ def test_replace_convert(self):
         # gh 3907
         df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]])
         m = {"foo": 1, "bar": 2, "bah": 3}
-        rep = df.replace(m)
+        msg = "Downcasting behavior in `replace` "
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            rep = df.replace(m)
         expec = Series([np.int64] * 3)
         res = rep.dtypes
         tm.assert_series_equal(expec, res)
@@ -838,7 +846,12 @@ def test_replace_for_new_dtypes(self, datetime_frame):
         ],
     )
     def test_replace_dtypes(self, frame, to_replace, value, expected):
-        result = frame.replace(to_replace, value)
+        warn = None
+        if isinstance(to_replace, datetime) and to_replace.year == 2920:
+            warn = FutureWarning
+        msg = "Downcasting behavior in `replace` "
+        with tm.assert_produces_warning(warn, match=msg):
+            result = frame.replace(to_replace, value)
         tm.assert_frame_equal(result, expected)
 
     def test_replace_input_formats_listlike(self):
@@ -927,7 +940,9 @@ def test_replace_dict_no_regex(self):
             "Strongly Disagree": 1,
         }
         expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
-        result = answer.replace(weights)
+        msg = "Downcasting behavior in `replace` "
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = answer.replace(weights)
         tm.assert_series_equal(result, expected)
 
     def test_replace_series_no_regex(self):
@@ -950,7 +965,9 @@ def test_replace_series_no_regex(self):
             }
         )
         expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
-        result = answer.replace(weights)
+        msg = "Downcasting behavior in `replace` "
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = answer.replace(weights)
         tm.assert_series_equal(result, expected)
 
     def test_replace_dict_tuple_list_ordering_remains_the_same(self):
@@ -1076,7 +1093,9 @@ def test_replace_period(self):
 
         expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
         assert expected.dtypes.iloc[0] == "Period[M]"
-        result = df.replace(d)
+        msg = "Downcasting behavior in `replace` "
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.replace(d)
         tm.assert_frame_equal(result, expected)
 
     def test_replace_datetime(self):
@@ -1106,7 +1125,9 @@ def test_replace_datetime(self):
         )
         assert set(df.fname.values) == set(d["fname"].keys())
         expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
-        result = df.replace(d)
+        msg = "Downcasting behavior in `replace` "
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.replace(d)
         tm.assert_frame_equal(result, expected)
 
     def test_replace_datetimetz(self):
@@ -1307,10 +1328,12 @@ def test_replace_commutative(self, df, to_replace, exp):
             np.float64(1),
         ],
     )
-    def test_replace_replacer_dtype(self, request, replacer):
+    def test_replace_replacer_dtype(self, replacer):
         # GH26632
         df = DataFrame(["a"])
-        result = df.replace({"a": replacer, "b": replacer})
+        msg = "Downcasting behavior in `replace` "
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.replace({"a": replacer, "b": replacer})
         expected = DataFrame([replacer])
         tm.assert_frame_equal(result, expected)
 
@@ -1564,12 +1587,15 @@ def test_replace_regex_dtype_frame(self, regex):
         # GH-48644
         df1 = DataFrame({"A": ["0"], "B": ["0"]})
         expected_df1 = DataFrame({"A": [1], "B": [1]})
-        result_df1 = df1.replace(to_replace="0", value=1, regex=regex)
+        msg = "Downcasting behavior in `replace`"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result_df1 = df1.replace(to_replace="0", value=1, regex=regex)
         tm.assert_frame_equal(result_df1, expected_df1)
 
         df2 = DataFrame({"A": ["0"], "B": ["1"]})
         expected_df2 = DataFrame({"A": [1], "B": ["1"]})
-        result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
         tm.assert_frame_equal(result_df2, expected_df2)
 
     def test_replace_with_value_also_being_replaced(self):

diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
@@ -836,8 +836,6 @@ def test_replace_series(self, how, to_key, from_key, replacer):
             # tested below
             return
 
-        result = obj.replace(replacer)
-
         if (from_key == "float64" and to_key in ("int64")) or (
             from_key == "complex128" and to_key in ("int64", "float64")
         ):
@@ -851,6 +849,17 @@ def test_replace_series(self, how, to_key, from_key, replacer):
             exp = pd.Series(self.rep[to_key], index=index, name="yyy")
             assert exp.dtype == to_key
 
+        msg = "Downcasting behavior in `replace`"
+        warn = FutureWarning
+        if (
+            exp.dtype == obj.dtype
+            or exp.dtype == object
+            or (exp.dtype.kind in "iufc" and obj.dtype.kind in "iufc")
+        ):
+            warn = None
+        with tm.assert_produces_warning(warn, match=msg):
+            result = obj.replace(replacer)
+
         tm.assert_series_equal(result, exp)
 
     @pytest.mark.parametrize(
@@ -866,11 +875,14 @@ def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer):
         obj = pd.Series(self.rep[from_key], index=index, name="yyy")
         assert obj.dtype == from_key
 
-        result = obj.replace(replacer)
-
         exp = pd.Series(self.rep[to_key], index=index, name="yyy")
         assert exp.dtype == to_key
 
+        msg = "Downcasting behavior in `replace`"
+        warn = FutureWarning if exp.dtype != object else None
+        with tm.assert_produces_warning(warn, match=msg):
+            result = obj.replace(replacer)
+
         tm.assert_series_equal(result, exp)
 
     @pytest.mark.parametrize(
@@ -888,16 +900,22 @@ def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer)
         obj = pd.Series(self.rep[from_key], index=index, name="yyy")
         assert obj.dtype == from_key
 
-        result = obj.replace(replacer)
-
         exp = pd.Series(self.rep[to_key], index=index, name="yyy")
+        warn = FutureWarning
         if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance(
             exp.dtype, pd.DatetimeTZDtype
         ):
             # with mismatched tzs, we retain the original dtype as of 2.0
             exp = exp.astype(obj.dtype)
+            warn = None
         else:
             assert exp.dtype == to_key
+            if to_key == from_key:
+                warn = None
+
+        msg = "Downcasting behavior in `replace`"
+        with tm.assert_produces_warning(warn, match=msg):
+            result = obj.replace(replacer)
 
         tm.assert_series_equal(result, exp)
 

diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -1197,7 +1197,9 @@ def test_render_as_column_name(self, path):
     def test_true_and_false_value_options(self, path):
         # see gh-13347
         df = DataFrame([["foo", "bar"]], columns=["col1", "col2"])
-        expected = df.replace({"foo": True, "bar": False})
+        msg = "Downcasting behavior in `replace`"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df.replace({"foo": True, "bar": False})
 
         df.to_excel(path)
         read_frame = pd.read_excel(