pandas-dev · jbrockmendel · May 5, 2023 · May 5, 2023 · May 5, 2023 · May 6, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -261,6 +261,8 @@ Deprecations
 - Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
 - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
 - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
+- Deprecated downcasting behavior in :meth:`Series.interpolate`, :meth:`Series.fillna`, :meth:`DataFrame.interpolate`, and :meth:`DataFrame.fillna` with ``downcast="infer"`` and floating dtypes; in a future version these will not cast floats whose values are all round numbers to integer dtype. To retain the old behavior, explicitly cast the result to integer dtype instead (:issue:`40988`)
+- Deprecated downcasting behavior in :meth:`Series.where` and :meth:`DataFrame.where` with floating dtypes; in a future version these will not cast floats whose values are all round numbers to integer dtype. To retain the old behavior, explicitly cast the result to integer dtype instead (:issue:`40988`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -145,6 +145,10 @@ def pytest_collection_modifyitems(items, config) -> None:
             "(Series|DataFrame).bool is now deprecated and will be removed "
             "in future version of pandas",
         ),
+        (
+            "pandas.core.generic.NDFrame.clip",
+            "where downcasting from floating dtype to integer dtype is deprecated",
+        ),
     ]
 
     for item in items:

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -12,6 +12,7 @@
     cast,
     final,
 )
+import warnings
 
 import numpy as np
 
@@ -41,6 +42,7 @@
 )
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import cache_readonly
+from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import validate_bool_kwarg
 
 from pandas.core.dtypes.astype import (
@@ -422,7 +424,11 @@ def coerce_to_target_dtype(self, other) -> Block:
 
     @final
     def _maybe_downcast(
-        self, blocks: list[Block], downcast=None, using_cow: bool = False
+        self,
+        blocks: list[Block],
+        downcast=None,
+        using_cow: bool = False,
+        caller: str = "fillna",
     ) -> list[Block]:
         if downcast is False:
             return blocks
@@ -441,17 +447,35 @@ def _maybe_downcast(
         if downcast is None:
             return blocks
 
-        return extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])
+        return extend_blocks(
+            [b._downcast_2d(downcast, using_cow, caller=caller) for b in blocks]
+        )
 
     @final
     @maybe_split
-    def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]:
+    def _downcast_2d(
+        self, dtype, using_cow: bool = False, *, caller: str = "fillna"
+    ) -> list[Block]:
         """
         downcast specialized to 2D case post-validation.
 
         Refactored to allow use of maybe_split.
         """
         new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
+        if (
+            dtype == "infer"
+            and self.values.dtype.kind == "f"
+            and new_values.dtype.kind in "iu"
+        ):
+            # GH#40988
+            warnings.warn(
+                f"{caller} downcasting from floating dtype to integer dtype is "
+                "deprecated. In a future version this will retain floating "
+                "dtype. To retain the old behavior, explicitly cast the result "
+                "to integer dtype",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
         new_values = maybe_coerce_values(new_values)
         refs = self.refs if using_cow and new_values is self.values else None
         return [self.make_block(new_values, refs=refs)]
@@ -1194,7 +1218,7 @@ def where(
                 block = self.coerce_to_target_dtype(other)
                 blocks = block.where(orig_other, cond, using_cow=using_cow)
                 return self._maybe_downcast(
-                    blocks, downcast=_downcast, using_cow=using_cow
+                    blocks, downcast=_downcast, using_cow=using_cow, caller="where"
                 )
 
             else:
@@ -1388,7 +1412,7 @@ def interpolate(
         )
 
         nb = self.make_block_same_class(data, refs=refs)
-        return nb._maybe_downcast([nb], downcast, using_cow)
+        return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate")
 
     def diff(self, n: int, axis: AxisInt = 1) -> list[Block]:
         """return block for the diff of the values"""
@@ -1671,7 +1695,7 @@ def where(
                     blk = self.coerce_to_target_dtype(orig_other)
                     nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
                     return self._maybe_downcast(
-                        nbs, downcast=_downcast, using_cow=using_cow
+                        nbs, downcast=_downcast, using_cow=using_cow, caller="where"
                     )
 
                 elif isinstance(self, NDArrayBackedExtensionBlock):
@@ -1680,7 +1704,7 @@ def where(
                     blk = self.coerce_to_target_dtype(orig_other)
                     nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
                     return self._maybe_downcast(
-                        nbs, downcast=_downcast, using_cow=using_cow
+                        nbs, downcast=_downcast, using_cow=using_cow, caller="where"
                     )
 
                 else:

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -207,26 +207,29 @@ def __internal_pivot_table(
                 to_unstack.append(i)
             else:
                 to_unstack.append(name)
-        table = agged.unstack(to_unstack)
+        table = agged.unstack(to_unstack, fill_value=fill_value)
 
     if not dropna:
         if isinstance(table.index, MultiIndex):
             m = MultiIndex.from_arrays(
                 cartesian_product(table.index.levels), names=table.index.names
             )
-            table = table.reindex(m, axis=0)
+            table = table.reindex(m, axis=0, fill_value=fill_value)
 
         if isinstance(table.columns, MultiIndex):
             m = MultiIndex.from_arrays(
                 cartesian_product(table.columns.levels), names=table.columns.names
             )
-            table = table.reindex(m, axis=1)
+            table = table.reindex(m, axis=1, fill_value=fill_value)
 
     if sort is True and isinstance(table, ABCDataFrame):
         table = table.sort_index(axis=1)
 
     if fill_value is not None:
-        table = table.fillna(fill_value, downcast="infer")
+        table = table.fillna(fill_value)
+        table = table.infer_objects()
+        if aggfunc is len and not observed and lib.is_integer(fill_value):
+            table = table.astype(np.int64)
 
     if margins:
         if dropna:

diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
@@ -348,7 +348,9 @@ def test_where_bug_transposition(self):
         expected = a.copy()
         expected[~do_not_replace] = b
 
-        result = a.where(do_not_replace, b)
+        msg = "where downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = a.where(do_not_replace, b)
         tm.assert_frame_equal(result, expected)
 
         a = DataFrame({0: [4, 6], 1: [1, 0]})
@@ -358,7 +360,9 @@ def test_where_bug_transposition(self):
         expected = a.copy()
         expected[~do_not_replace] = b
 
-        result = a.where(do_not_replace, b)
+        msg = "where downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = a.where(do_not_replace, b)
         tm.assert_frame_equal(result, expected)
 
     def test_where_datetime(self):

diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py
@@ -145,7 +145,9 @@ def test_clip_with_na_args(self, float_frame):
         # GH#19992 and adjusted in GH#40420
         df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})
 
-        result = df.clip(lower=[4, 5, np.nan], axis=0)
+        msg = "where downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.clip(lower=[4, 5, np.nan], axis=0)
         expected = DataFrame(
             {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
         )
@@ -161,7 +163,9 @@ def test_clip_with_na_args(self, float_frame):
         data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
         df = DataFrame(data)
         t = Series([2, -4, np.NaN, 6, 3])
-        result = df.clip(lower=t, axis=0)
+        msg = "where downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.clip(lower=t, axis=0)
         expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
@@ -274,13 +274,17 @@ def test_fillna_downcast(self):
         # GH#15277
         # infer int64 from float64
         df = DataFrame({"a": [1.0, np.nan]})
-        result = df.fillna(0, downcast="infer")
+        msg = "fillna downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.fillna(0, downcast="infer")
         expected = DataFrame({"a": [1, 0]})
         tm.assert_frame_equal(result, expected)
 
         # infer int64 from float64 when fillna value is a dict
         df = DataFrame({"a": [1.0, np.nan]})
-        result = df.fillna({"a": 0}, downcast="infer")
+        msg = "fillna downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.fillna({"a": 0}, downcast="infer")
         expected = DataFrame({"a": [1, 0]})
         tm.assert_frame_equal(result, expected)
 
@@ -306,7 +310,9 @@ def test_fillna_downcast_noop(self, frame_or_series):
         tm.assert_equal(res, expected)
 
         obj2 = obj.astype(np.float64)
-        res2 = obj2.fillna("foo", downcast="infer")
+        msg2 = "fillna downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg2):
+            res2 = obj2.fillna("foo", downcast="infer")
         expected2 = obj  # get back int64
         tm.assert_equal(res2, expected2)
 

diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
@@ -147,7 +147,11 @@ def test_interp_combo(self):
         expected = Series([1.0, 2.0, 3.0, 4.0], name="A")
         tm.assert_series_equal(result, expected)
 
-        result = df["A"].interpolate(downcast="infer")
+        msg = (
+            "interpolate downcasting from floating dtype to integer dtype is deprecated"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df["A"].interpolate(downcast="infer")
         expected = Series([1, 2, 3, 4], name="A")
         tm.assert_series_equal(result, expected)
 
@@ -228,7 +232,11 @@ def test_interp_alt_scipy(self):
         expected.loc[5, "A"] = 6
         tm.assert_frame_equal(result, expected)
 
-        result = df.interpolate(method="barycentric", downcast="infer")
+        msg = (
+            "interpolate downcasting from floating dtype to integer dtype is deprecated"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.interpolate(method="barycentric", downcast="infer")
         tm.assert_frame_equal(result, expected.astype(np.int64))
 
         result = df.interpolate(method="krogh")
@@ -352,7 +360,11 @@ def test_interp_inplace(self, using_copy_on_write):
             tm.assert_frame_equal(result, expected)
 
         result = df.copy()
-        return_value = result["a"].interpolate(inplace=True, downcast="infer")
+        msg = (
+            "interpolate downcasting from floating dtype to integer dtype is deprecated"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            return_value = result["a"].interpolate(inplace=True, downcast="infer")
         assert return_value is None
         if using_copy_on_write:
             tm.assert_frame_equal(result, expected_cow)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -1235,7 +1235,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
 
     expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C")
     if operation == "agg":
-        expected = expected.fillna(0, downcast="infer")
+        expected = expected.fillna(0).astype(np.int64)
     grouped = df_cat.groupby(["A", "B"], observed=observed)["C"]
     result = getattr(grouped, operation)(sum)
     tm.assert_series_equal(result, expected)

diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py
@@ -69,8 +69,14 @@ def test_clip_with_na_args(self):
         tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))
 
         # GH#19992
-        tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3]))
-        tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1]))
+        msg = "where downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            res = s.clip(lower=[0, 4, np.nan])
+        tm.assert_series_equal(res, Series([1, 4, 3]))
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            res = s.clip(upper=[1, np.nan, 1])
+        tm.assert_series_equal(res, Series([1, 2, 1]))
 
         # GH#40420
         s = Series([1, 2, 3])

diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py
@@ -172,13 +172,17 @@ def test_fillna_downcast(self):
         # GH#15277
         # infer int64 from float64
         ser = Series([1.0, np.nan])
-        result = ser.fillna(0, downcast="infer")
+        msg = "fillna downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = ser.fillna(0, downcast="infer")
         expected = Series([1, 0])
         tm.assert_series_equal(result, expected)
 
         # infer int64 from float64 when fillna value is a dict
         ser = Series([1.0, np.nan])
-        result = ser.fillna({1: 0}, downcast="infer")
+        msg = "fillna downcasting from floating dtype to integer dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = ser.fillna({1: 0}, downcast="infer")
         expected = Series([1, 0])
         tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py
@@ -297,14 +297,19 @@ def test_interp_scipy_basic(self):
         result = s.interpolate(method="nearest")
         tm.assert_series_equal(result, expected.astype("float"))
 
-        result = s.interpolate(method="nearest", downcast="infer")
+        msg = (
+            "interpolate downcasting from floating dtype to integer dtype is deprecated"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = s.interpolate(method="nearest", downcast="infer")
         tm.assert_series_equal(result, expected)
         # zero
         expected = Series([1, 3, 3, 12, 12, 25])
         result = s.interpolate(method="zero")
         tm.assert_series_equal(result, expected.astype("float"))
 
-        result = s.interpolate(method="zero", downcast="infer")
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = s.interpolate(method="zero", downcast="infer")
         tm.assert_series_equal(result, expected)
         # quadratic
         # GH #15662.

diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py
@@ -140,7 +140,9 @@ def test_reindex_pad():
     result = s.reindex(new_index).ffill()
     tm.assert_series_equal(result, expected.astype("float64"))
 
-    result = s.reindex(new_index).ffill(downcast="infer")
+    msg = "interpolate downcasting from floating dtype to integer dtype is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = s.reindex(new_index).ffill(downcast="infer")
     tm.assert_series_equal(result, expected)
 
     expected = Series([1, 5, 3, 5], index=new_index)