diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 2392928e17df3..c43059a5844ad 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -296,6 +296,8 @@ Deprecations - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated bytes input to :func:`read_excel`. To read a file path, use a string or path-like object. (:issue:`53767`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) +- Deprecated downcasting behavior in :meth:`Series.clip`, :meth:`DataFrame.clip`, :meth:`Series.where` and :meth:`DataFrame.where` with floating dtypes; in a future version these will not cast all-round floats to integer dtype, explicitly cast the result instead (:issue:`40988`) +- Deprecated downcasting behavior in :meth:`Series.interpolate`, :meth:`Series.fillna`, :meth:`DataFrame.interpolate` and :meth:`DataFrame.fillna` with ``downcast="infer"`` and floating dtypes; in a future version these will not cast all-round floats to integer dtype, explicitly cast the result instead (:issue:`40988`) - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`) - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`) - Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. (:issue:`53767`) diff --git a/pandas/conftest.py b/pandas/conftest.py index b2f1377a9fb32..7df91dab2679f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -129,6 +129,7 @@ def pytest_collection_modifyitems(items, config) -> None: # Warnings from doctests that can be ignored; place reason in comment above. 
# Each entry specifies (path, message) - see the ignore_doctest_warning function ignored_doctest_warnings = [ + ("NDFrame.bfill", "interpolate downcasting"), ("is_int64_dtype", "is_int64_dtype is deprecated"), ("is_interval_dtype", "is_interval_dtype is deprecated"), ("is_period_dtype", "is_period_dtype is deprecated"), @@ -148,6 +149,14 @@ def pytest_collection_modifyitems(items, config) -> None: "(Series|DataFrame).bool is now deprecated and will be removed " "in future version of pandas", ), + ( + "pandas.core.generic.NDFrame.clip", + # when run manually the warning produced from the example in the + # docstring says "clip downcasting" as expected. But when the doctest + # is run via pytest it says "where downcasting". + # No idea why 2023-05-09 + "where downcasting from floating dtype to integer dtype is deprecated", + ), ( "pandas.core.generic.NDFrame.first", "first is deprecated and will be removed in a future version. " diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a4b4466e8d609..fee7c848f4815 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -97,7 +97,10 @@ SettingWithCopyWarning, ) from pandas.util._decorators import doc -from pandas.util._exceptions import find_stack_level +from pandas.util._exceptions import ( + find_stack_level, + rewrite_warning, +) from pandas.util._validators import ( check_dtype_backend, validate_ascending, @@ -8554,23 +8557,34 @@ def clip( ): lower, upper = min(lower, upper), max(lower, upper) - # fast-path for scalars - if (lower is None or is_number(lower)) and (upper is None or is_number(upper)): - return self._clip_with_scalar(lower, upper, inplace=inplace) + # We can get a message about "where" downcasting being deprecated, + # catch and re-issue the warning about "clip" + msg = ( + "clip downcasting from floating dtype to integer dtype is " + "deprecated. In a future version this will retain floating " + "dtype. 
To retain the old behavior, explicitly cast the result " + "to integer dtype" + ) + with rewrite_warning("where downcasting", FutureWarning, msg): + # fast-path for scalars + if (lower is None or is_number(lower)) and ( + upper is None or is_number(upper) + ): + return self._clip_with_scalar(lower, upper, inplace=inplace) - result = self - if lower is not None: - result = result._clip_with_one_bound( - lower, method=self.ge, axis=axis, inplace=inplace - ) - if upper is not None: - if inplace: - result = self - result = result._clip_with_one_bound( - upper, method=self.le, axis=axis, inplace=inplace - ) + result = self + if lower is not None: + result = result._clip_with_one_bound( + lower, method=self.ge, axis=axis, inplace=inplace + ) + if upper is not None: + if inplace: + result = self + result = result._clip_with_one_bound( + upper, method=self.le, axis=axis, inplace=inplace + ) - return result + return result @final @doc(klass=_shared_doc_kwargs["klass"]) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 80069f1fcacbd..ab55a7714b35e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -12,6 +12,7 @@ cast, final, ) +import warnings import numpy as np @@ -42,6 +43,7 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.astype import ( @@ -428,7 +430,11 @@ def coerce_to_target_dtype(self, other) -> Block: @final def _maybe_downcast( - self, blocks: list[Block], downcast=None, using_cow: bool = False + self, + blocks: list[Block], + downcast=None, + using_cow: bool = False, + caller: str = "fillna", ) -> list[Block]: if downcast is False: return blocks @@ -447,17 +453,35 @@ def _maybe_downcast( if downcast is None: return blocks - return extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks]) + return 
extend_blocks( + [b._downcast_2d(downcast, using_cow, caller=caller) for b in blocks] + ) @final @maybe_split - def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]: + def _downcast_2d( + self, dtype, using_cow: bool = False, *, caller: str = "fillna" + ) -> list[Block]: """ downcast specialized to 2D case post-validation. Refactored to allow use of maybe_split. """ new_values = maybe_downcast_to_dtype(self.values, dtype=dtype) + if ( + dtype == "infer" + and self.values.dtype.kind == "f" + and new_values.dtype.kind in "iu" + ): + # GH#40988 + warnings.warn( + f"{caller} downcasting from floating dtype to integer dtype is " + "deprecated. In a future version this will retain floating " + "dtype. To retain the old behavior, explicitly cast the result " + "to integer dtype", + FutureWarning, + stacklevel=find_stack_level(), + ) new_values = maybe_coerce_values(new_values) refs = self.refs if using_cow and new_values is self.values else None return [self.make_block(new_values, refs=refs)] @@ -1223,7 +1247,7 @@ def where( block = self.coerce_to_target_dtype(other) blocks = block.where(orig_other, cond, using_cow=using_cow) return self._maybe_downcast( - blocks, downcast=_downcast, using_cow=using_cow + blocks, downcast=_downcast, using_cow=using_cow, caller="where" ) else: @@ -1448,7 +1472,7 @@ def interpolate( data = new_values._ndarray nb = self.make_block_same_class(data, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow) + return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate") @final def diff(self, n: int) -> list[Block]: @@ -1741,7 +1765,7 @@ def where( blk = self.coerce_to_target_dtype(orig_other) nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) return self._maybe_downcast( - nbs, downcast=_downcast, using_cow=using_cow + nbs, downcast=_downcast, using_cow=using_cow, caller="where" ) elif isinstance(self, NDArrayBackedExtensionBlock): @@ -1750,7 +1774,7 @@ def where( blk = 
self.coerce_to_target_dtype(orig_other) nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) return self._maybe_downcast( - nbs, downcast=_downcast, using_cow=using_cow + nbs, downcast=_downcast, using_cow=using_cow, caller="where" ) else: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 07985a2744069..5f9c04299ebd1 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -207,26 +207,29 @@ def __internal_pivot_table( to_unstack.append(i) else: to_unstack.append(name) - table = agged.unstack(to_unstack) + table = agged.unstack(to_unstack, fill_value=fill_value) if not dropna: if isinstance(table.index, MultiIndex): m = MultiIndex.from_arrays( cartesian_product(table.index.levels), names=table.index.names ) - table = table.reindex(m, axis=0) + table = table.reindex(m, axis=0, fill_value=fill_value) if isinstance(table.columns, MultiIndex): m = MultiIndex.from_arrays( cartesian_product(table.columns.levels), names=table.columns.names ) - table = table.reindex(m, axis=1) + table = table.reindex(m, axis=1, fill_value=fill_value) if sort is True and isinstance(table, ABCDataFrame): table = table.sort_index(axis=1) if fill_value is not None: - table = table.fillna(fill_value, downcast="infer") + table = table.fillna(fill_value) + table = table.infer_objects() + if aggfunc is len and not observed and lib.is_integer(fill_value): + table = table.astype(np.int64) if margins: if dropna: diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 562f2fbe55c25..52053cee01d91 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -348,7 +348,9 @@ def test_where_bug_transposition(self): expected = a.copy() expected[~do_not_replace] = b - result = a.where(do_not_replace, b) + msg = "where downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = 
a.where(do_not_replace, b) tm.assert_frame_equal(result, expected) a = DataFrame({0: [4, 6], 1: [1, 0]}) @@ -358,7 +360,9 @@ def test_where_bug_transposition(self): expected = a.copy() expected[~do_not_replace] = b - result = a.where(do_not_replace, b) + msg = "where downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = a.where(do_not_replace, b) tm.assert_frame_equal(result, expected) def test_where_datetime(self): diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index da13711d607c5..5336ecdc8f0e3 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -145,7 +145,9 @@ def test_clip_with_na_args(self, float_frame): # GH#19992 and adjusted in GH#40420 df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}) - result = df.clip(lower=[4, 5, np.nan], axis=0) + msg = "clip downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.clip(lower=[4, 5, np.nan], axis=0) expected = DataFrame( {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]} ) @@ -161,7 +163,8 @@ def test_clip_with_na_args(self, float_frame): data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]} df = DataFrame(data) t = Series([2, -4, np.NaN, 6, 3]) - result = df.clip(lower=t, axis=0) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.clip(lower=t, axis=0) expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index d340e048994a9..e88dbf874e4be 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -292,13 +292,17 @@ def test_fillna_downcast(self): # GH#15277 # infer int64 from float64 
df = DataFrame({"a": [1.0, np.nan]}) - result = df.fillna(0, downcast="infer") + msg = "fillna downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.fillna(0, downcast="infer") expected = DataFrame({"a": [1, 0]}) tm.assert_frame_equal(result, expected) # infer int64 from float64 when fillna value is a dict df = DataFrame({"a": [1.0, np.nan]}) - result = df.fillna({"a": 0}, downcast="infer") + msg = "fillna downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.fillna({"a": 0}, downcast="infer") expected = DataFrame({"a": [1, 0]}) tm.assert_frame_equal(result, expected) @@ -324,7 +328,9 @@ def test_fillna_downcast_noop(self, frame_or_series): tm.assert_equal(res, expected) obj2 = obj.astype(np.float64) - res2 = obj2.fillna("foo", downcast="infer") + msg2 = "fillna downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg2): + res2 = obj2.fillna("foo", downcast="infer") expected2 = obj # get back int64 tm.assert_equal(res2, expected2) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 429f3678c34f9..59d9482aa3ece 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -165,7 +165,11 @@ def test_interp_combo(self): expected = Series([1.0, 2.0, 3.0, 4.0], name="A") tm.assert_series_equal(result, expected) - result = df["A"].interpolate(downcast="infer") + msg = ( + "interpolate downcasting from floating dtype to integer dtype is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df["A"].interpolate(downcast="infer") expected = Series([1, 2, 3, 4], name="A") tm.assert_series_equal(result, expected) @@ -246,7 +250,11 @@ def test_interp_alt_scipy(self): expected.loc[5, "A"] = 6 
tm.assert_frame_equal(result, expected) - result = df.interpolate(method="barycentric", downcast="infer") + msg = ( + "interpolate downcasting from floating dtype to integer dtype is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.interpolate(method="barycentric", downcast="infer") tm.assert_frame_equal(result, expected.astype(np.int64)) result = df.interpolate(method="krogh") @@ -370,7 +378,11 @@ def test_interp_inplace(self, using_copy_on_write): tm.assert_frame_equal(result, expected) result = df.copy() - return_value = result["a"].interpolate(inplace=True, downcast="infer") + msg = ( + "interpolate downcasting from floating dtype to integer dtype is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + return_value = result["a"].interpolate(inplace=True, downcast="infer") assert return_value is None if using_copy_on_write: tm.assert_frame_equal(result, expected_cow) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index c0704d9684574..a01c1da5eceb2 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1235,7 +1235,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C") if operation == "agg": - expected = expected.fillna(0, downcast="infer") + expected = expected.fillna(0).astype(np.int64) grouped = df_cat.groupby(["A", "B"], observed=observed)["C"] result = getattr(grouped, operation)(sum) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index c88a42697dbdf..1078b30f01920 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -69,8 +69,14 @@ def test_clip_with_na_args(self): tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3])) # GH#19992 
- tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3])) - tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1])) + msg = "clip downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = s.clip(lower=[0, 4, np.nan]) + tm.assert_series_equal(res, Series([1, 4, 3])) + + with tm.assert_produces_warning(FutureWarning, match=msg): + res = s.clip(upper=[1, np.nan, 1]) + tm.assert_series_equal(res, Series([1, 2, 1])) # GH#40420 s = Series([1, 2, 3]) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 7665e4d015a45..834cec7d4b832 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -175,13 +175,17 @@ def test_fillna_downcast(self): # GH#15277 # infer int64 from float64 ser = Series([1.0, np.nan]) - result = ser.fillna(0, downcast="infer") + msg = "fillna downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.fillna(0, downcast="infer") expected = Series([1, 0]) tm.assert_series_equal(result, expected) # infer int64 from float64 when fillna value is a dict ser = Series([1.0, np.nan]) - result = ser.fillna({1: 0}, downcast="infer") + msg = "fillna downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.fillna({1: 0}, downcast="infer") expected = Series([1, 0]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index abd4cec5ad45c..d6bfaad805b12 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -297,14 +297,19 @@ def test_interp_scipy_basic(self): result = s.interpolate(method="nearest") tm.assert_series_equal(result, 
expected.astype("float")) - result = s.interpolate(method="nearest", downcast="infer") + msg = ( + "interpolate downcasting from floating dtype to integer dtype is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.interpolate(method="nearest", downcast="infer") tm.assert_series_equal(result, expected) # zero expected = Series([1, 3, 3, 12, 12, 25]) result = s.interpolate(method="zero") tm.assert_series_equal(result, expected.astype("float")) - result = s.interpolate(method="zero", downcast="infer") + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.interpolate(method="zero", downcast="infer") tm.assert_series_equal(result, expected) # quadratic # GH #15662. diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index c801528e6ff97..7b41d4aa9f580 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -143,7 +143,9 @@ def test_reindex_pad2(): result = s.reindex(new_index).ffill() tm.assert_series_equal(result, expected.astype("float64")) - result = s.reindex(new_index).ffill(downcast="infer") + msg = "interpolate downcasting from floating dtype to integer dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.reindex(new_index).ffill(downcast="infer") tm.assert_series_equal(result, expected) expected = Series([1, 5, 3, 5], index=new_index)