diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 54e855f61905a..70b9b322f8b64 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -169,6 +169,7 @@ Deprecations - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`) - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`) - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`) +- Deprecated downcasting of the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, and :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`) .. --------------------------------------------------------------------------- .. 
_whatsnew_220.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5c303e2a73bd7..b8db64d13923c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10395,7 +10395,14 @@ def _where( # make sure we are boolean fill_value = bool(inplace) - cond = cond.fillna(fill_value) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Downcasting object dtype arrays", + category=FutureWarning, + ) + cond = cond.fillna(fill_value) + cond = cond.infer_objects(copy=False) msg = "Boolean array expected for the condition, not {dtype}" diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6399f85723ae5..3728f695eab26 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -498,7 +498,11 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: @final def _maybe_downcast( - self, blocks: list[Block], downcast, using_cow: bool, caller: str + self, + blocks: list[Block], + downcast, + using_cow: bool, + caller: str, ) -> list[Block]: if downcast is False: return blocks @@ -510,9 +514,29 @@ def _maybe_downcast( # but ATM it breaks too much existing code. # split and convert the blocks + if caller == "fillna" and get_option("future.no_silent_downcasting"): + return blocks + nbs = extend_blocks( [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks] ) + if caller == "fillna": + if len(nbs) != len(blocks) or not all( + x.dtype == y.dtype for x, y in zip(nbs, blocks) + ): + # GH#54261 + warnings.warn( + "Downcasting object dtype arrays on .fillna, .ffill, .bfill " + "is deprecated and will change in a future version. " + "Call result.infer_objects(copy=False) instead. 
" + "To opt-in to the future " + "behavior, set " + "`pd.set_option('future.no_silent_downcasting', True)`", + FutureWarning, + stacklevel=find_stack_level(), + ) + + return nbs elif downcast is None: return blocks @@ -1499,7 +1523,7 @@ def pad_or_backfill( data = extract_array(new_values, extract_numpy=True) nb = self.make_block_same_class(data, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow, caller="pad_or_backfill") + return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna") @final def interpolate( diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 76b938755755a..a6ee8407988ec 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -9,6 +9,7 @@ TYPE_CHECKING, Any, ) +import warnings from pandas.errors import AbstractMethodError from pandas.util._decorators import doc @@ -202,7 +203,13 @@ def process_dataframe(self) -> dict[int | str, dict[str, Any]]: df = df.reset_index() if self.na_rep is not None: - df = df.fillna(self.na_rep) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Downcasting object dtype arrays", + category=FutureWarning, + ) + df = df.fillna(self.na_rep) return df.to_dict(orient="index") diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 52ea072d1483f..ecab14a54beff 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1217,7 +1217,16 @@ def _try_convert_data( if not self.dtype: if all(notna(data)): return data, False - return data.fillna(np.nan), True + + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Downcasting object dtype arrays", + category=FutureWarning, + ) + filled = data.fillna(np.nan) + + return filled, True elif self.dtype is True: pass diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 7f19e62f40774..c6d9e94ccc675 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2983,7 +2983,14 @@ def _prepare_data(self) -> np.rec.recarray: for i, col in enumerate(data): typ = 
typlist[i] if typ <= self._max_string_length: - data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,)) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Downcasting object dtype arrays", + category=FutureWarning, + ) + dc = data[col].fillna("") + data[col] = dc.apply(_pad_bytes, args=(typ,)) stype = f"S{typ}" dtypes[col] = stype data[col] = data[col].astype(stype) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index c62f73271577d..d88605db60720 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1538,7 +1538,13 @@ def _kind(self) -> Literal["area"]: def __init__(self, data, **kwargs) -> None: kwargs.setdefault("stacked", True) - data = data.fillna(value=0) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Downcasting object dtype arrays", + category=FutureWarning, + ) + data = data.fillna(value=0) LinePlot.__init__(self, data, **kwargs) if not self.stacked: diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index f5b0b6f4efa98..d27e9b8b9e983 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -13,6 +13,8 @@ be added to the array-specific tests in `pandas/tests/arrays/`. 
""" +import warnings + import numpy as np import pytest @@ -186,7 +188,14 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): if sdtype.kind in "iu": if op_name in ("__rtruediv__", "__truediv__", "__div__"): - expected = expected.fillna(np.nan).astype("Float64") + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Downcasting object dtype arrays", + category=FutureWarning, + ) + filled = expected.fillna(np.nan) + expected = filled.astype("Float64") else: # combine method result in 'biggest' (int64) dtype expected = expected.astype(sdtype) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 1eb67671da0b8..4576a86ad27cd 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -96,6 +96,7 @@ def test_where_upcasting(self): tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") def test_where_alignment(self, where_frame, float_string_frame): # aligning def _check_align(df, cond, other, check_dtypes=True): @@ -170,6 +171,7 @@ def test_where_invalid(self): with pytest.raises(ValueError, match=msg): df.mask(0) + @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") def test_where_set(self, where_frame, float_string_frame, mixed_int_frame): # where inplace diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 812150bb860e9..52b4b64ee279f 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -360,7 +360,9 @@ def test_fillna_dtype_conversion(self): expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5]) tm.assert_series_equal(result, expected) - result = df.fillna(1) + msg = "Downcasting object dtype arrays" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.fillna(1) expected = 
DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) tm.assert_frame_equal(result, expected) @@ -817,7 +819,8 @@ def test_fillna_nones_inplace(): [[None, None], [None, None]], columns=["A", "B"], ) - with tm.assert_produces_warning(False): + msg = "Downcasting object dtype arrays" + with tm.assert_produces_warning(FutureWarning, match=msg): df.fillna(value={"A": 1, "B": 2}, inplace=True) expected = DataFrame([[1, 2], [1, 2]], columns=["A", "B"]) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 878e94c15e16b..1488fa65fabc0 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1254,7 +1254,9 @@ def test_operators_none_as_na(self, op): # since filling converts dtypes from object, changed expected to be # object - filled = df.fillna(np.nan) + msg = "Downcasting object dtype arrays" + with tm.assert_produces_warning(FutureWarning, match=msg): + filled = df.fillna(np.nan) result = op(df, 3) expected = op(filled, 3).astype(object) expected[pd.isna(expected)] = np.nan @@ -1265,10 +1267,14 @@ def test_operators_none_as_na(self, op): expected[pd.isna(expected)] = np.nan tm.assert_frame_equal(result, expected) - result = op(df, df.fillna(7)) + msg = "Downcasting object dtype arrays" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = op(df, df.fillna(7)) tm.assert_frame_equal(result, expected) - result = op(df.fillna(7), df) + msg = "Downcasting object dtype arrays" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = op(df.fillna(7), df) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)]) diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py index 2cc3b67e7ac02..a15d7d7f93f01 100644 --- a/pandas/tests/frame/test_logical_ops.py +++ b/pandas/tests/frame/test_logical_ops.py @@ -151,6 +151,7 @@ def _check_unary_op(op): 
_check_unary_op(operator.inv) # TODO: belongs elsewhere + @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") def test_logical_with_nas(self): d = DataFrame({"a": [np.nan, False], "b": [True, True]}) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index e7b6a0c0b39b0..74473bc54d51e 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1155,6 +1155,7 @@ def test_any_all_mixed_float(self, opname, axis, bool_only, float_string_frame): def test_any_all_bool_with_na(self, opname, axis, bool_frame_with_na): getattr(bool_frame_with_na, opname)(axis=axis, bool_only=False) + @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") @pytest.mark.parametrize("opname", ["any", "all"]) def test_any_all_bool_frame(self, opname, bool_frame_with_na): # GH#12863: numpy gives back non-boolean data for object type diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index b54a795af4fdc..9b76ae093e8c4 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1184,6 +1184,7 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack): ) tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") @pytest.mark.parametrize( "index, columns", [ @@ -1194,6 +1195,7 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack): ) def test_stack_multi_columns_non_unique_index(self, index, columns, future_stack): # GH-28301 + df = DataFrame(index=index, columns=columns).fillna(1) stacked = df.stack(future_stack=future_stack) new_index = MultiIndex.from_tuples(stacked.index.to_numpy()) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 287310a18c7df..41bbfcf6840a9 100644 --- a/pandas/tests/groupby/test_function.py +++ 
b/pandas/tests/groupby/test_function.py @@ -1534,6 +1534,7 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys): method(*args, **kwargs) +@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") @pytest.mark.parametrize("dtype", [bool, int, float, object]) def test_deprecate_numeric_only_series(dtype, groupby_func, request): # GH#46560 diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 016208f2d2026..f3075c116883a 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -152,7 +152,9 @@ def test_reindex_inference(): # inference of new dtype s = Series([True, False, False, True], index=list("abcd")) new_index = "agc" - result = s.reindex(list(new_index)).ffill() + msg = "Downcasting object dtype arrays on" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.reindex(list(new_index)).ffill() expected = Series([True, True, False], index=list(new_index)) tm.assert_series_equal(result, expected) @@ -160,7 +162,9 @@ def test_reindex_inference(): def test_reindex_downcasting(): # GH4618 shifted series downcasting s = Series(False, index=range(5)) - result = s.shift(1).bfill() + msg = "Downcasting object dtype arrays on" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.shift(1).bfill() expected = Series(False, index=range(5)) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index be63d9500ce73..a39b3ff7e6f2b 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -203,6 +203,7 @@ def test_series_datetimelike_attribute_access_invalid(self): with pytest.raises(AttributeError, match=msg): ser.weekday + @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") @pytest.mark.parametrize( "kernel, has_numeric_only", [ diff --git 
a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 44121cb5f784f..55fc77fb5705f 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -639,10 +639,12 @@ def test_comparison_operators_with_nas(self, comparison_op): result = comparison_op(ser, val) expected = comparison_op(ser.dropna(), val).reindex(ser.index) - if comparison_op is operator.ne: - expected = expected.fillna(True).astype(bool) - else: - expected = expected.fillna(False).astype(bool) + msg = "Downcasting object dtype arrays" + with tm.assert_produces_warning(FutureWarning, match=msg): + if comparison_op is operator.ne: + expected = expected.fillna(True).astype(bool) + else: + expected = expected.fillna(False).astype(bool) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 26046ef9ba295..2146e154dc7fa 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -15,6 +15,7 @@ class TestSeriesLogicalOps: + @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") @pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor]) def test_bool_operators_with_nas(self, bool_op): # boolean &, |, ^ should work with object arrays and propagate NAs