From 1bef5a30b331b0be463ecbd45b9e371a72f0d6e5 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 13 Jun 2023 15:40:35 -0700 Subject: [PATCH 1/6] DEPR: downcasting in NDFrame.where, mask, clip --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/internals/blocks.py | 52 +++++++++++++++++----- pandas/tests/frame/indexing/test_where.py | 30 +++++++++---- pandas/tests/frame/methods/test_clip.py | 9 +++- pandas/tests/series/indexing/test_where.py | 13 ++++-- pandas/tests/series/methods/test_clip.py | 11 ++++- 6 files changed, 89 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index dd67d1f158c47..2ef02a6426045 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -287,7 +287,7 @@ Deprecations - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) -- +- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints (:issue:`??`) .. --------------------------------------------------------------------------- .. 
_whatsnew_210.performance: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 788090bbefe27..81724bbee7ccf 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -12,6 +12,7 @@ cast, final, ) +import warnings import numpy as np @@ -42,6 +43,7 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.astype import ( @@ -428,7 +430,7 @@ def coerce_to_target_dtype(self, other) -> Block: @final def _maybe_downcast( - self, blocks: list[Block], downcast=None, using_cow: bool = False + self, blocks: list[Block], downcast, using_cow: bool, caller: str ) -> list[Block]: if downcast is False: return blocks @@ -440,14 +442,38 @@ def _maybe_downcast( # but ATM it breaks too much existing code. # split and convert the blocks - return extend_blocks( + nbs = extend_blocks( [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks] ) - if downcast is None: + elif downcast is None: return blocks + else: + nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks]) + + # When _maybe_downcast is called with caller="where", it is either + # a) with downcast=False, which is a no-op (the desired future behavior) + # b) with downcast="infer", which is _not_ passed by the user. + # In the latter case the future behavior is to stop doing inference, + # so we issue a warning if and only if some inference occurred. + if caller == "where": + if len(blocks) != len(nbs) or any( + left.dtype != right.dtype for left, right in zip(blocks, nbs) + ): + # In this case _maybe_downcast was _not_ a no-op, so the behavior + # will change, so we issue a warning. + warnings.warn( + "Downcasting behavior in Series and DataFrame methods 'where', " + "'mask', and 'clip' is deprecated. 
In a future " + "version this will not infer object dtypes or cast all-round " + "floats to integers. For the old behavior, call " + "result.infer_objects(copy=False) for object inference, " + "or cast round floats explicitly.", + FutureWarning, + stacklevel=find_stack_level(), + ) - return extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks]) + return nbs @final @maybe_split @@ -1223,7 +1249,7 @@ def where( block = self.coerce_to_target_dtype(other) blocks = block.where(orig_other, cond, using_cow=using_cow) return self._maybe_downcast( - blocks, downcast=_downcast, using_cow=using_cow + blocks, downcast=_downcast, using_cow=using_cow, caller="where" ) else: @@ -1319,7 +1345,9 @@ def fillna( else: # GH#45423 consistent downcasting on no-ops. nb = self.copy(deep=not using_cow) - nbs = nb._maybe_downcast([nb], downcast=downcast, using_cow=using_cow) + nbs = nb._maybe_downcast( + [nb], downcast=downcast, using_cow=using_cow, caller="fillna" + ) return nbs if limit is not None: @@ -1337,7 +1365,9 @@ def fillna( # different behavior in _maybe_downcast. 
return extend_blocks( [ - blk._maybe_downcast([blk], downcast=downcast, using_cow=using_cow) + blk._maybe_downcast( + [blk], downcast=downcast, using_cow=using_cow, caller="fillna" + ) for blk in nbs ] ) @@ -1417,7 +1447,7 @@ def interpolate( data = new_values._ndarray nb = self.make_block_same_class(data, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow) + return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate") def diff(self, n: int, axis: AxisInt = 1) -> list[Block]: """return block for the diff of the values""" @@ -1700,7 +1730,7 @@ def where( blk = self.coerce_to_target_dtype(orig_other) nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) return self._maybe_downcast( - nbs, downcast=_downcast, using_cow=using_cow + nbs, downcast=_downcast, using_cow=using_cow, caller="where" ) elif isinstance(self, NDArrayBackedExtensionBlock): @@ -1709,7 +1739,7 @@ def where( blk = self.coerce_to_target_dtype(orig_other) nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) return self._maybe_downcast( - nbs, downcast=_downcast, using_cow=using_cow + nbs, downcast=_downcast, using_cow=using_cow, caller="where" ) else: @@ -1894,7 +1924,7 @@ def fillna( refs = None new_values = self.values.fillna(value=value, method=None, limit=limit) nb = self.make_block_same_class(new_values, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow=using_cow) + return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna") @cache_readonly def shape(self) -> Shape: diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 562f2fbe55c25..bde1e6efa442a 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -169,7 +169,7 @@ def test_where_set(self, where_frame, float_string_frame): def _check_set(df, cond, check_dtypes=True): dfi = df.copy() - econd = cond.reindex_like(df).fillna(True) + econd = 
cond.reindex_like(df).fillna(True).infer_objects(copy=False) expected = dfi.mask(~econd) return_value = dfi.where(cond, np.nan, inplace=True) @@ -348,7 +348,9 @@ def test_where_bug_transposition(self): expected = a.copy() expected[~do_not_replace] = b - result = a.where(do_not_replace, b) + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = a.where(do_not_replace, b) tm.assert_frame_equal(result, expected) a = DataFrame({0: [4, 6], 1: [1, 0]}) @@ -358,7 +360,8 @@ def test_where_bug_transposition(self): expected = a.copy() expected[~do_not_replace] = b - result = a.where(do_not_replace, b) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = a.where(do_not_replace, b) tm.assert_frame_equal(result, expected) def test_where_datetime(self): @@ -703,7 +706,9 @@ def test_where_ea_other(self): ser2 = Series(arr[:2], index=["A", "B"]) expected = DataFrame({"A": [1, 7, 3], "B": [4, pd.NA, 6]}) expected["B"] = expected["B"].astype(object) - result = df.where(mask, ser2, axis=1) + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.where(mask, ser2, axis=1) tm.assert_frame_equal(result, expected) def test_where_interval_noop(self): @@ -720,7 +725,10 @@ def test_where_interval_fullop_downcast(self, frame_or_series): # GH#45768 obj = frame_or_series([pd.Interval(0, 0)] * 2) other = frame_or_series([1.0, 2.0]) - res = obj.where(~obj.notna(), other) + + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = obj.where(~obj.notna(), other) # since all entries are being changed, we will downcast result # from object to ints (not floats) @@ -762,7 +770,9 @@ def test_where_datetimelike_noop(self, dtype): # opposite case where we are replacing *all* values -> we downcast # from object dtype # GH#45768 - 
res5 = df.where(mask2, 4) + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + with tm.assert_produces_warning(FutureWarning, match=msg): + res5 = df.where(mask2, 4) expected = DataFrame(4, index=df.index, columns=df.columns) tm.assert_frame_equal(res5, expected) @@ -960,7 +970,9 @@ def test_where_downcast_to_td64(): td = pd.Timedelta(days=1) - res = ser.where(mask, td) + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = ser.where(mask, td) expected = Series([td, td, td], dtype="m8[ns]") tm.assert_series_equal(res, expected) @@ -998,7 +1010,9 @@ def test_where_dt64_2d(): # setting all of one column, none of the other expected = DataFrame({"A": other[:, 0], "B": dta[:, 1]}) - _check_where_equivalences(df, mask, other, expected) + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + with tm.assert_produces_warning(FutureWarning, match=msg): + _check_where_equivalences(df, mask, other, expected) # setting part of one column, none of the other mask[1, 0] = True diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index da13711d607c5..237c912419b87 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -145,7 +145,11 @@ def test_clip_with_na_args(self, float_frame): # GH#19992 and adjusted in GH#40420 df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}) - result = df.clip(lower=[4, 5, np.nan], axis=0) + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + # TODO: avoid this warning here? seems like we should never be upcasting + # in the first place? 
+ with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.clip(lower=[4, 5, np.nan], axis=0) expected = DataFrame( {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]} ) @@ -161,7 +165,8 @@ def test_clip_with_na_args(self, float_frame): data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]} df = DataFrame(data) t = Series([2, -4, np.NaN, 6, 3]) - result = df.clip(lower=t, axis=0) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.clip(lower=t, axis=0) expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 0c8cb493141b7..b97cf5a102fd7 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -386,16 +386,21 @@ def test_where_datetimelike_coerce(dtype): expected = Series([10, 10]) mask = np.array([False, False]) - rs = ser.where(mask, [10, 10]) + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = ser.where(mask, [10, 10]) tm.assert_series_equal(rs, expected) - rs = ser.where(mask, 10) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = ser.where(mask, 10) tm.assert_series_equal(rs, expected) - rs = ser.where(mask, 10.0) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = ser.where(mask, 10.0) tm.assert_series_equal(rs, expected) - rs = ser.where(mask, [10.0, 10.0]) + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = ser.where(mask, [10.0, 10.0]) tm.assert_series_equal(rs, expected) rs = ser.where(mask, [10.0, np.nan]) diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index c88a42697dbdf..0a6ed8a421aa1 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -69,8 +69,15 
@@ def test_clip_with_na_args(self): tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3])) # GH#19992 - tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3])) - tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1])) + msg = "Downcasting behavior in Series and DataFrame methods 'where'" + # TODO: avoid this warning here? seems like we should never be upcasting + # in the first place? + with tm.assert_produces_warning(FutureWarning, match=msg): + res = s.clip(lower=[0, 4, np.nan]) + tm.assert_series_equal(res, Series([1, 4, 3])) + with tm.assert_produces_warning(FutureWarning, match=msg): + res = s.clip(upper=[1, np.nan, 1]) + tm.assert_series_equal(res, Series([1, 2, 1])) # GH#40420 s = Series([1, 2, 3]) From f209ce3baab625c963187c37f854f32750569dbe Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 13 Jun 2023 15:43:10 -0700 Subject: [PATCH 2/6] GH ref --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/internals/blocks.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 2ef02a6426045..c2227beff31a8 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -283,11 +283,11 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. 
``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) +- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints (:issue:`53656`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) -- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints (:issue:`??`) .. --------------------------------------------------------------------------- .. 
_whatsnew_210.performance: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 81724bbee7ccf..abef7332cb56f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -457,6 +457,7 @@ def _maybe_downcast( # In the latter case the future behavior is to stop doing inference, # so we issue a warning if and only if some inference occurred. if caller == "where": + # GH#53656 if len(blocks) != len(nbs) or any( left.dtype != right.dtype for left, right in zip(blocks, nbs) ): From 79178375e9ceaf366e2a785c02c4d84fcfecc4a0 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 14 Jun 2023 08:30:55 -0700 Subject: [PATCH 3/6] suppress warning in doctest --- pandas/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index fbef2fb272ed6..00663c4803429 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -134,6 +134,7 @@ def pytest_collection_modifyitems(items, config) -> None: ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"), ("is_categorical_dtype", "is_categorical_dtype is deprecated"), ("is_sparse", "is_sparse is deprecated"), + ("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"), # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), ( From fc2b428f517dc60fefae9966d07b1818807072c4 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 24 Jul 2023 10:18:40 -0700 Subject: [PATCH 4/6] add caller --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cb26e0ed4a8a3..92e28b1b14bc8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1439,7 +1439,7 @@ def pad_or_backfill( data = extract_array(new_values, extract_numpy=True) nb = self.make_block_same_class(data, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow) + 
return nb._maybe_downcast([nb], downcast, using_cow, caller="pad_or_backfill") @final def interpolate( From a968323c3c7811b94fbe2f74ba5fc6b915a48b5d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Aug 2023 14:41:01 -0700 Subject: [PATCH 5/6] implement future.no_silent_downcasting --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/config_init.py | 11 +++++++++++ pandas/core/internals/blocks.py | 13 ++++++++++--- pandas/tests/frame/indexing/test_where.py | 6 ++++++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 1ae797a4b5460..2911511ea872e 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -568,7 +568,7 @@ Other Deprecations - Deprecated behavior of :func:`.testing.assert_series_equal` and :func:`.testing.assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated bytes input to :func:`read_excel`. To read a file path, use a string or path-like object (:issue:`53767`) - Deprecated constructing :class:`.SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) -- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints (:issue:`53656`) +- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. 
Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)``. (:issue:`53656`) - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`) - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead (:issue:`53409`) - Deprecated literal string input to :func:`read_xml`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead (:issue:`53767`) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index d425a378b8d5b..27b9c2b5ce9c1 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -901,3 +901,14 @@ def register_converter_cb(key) -> None: "(at which point this option will be deprecated).", validator=is_one_of_factory([True, False]), ) + + cf.register_option( + "no_silent_downcasting", + False, + "Whether to opt-in to the future behavior which will *not* silently " + "downcast results from Series and DataFrame `where`, `mask`, and `clip` " + "methods. 
" + "Silent downcasting will be removed in pandas 3.0 " + "(at which point this option will be deprecated).", + validator=is_one_of_factory([True, False]), + ) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 090fd6d051435..d152f4e885785 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -15,7 +15,10 @@ import numpy as np -from pandas._config import using_copy_on_write +from pandas._config import ( + get_option, + using_copy_on_write, +) from pandas._libs import ( internals as libinternals, @@ -490,6 +493,8 @@ def _maybe_downcast( elif downcast is None: return blocks + elif caller == "where" and get_option("future.no_silent_downcasting") is True: + return blocks else: nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks]) @@ -509,9 +514,11 @@ def _maybe_downcast( "Downcasting behavior in Series and DataFrame methods 'where', " "'mask', and 'clip' is deprecated. In a future " "version this will not infer object dtypes or cast all-round " - "floats to integers. For the old behavior, call " + "floats to integers. Instead call " "result.infer_objects(copy=False) for object inference, " - "or cast round floats explicitly.", + "or cast round floats explicitly. 
To opt-in to the future " + "behavior, set " + "`pd.set_option('future.no_silent_downcasting', True)`", FutureWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 2da3942af8797..1eb67671da0b8 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -1000,6 +1000,12 @@ def test_where_downcast_to_td64(): expected = Series([td, td, td], dtype="m8[ns]") tm.assert_series_equal(res, expected) + with pd.option_context("future.no_silent_downcasting", True): + with tm.assert_produces_warning(None, match=msg): + res2 = ser.where(mask, td) + expected2 = expected.astype(object) + tm.assert_series_equal(res2, expected2) + def _check_where_equivalences(df, mask, other, expected): # similar to tests.series.indexing.test_setitem.SetitemCastingEquivalences From 9e8164dba8a91f0421e9e4cd24e80b5104d79080 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 31 Aug 2023 08:30:47 -0700 Subject: [PATCH 6/6] move whatsnew to 2.2 --- doc/source/whatsnew/v2.1.0.rst | 1 - doc/source/whatsnew/v2.2.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 52dff5e9b56a9..040ca048d1224 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -574,7 +574,6 @@ Other Deprecations - Deprecated behavior of :func:`.testing.assert_series_equal` and :func:`.testing.assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated bytes input to :func:`read_excel`. 
To read a file path, use a string or path-like object (:issue:`53767`) - Deprecated constructing :class:`.SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) -- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)``. (:issue:`53656`) - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`) - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead (:issue:`53409`) - Deprecated literal string input to :func:`read_xml`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead (:issue:`53767`) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 621c9159a5fe8..6f621debc3c76 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -145,6 +145,7 @@ Deprecations - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`) +- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. 
Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`) - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`) - Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`) - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)