From 7d350c9e5dbffb49ba470571ed2ea7d91734aee3 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Dec 2021 20:32:13 -0800 Subject: [PATCH 1/3] DEPR: special-cased downcasting in DataFrame.where GH#44597 --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/internals/blocks.py | 9 +++++++++ pandas/tests/frame/indexing/test_where.py | 24 +++++++++++++++++++++-- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2592be9c4a350..214fe7dd18a3a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -545,6 +545,7 @@ Other Deprecations - A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`) - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) - Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`) +- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index abbebcefc7a87..bb625b1b18956 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1225,6 +1225,15 @@ def where(self, other, cond) -> list[Block]: if m.any(): taken = result.take(m.nonzero()[0], axis=axis) r = maybe_downcast_numeric(taken, self.dtype) + if r.dtype != taken.dtype: + warnings.warn( + "Downcasting integer-dtype results in .where is " + "deprecated and will change in a future version. " + "To retain the old behavior, explicitly cast the results " + "to the desired dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) nb = self.make_block(r.T, placement=self._mgr_locs[m]) result_blocks.append(nb) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index e668e77644082..d0d7568da49bc 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -98,7 +98,7 @@ def test_where_upcasting(self): tm.assert_series_equal(result, expected) - def test_where_alignment(self, where_frame, float_string_frame): + def test_where_alignment(self, where_frame, float_string_frame, mixed_int_frame): # aligning def _check_align(df, cond, other, check_dtypes=True): rs = df.where(cond, other) @@ -141,7 +141,11 @@ def _check_align(df, cond, other, check_dtypes=True): # check other is ndarray cond = df > 0 - _check_align(df, cond, (_safe_add(df).values)) + warn = None + if df is mixed_int_frame: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + _check_align(df, cond, (_safe_add(df).values)) # integers are upcast, so don't check the dtypes cond = df > 0 @@ -718,6 +722,22 @@ def test_where_try_cast_deprecated(frame_or_series): obj.where(mask, -1, try_cast=False) +def test_where_int_downcasting_deprecated(): + # GH#44597 + arr = np.arange(6).astype(np.int16).reshape(3, 2) + df = DataFrame(arr) + + mask = np.zeros(arr.shape, dtype=bool) + mask[:, 0] = True + + msg = "Downcasting integer-dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df.where(mask, 2 ** 17) + + expected = DataFrame({0: arr[:, 0], 1: np.array([2 ** 17] * 3, dtype=np.int32)}) + tm.assert_frame_equal(res, expected) + + def test_where_copies_with_noop(frame_or_series): # GH-39595 result = frame_or_series([1, 2, 3, 4]) From 110e8f4e663250a50e6aaa8ccef2e0025585b9c7 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Dec 2021 09:33:47 -0800 Subject: [PATCH 2/3] ArrayManager compat --- pandas/tests/frame/indexing/test_where.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index c89421f996461..baa4cc83e61c7 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -721,7 +721,7 @@ def test_where_try_cast_deprecated(frame_or_series): obj.where(mask, -1, try_cast=False) -def test_where_int_downcasting_deprecated(): +def test_where_int_downcasting_deprecated(using_array_manager): # GH#44597 arr = np.arange(6).astype(np.int16).reshape(3, 2) df = DataFrame(arr) @@ -730,7 +730,8 @@ def test_where_int_downcasting_deprecated(): mask[:, 0] = True msg = "Downcasting integer-dtype" - with tm.assert_produces_warning(FutureWarning, match=msg): + warn = FutureWarning if not using_array_manager else None + with tm.assert_produces_warning(warn, match=msg): res = df.where(mask, 2 ** 17) expected = DataFrame({0: arr[:, 0], 1: np.array([2 ** 17] * 3, dtype=np.int32)}) From b20f6393b02f427135a7f823b7155146c1fbd4b1 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Dec 2021 15:48:41 -0800 Subject: [PATCH 3/3] ArrayManager compat --- pandas/tests/frame/indexing/test_where.py | 6 ++++-- pandas/tests/frame/methods/test_clip.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index baa4cc83e61c7..3d55ff5f98407 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -465,7 +465,7 @@ def test_where_complex(self): df[df.abs() >= 5] = np.nan tm.assert_frame_equal(df, expected) - def test_where_axis(self): + def test_where_axis(self, using_array_manager): # GH 9736 df = DataFrame(np.random.randn(2, 2)) mask = DataFrame([[False, False], [False, False]]) @@ -503,8 +503,10 @@ def test_where_axis(self): assert return_value is None tm.assert_frame_equal(result, expected) + warn = FutureWarning if using_array_manager else None expected = DataFrame([[0, np.nan], [0, np.nan]]) - result = df.where(mask, s, axis="columns") + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + result = df.where(mask, s, axis="columns") tm.assert_frame_equal(result, expected) expected = DataFrame( diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index c851e65a7ad4f..e692948c92a26 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -136,7 +136,7 @@ def test_clip_against_unordered_columns(self): tm.assert_frame_equal(result_lower, expected_lower) tm.assert_frame_equal(result_lower_upper, expected_lower_upper) - def test_clip_with_na_args(self, float_frame): + def test_clip_with_na_args(self, float_frame, using_array_manager): """Should process np.nan argument as None""" # GH#17276 tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) @@ -151,7 +151,9 @@ def test_clip_with_na_args(self, float_frame): ) tm.assert_frame_equal(result, expected) - result = df.clip(lower=[4, 5, np.nan], axis=1) + warn = FutureWarning if using_array_manager else None + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + result = df.clip(lower=[4, 5, np.nan], axis=1) expected = DataFrame( {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]} )