Skip to content

Commit 5a22750

Browse files
authored
DEPR: special-cased downcasting in DataFrame.where GH#44597 (#45009)
1 parent d2abc92 commit 5a22750

File tree

4 files changed

+41
-6
lines changed

4 files changed

+41
-6
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,7 @@ Other Deprecations
546546
- A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating that the argument signature may change to more closely emulate the arguments of :meth:`.Styler.to_latex` in future versions (:issue:`44411`)
547547
- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
548548
- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
549+
- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer dtypes (:issue:`44597`)
549550
-
550551

551552
.. ---------------------------------------------------------------------------

pandas/core/internals/blocks.py

+9
Original file line numberDiff line numberDiff line change
@@ -1227,6 +1227,15 @@ def where(self, other, cond) -> list[Block]:
12271227
if m.any():
12281228
taken = result.take(m.nonzero()[0], axis=axis)
12291229
r = maybe_downcast_numeric(taken, self.dtype)
1230+
if r.dtype != taken.dtype:
1231+
warnings.warn(
1232+
"Downcasting integer-dtype results in .where is "
1233+
"deprecated and will change in a future version. "
1234+
"To retain the old behavior, explicitly cast the results "
1235+
"to the desired dtype.",
1236+
FutureWarning,
1237+
stacklevel=find_stack_level(),
1238+
)
12301239
nb = self.make_block(r.T, placement=self._mgr_locs[m])
12311240
result_blocks.append(nb)
12321241

pandas/tests/frame/indexing/test_where.py

+27-4
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_where_upcasting(self):
9898

9999
tm.assert_series_equal(result, expected)
100100

101-
def test_where_alignment(self, where_frame, float_string_frame):
101+
def test_where_alignment(self, where_frame, float_string_frame, mixed_int_frame):
102102
# aligning
103103
def _check_align(df, cond, other, check_dtypes=True):
104104
rs = df.where(cond, other)
@@ -141,7 +141,11 @@ def _check_align(df, cond, other, check_dtypes=True):
141141

142142
# check other is ndarray
143143
cond = df > 0
144-
_check_align(df, cond, (_safe_add(df).values))
144+
warn = None
145+
if df is mixed_int_frame:
146+
warn = FutureWarning
147+
with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
148+
_check_align(df, cond, (_safe_add(df).values))
145149

146150
# integers are upcast, so don't check the dtypes
147151
cond = df > 0
@@ -461,7 +465,7 @@ def test_where_complex(self):
461465
df[df.abs() >= 5] = np.nan
462466
tm.assert_frame_equal(df, expected)
463467

464-
def test_where_axis(self):
468+
def test_where_axis(self, using_array_manager):
465469
# GH 9736
466470
df = DataFrame(np.random.randn(2, 2))
467471
mask = DataFrame([[False, False], [False, False]])
@@ -499,8 +503,10 @@ def test_where_axis(self):
499503
assert return_value is None
500504
tm.assert_frame_equal(result, expected)
501505

506+
warn = FutureWarning if using_array_manager else None
502507
expected = DataFrame([[0, np.nan], [0, np.nan]])
503-
result = df.where(mask, s, axis="columns")
508+
with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
509+
result = df.where(mask, s, axis="columns")
504510
tm.assert_frame_equal(result, expected)
505511

506512
expected = DataFrame(
@@ -717,6 +723,23 @@ def test_where_try_cast_deprecated(frame_or_series):
717723
obj.where(mask, -1, try_cast=False)
718724

719725

726+
def test_where_int_downcasting_deprecated(using_array_manager):
727+
# GH#44597
728+
arr = np.arange(6).astype(np.int16).reshape(3, 2)
729+
df = DataFrame(arr)
730+
731+
mask = np.zeros(arr.shape, dtype=bool)
732+
mask[:, 0] = True
733+
734+
msg = "Downcasting integer-dtype"
735+
warn = FutureWarning if not using_array_manager else None
736+
with tm.assert_produces_warning(warn, match=msg):
737+
res = df.where(mask, 2 ** 17)
738+
739+
expected = DataFrame({0: arr[:, 0], 1: np.array([2 ** 17] * 3, dtype=np.int32)})
740+
tm.assert_frame_equal(res, expected)
741+
742+
720743
def test_where_copies_with_noop(frame_or_series):
721744
# GH-39595
722745
result = frame_or_series([1, 2, 3, 4])

pandas/tests/frame/methods/test_clip.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def test_clip_against_unordered_columns(self):
136136
tm.assert_frame_equal(result_lower, expected_lower)
137137
tm.assert_frame_equal(result_lower_upper, expected_lower_upper)
138138

139-
def test_clip_with_na_args(self, float_frame):
139+
def test_clip_with_na_args(self, float_frame, using_array_manager):
140140
"""Should process np.nan argument as None"""
141141
# GH#17276
142142
tm.assert_frame_equal(float_frame.clip(np.nan), float_frame)
@@ -151,7 +151,9 @@ def test_clip_with_na_args(self, float_frame):
151151
)
152152
tm.assert_frame_equal(result, expected)
153153

154-
result = df.clip(lower=[4, 5, np.nan], axis=1)
154+
warn = FutureWarning if using_array_manager else None
155+
with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
156+
result = df.clip(lower=[4, 5, np.nan], axis=1)
155157
expected = DataFrame(
156158
{"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]}
157159
)

0 commit comments

Comments
 (0)