Skip to content

Commit 8d0e7bc

Browse files
jbrockmendel authored and mroeschke committed
DEPR: downcasting in NDFrame.where, mask, clip (pandas-dev#53656)
* DEPR: downcasting in NDFrame.where, mask, clip * GH ref * suppress warning in doctest * add caller * implement future.no_silent_downcasting * move whatsnew to 2.2
1 parent 385b69e commit 8d0e7bc

File tree

8 files changed

+112
-28
lines changed

8 files changed

+112
-28
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ Deprecations
145145
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
146146
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
147147
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
148+
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
148149
- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
149150
- Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
150151
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)

pandas/conftest.py

+1
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def pytest_collection_modifyitems(items, config) -> None:
142142
("is_sparse", "is_sparse is deprecated"),
143143
("NDFrame.replace", "The 'method' keyword"),
144144
("NDFrame.replace", "Series.replace without 'value'"),
145+
("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"),
145146
("Series.idxmin", "The behavior of Series.idxmin"),
146147
("Series.idxmax", "The behavior of Series.idxmax"),
147148
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),

pandas/core/config_init.py

+11
Original file line numberDiff line numberDiff line change
@@ -902,3 +902,14 @@ def register_converter_cb(key) -> None:
902902
"(at which point this option will be deprecated).",
903903
validator=is_one_of_factory([True, False]),
904904
)
905+
906+
cf.register_option(
907+
"no_silent_downcasting",
908+
False,
909+
"Whether to opt-in to the future behavior which will *not* silently "
910+
"downcast results from Series and DataFrame `where`, `mask`, and `clip` "
911+
"methods. "
912+
"Silent downcasting will be removed in pandas 3.0 "
913+
"(at which point this option will be deprecated).",
914+
validator=is_one_of_factory([True, False]),
915+
)

pandas/core/internals/blocks.py

+49-13
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515

1616
import numpy as np
1717

18-
from pandas._config import using_copy_on_write
18+
from pandas._config import (
19+
get_option,
20+
using_copy_on_write,
21+
)
1922

2023
from pandas._libs import (
2124
NaT,
@@ -495,7 +498,7 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
495498

496499
@final
497500
def _maybe_downcast(
498-
self, blocks: list[Block], downcast=None, using_cow: bool = False
501+
self, blocks: list[Block], downcast, using_cow: bool, caller: str
499502
) -> list[Block]:
500503
if downcast is False:
501504
return blocks
@@ -507,14 +510,43 @@ def _maybe_downcast(
507510
# but ATM it breaks too much existing code.
508511
# split and convert the blocks
509512

510-
return extend_blocks(
513+
nbs = extend_blocks(
511514
[blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
512515
)
513516

514-
if downcast is None:
517+
elif downcast is None:
518+
return blocks
519+
elif caller == "where" and get_option("future.no_silent_downcasting") is True:
515520
return blocks
521+
else:
522+
nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])
523+
524+
# When _maybe_downcast is called with caller="where", it is either
525+
# a) with downcast=False, which is a no-op (the desired future behavior)
526+
# b) with downcast="infer", which is _not_ passed by the user.
527+
# In the latter case the future behavior is to stop doing inference,
528+
# so we issue a warning if and only if some inference occurred.
529+
if caller == "where":
530+
# GH#53656
531+
if len(blocks) != len(nbs) or any(
532+
left.dtype != right.dtype for left, right in zip(blocks, nbs)
533+
):
534+
# In this case _maybe_downcast was _not_ a no-op, so the behavior
535+
# will change, so we issue a warning.
536+
warnings.warn(
537+
"Downcasting behavior in Series and DataFrame methods 'where', "
538+
"'mask', and 'clip' is deprecated. In a future "
539+
"version this will not infer object dtypes or cast all-round "
540+
"floats to integers. Instead call "
541+
"result.infer_objects(copy=False) for object inference, "
542+
"or cast round floats explicitly. To opt-in to the future "
543+
"behavior, set "
544+
"`pd.set_option('future.no_silent_downcasting', True)`",
545+
FutureWarning,
546+
stacklevel=find_stack_level(),
547+
)
516548

517-
return extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])
549+
return nbs
518550

519551
@final
520552
@maybe_split
@@ -1308,7 +1340,7 @@ def where(
13081340
block = self.coerce_to_target_dtype(other)
13091341
blocks = block.where(orig_other, cond, using_cow=using_cow)
13101342
return self._maybe_downcast(
1311-
blocks, downcast=_downcast, using_cow=using_cow
1343+
blocks, downcast=_downcast, using_cow=using_cow, caller="where"
13121344
)
13131345

13141346
else:
@@ -1404,7 +1436,9 @@ def fillna(
14041436
else:
14051437
# GH#45423 consistent downcasting on no-ops.
14061438
nb = self.copy(deep=not using_cow)
1407-
nbs = nb._maybe_downcast([nb], downcast=downcast, using_cow=using_cow)
1439+
nbs = nb._maybe_downcast(
1440+
[nb], downcast=downcast, using_cow=using_cow, caller="fillna"
1441+
)
14081442
return nbs
14091443

14101444
if limit is not None:
@@ -1422,7 +1456,9 @@ def fillna(
14221456
# different behavior in _maybe_downcast.
14231457
return extend_blocks(
14241458
[
1425-
blk._maybe_downcast([blk], downcast=downcast, using_cow=using_cow)
1459+
blk._maybe_downcast(
1460+
[blk], downcast=downcast, using_cow=using_cow, caller="fillna"
1461+
)
14261462
for blk in nbs
14271463
]
14281464
)
@@ -1463,7 +1499,7 @@ def pad_or_backfill(
14631499
data = extract_array(new_values, extract_numpy=True)
14641500

14651501
nb = self.make_block_same_class(data, refs=refs)
1466-
return nb._maybe_downcast([nb], downcast, using_cow)
1502+
return nb._maybe_downcast([nb], downcast, using_cow, caller="pad_or_backfill")
14671503

14681504
@final
14691505
def interpolate(
@@ -1516,7 +1552,7 @@ def interpolate(
15161552
data = extract_array(new_values, extract_numpy=True)
15171553

15181554
nb = self.make_block_same_class(data, refs=refs)
1519-
return nb._maybe_downcast([nb], downcast, using_cow)
1555+
return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate")
15201556

15211557
@final
15221558
def diff(self, n: int) -> list[Block]:
@@ -1805,7 +1841,7 @@ def where(
18051841
blk = self.coerce_to_target_dtype(orig_other)
18061842
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
18071843
return self._maybe_downcast(
1808-
nbs, downcast=_downcast, using_cow=using_cow
1844+
nbs, downcast=_downcast, using_cow=using_cow, caller="where"
18091845
)
18101846

18111847
elif isinstance(self, NDArrayBackedExtensionBlock):
@@ -1814,7 +1850,7 @@ def where(
18141850
blk = self.coerce_to_target_dtype(orig_other)
18151851
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
18161852
return self._maybe_downcast(
1817-
nbs, downcast=_downcast, using_cow=using_cow
1853+
nbs, downcast=_downcast, using_cow=using_cow, caller="where"
18181854
)
18191855

18201856
else:
@@ -2013,7 +2049,7 @@ def fillna(
20132049
)
20142050

20152051
nb = self.make_block_same_class(new_values, refs=refs)
2016-
return nb._maybe_downcast([nb], downcast, using_cow=using_cow)
2052+
return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")
20172053

20182054
@cache_readonly
20192055
def shape(self) -> Shape:

pandas/tests/frame/indexing/test_where.py

+25-7
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
175175

176176
def _check_set(df, cond, check_dtypes=True):
177177
dfi = df.copy()
178-
econd = cond.reindex_like(df).fillna(True)
178+
econd = cond.reindex_like(df).fillna(True).infer_objects(copy=False)
179179
expected = dfi.mask(~econd)
180180

181181
return_value = dfi.where(cond, np.nan, inplace=True)
@@ -356,7 +356,9 @@ def test_where_bug_transposition(self):
356356
expected = a.copy()
357357
expected[~do_not_replace] = b
358358

359-
result = a.where(do_not_replace, b)
359+
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
360+
with tm.assert_produces_warning(FutureWarning, match=msg):
361+
result = a.where(do_not_replace, b)
360362
tm.assert_frame_equal(result, expected)
361363

362364
a = DataFrame({0: [4, 6], 1: [1, 0]})
@@ -366,7 +368,8 @@ def test_where_bug_transposition(self):
366368
expected = a.copy()
367369
expected[~do_not_replace] = b
368370

369-
result = a.where(do_not_replace, b)
371+
with tm.assert_produces_warning(FutureWarning, match=msg):
372+
result = a.where(do_not_replace, b)
370373
tm.assert_frame_equal(result, expected)
371374

372375
def test_where_datetime(self):
@@ -718,7 +721,9 @@ def test_where_ea_other(self):
718721
ser2 = Series(arr[:2], index=["A", "B"])
719722
expected = DataFrame({"A": [1, 7, 3], "B": [4, pd.NA, 6]})
720723
expected["B"] = expected["B"].astype(object)
721-
result = df.where(mask, ser2, axis=1)
724+
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
725+
with tm.assert_produces_warning(FutureWarning, match=msg):
726+
result = df.where(mask, ser2, axis=1)
722727
tm.assert_frame_equal(result, expected)
723728

724729
def test_where_interval_noop(self):
@@ -735,7 +740,10 @@ def test_where_interval_fullop_downcast(self, frame_or_series):
735740
# GH#45768
736741
obj = frame_or_series([pd.Interval(0, 0)] * 2)
737742
other = frame_or_series([1.0, 2.0])
738-
res = obj.where(~obj.notna(), other)
743+
744+
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
745+
with tm.assert_produces_warning(FutureWarning, match=msg):
746+
res = obj.where(~obj.notna(), other)
739747

740748
# since all entries are being changed, we will downcast result
741749
# from object to ints (not floats)
@@ -780,7 +788,9 @@ def test_where_datetimelike_noop(self, dtype):
780788

781789
# opposite case where we are replacing *all* values -> we downcast
782790
# from object dtype # GH#45768
783-
res5 = df.where(mask2, 4)
791+
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
792+
with tm.assert_produces_warning(FutureWarning, match=msg):
793+
res5 = df.where(mask2, 4)
784794
expected = DataFrame(4, index=df.index, columns=df.columns)
785795
tm.assert_frame_equal(res5, expected)
786796

@@ -984,10 +994,18 @@ def test_where_downcast_to_td64():
984994

985995
td = pd.Timedelta(days=1)
986996

987-
res = ser.where(mask, td)
997+
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
998+
with tm.assert_produces_warning(FutureWarning, match=msg):
999+
res = ser.where(mask, td)
9881000
expected = Series([td, td, td], dtype="m8[ns]")
9891001
tm.assert_series_equal(res, expected)
9901002

1003+
with pd.option_context("future.no_silent_downcasting", True):
1004+
with tm.assert_produces_warning(None, match=msg):
1005+
res2 = ser.where(mask, td)
1006+
expected2 = expected.astype(object)
1007+
tm.assert_series_equal(res2, expected2)
1008+
9911009

9921010
def _check_where_equivalences(df, mask, other, expected):
9931011
# similar to tests.series.indexing.test_setitem.SetitemCastingEquivalences

pandas/tests/frame/methods/test_clip.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,11 @@ def test_clip_with_na_args(self, float_frame):
151151
# GH#19992 and adjusted in GH#40420
152152
df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})
153153

154-
result = df.clip(lower=[4, 5, np.nan], axis=0)
154+
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
155+
# TODO: avoid this warning here? seems like we should never be upcasting
156+
# in the first place?
157+
with tm.assert_produces_warning(FutureWarning, match=msg):
158+
result = df.clip(lower=[4, 5, np.nan], axis=0)
155159
expected = DataFrame(
156160
{"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
157161
)
@@ -167,7 +171,8 @@ def test_clip_with_na_args(self, float_frame):
167171
data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
168172
df = DataFrame(data)
169173
t = Series([2, -4, np.nan, 6, 3])
170-
result = df.clip(lower=t, axis=0)
174+
with tm.assert_produces_warning(FutureWarning, match=msg):
175+
result = df.clip(lower=t, axis=0)
171176
expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
172177
tm.assert_frame_equal(result, expected)
173178

pandas/tests/series/indexing/test_where.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -393,16 +393,21 @@ def test_where_datetimelike_coerce(dtype):
393393
expected = Series([10, 10])
394394
mask = np.array([False, False])
395395

396-
rs = ser.where(mask, [10, 10])
396+
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
397+
with tm.assert_produces_warning(FutureWarning, match=msg):
398+
rs = ser.where(mask, [10, 10])
397399
tm.assert_series_equal(rs, expected)
398400

399-
rs = ser.where(mask, 10)
401+
with tm.assert_produces_warning(FutureWarning, match=msg):
402+
rs = ser.where(mask, 10)
400403
tm.assert_series_equal(rs, expected)
401404

402-
rs = ser.where(mask, 10.0)
405+
with tm.assert_produces_warning(FutureWarning, match=msg):
406+
rs = ser.where(mask, 10.0)
403407
tm.assert_series_equal(rs, expected)
404408

405-
rs = ser.where(mask, [10.0, 10.0])
409+
with tm.assert_produces_warning(FutureWarning, match=msg):
410+
rs = ser.where(mask, [10.0, 10.0])
406411
tm.assert_series_equal(rs, expected)
407412

408413
rs = ser.where(mask, [10.0, np.nan])

pandas/tests/series/methods/test_clip.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,15 @@ def test_clip_with_na_args(self):
6969
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))
7070

7171
# GH#19992
72-
tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3]))
73-
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1]))
72+
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
73+
# TODO: avoid this warning here? seems like we should never be upcasting
74+
# in the first place?
75+
with tm.assert_produces_warning(FutureWarning, match=msg):
76+
res = s.clip(lower=[0, 4, np.nan])
77+
tm.assert_series_equal(res, Series([1, 4, 3]))
78+
with tm.assert_produces_warning(FutureWarning, match=msg):
79+
res = s.clip(upper=[1, np.nan, 1])
80+
tm.assert_series_equal(res, Series([1, 2, 1]))
7481

7582
# GH#40420
7683
s = Series([1, 2, 3])

0 commit comments

Comments
 (0)