Skip to content

Commit 1496630

Browse files
DEPR: fillna downcasting from object dtype (#54261)
* DEPR: fillna downcasting from object dtype
* GH ref
* suppress warning
* update test
* Update doc/source/whatsnew/v2.1.0.rst

Co-authored-by: Matthew Roeschke <[email protected]>

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 95b6057 commit 1496630

File tree

19 files changed

+113
-19
lines changed

19 files changed

+113
-19
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ Deprecations
198198
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
199199
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
200200
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
201+
- Deprecated downcasting of the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future behavior, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
201202

202203
.. ---------------------------------------------------------------------------
203204
.. _whatsnew_220.performance:

pandas/core/generic.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -10393,7 +10393,14 @@ def _where(
1039310393

1039410394
# make sure we are boolean
1039510395
fill_value = bool(inplace)
10396-
cond = cond.fillna(fill_value)
10396+
with warnings.catch_warnings():
10397+
warnings.filterwarnings(
10398+
"ignore",
10399+
"Downcasting object dtype arrays",
10400+
category=FutureWarning,
10401+
)
10402+
cond = cond.fillna(fill_value)
10403+
cond = cond.infer_objects(copy=False)
1039710404

1039810405
msg = "Boolean array expected for the condition, not {dtype}"
1039910406

pandas/core/internals/blocks.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,11 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
498498

499499
@final
500500
def _maybe_downcast(
501-
self, blocks: list[Block], downcast, using_cow: bool, caller: str
501+
self,
502+
blocks: list[Block],
503+
downcast,
504+
using_cow: bool,
505+
caller: str,
502506
) -> list[Block]:
503507
if downcast is False:
504508
return blocks
@@ -510,9 +514,29 @@ def _maybe_downcast(
510514
# but ATM it breaks too much existing code.
511515
# split and convert the blocks
512516

517+
if caller == "fillna" and get_option("future.no_silent_downcasting"):
518+
return blocks
519+
513520
nbs = extend_blocks(
514521
[blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
515522
)
523+
if caller == "fillna":
524+
if len(nbs) != len(blocks) or not all(
525+
x.dtype == y.dtype for x, y in zip(nbs, blocks)
526+
):
527+
# GH#54261
528+
warnings.warn(
529+
"Downcasting object dtype arrays on .fillna, .ffill, .bfill "
530+
"is deprecated and will change in a future version. "
531+
"Call result.infer_objects(copy=False) instead. "
532+
"To opt-in to the future "
533+
"behavior, set "
534+
"`pd.set_option('future.no_silent_downcasting', True)`",
535+
FutureWarning,
536+
stacklevel=find_stack_level(),
537+
)
538+
539+
return nbs
516540

517541
elif downcast is None:
518542
return blocks
@@ -1549,7 +1573,7 @@ def pad_or_backfill(
15491573
data = extract_array(new_values, extract_numpy=True)
15501574

15511575
nb = self.make_block_same_class(data, refs=refs)
1552-
return nb._maybe_downcast([nb], downcast, using_cow, caller="pad_or_backfill")
1576+
return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna")
15531577

15541578
@final
15551579
def interpolate(

pandas/io/formats/xml.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
TYPE_CHECKING,
1010
Any,
1111
)
12+
import warnings
1213

1314
from pandas.errors import AbstractMethodError
1415
from pandas.util._decorators import doc
@@ -202,7 +203,13 @@ def process_dataframe(self) -> dict[int | str, dict[str, Any]]:
202203
df = df.reset_index()
203204

204205
if self.na_rep is not None:
205-
df = df.fillna(self.na_rep)
206+
with warnings.catch_warnings():
207+
warnings.filterwarnings(
208+
"ignore",
209+
"Downcasting object dtype arrays",
210+
category=FutureWarning,
211+
)
212+
df = df.fillna(self.na_rep)
206213

207214
return df.to_dict(orient="index")
208215

pandas/io/json/_json.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -1217,7 +1217,16 @@ def _try_convert_data(
12171217
if not self.dtype:
12181218
if all(notna(data)):
12191219
return data, False
1220-
return data.fillna(np.nan), True
1220+
1221+
with warnings.catch_warnings():
1222+
warnings.filterwarnings(
1223+
"ignore",
1224+
"Downcasting object dtype arrays",
1225+
category=FutureWarning,
1226+
)
1227+
filled = data.fillna(np.nan)
1228+
1229+
return filled, True
12211230

12221231
elif self.dtype is True:
12231232
pass

pandas/io/stata.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -2983,7 +2983,14 @@ def _prepare_data(self) -> np.rec.recarray:
29832983
for i, col in enumerate(data):
29842984
typ = typlist[i]
29852985
if typ <= self._max_string_length:
2986-
data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,))
2986+
with warnings.catch_warnings():
2987+
warnings.filterwarnings(
2988+
"ignore",
2989+
"Downcasting object dtype arrays",
2990+
category=FutureWarning,
2991+
)
2992+
dc = data[col].fillna("")
2993+
data[col] = dc.apply(_pad_bytes, args=(typ,))
29872994
stype = f"S{typ}"
29882995
dtypes[col] = stype
29892996
data[col] = data[col].astype(stype)

pandas/plotting/_matplotlib/core.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1538,7 +1538,13 @@ def _kind(self) -> Literal["area"]:
15381538

15391539
def __init__(self, data, **kwargs) -> None:
15401540
kwargs.setdefault("stacked", True)
1541-
data = data.fillna(value=0)
1541+
with warnings.catch_warnings():
1542+
warnings.filterwarnings(
1543+
"ignore",
1544+
"Downcasting object dtype arrays",
1545+
category=FutureWarning,
1546+
)
1547+
data = data.fillna(value=0)
15421548
LinePlot.__init__(self, data, **kwargs)
15431549

15441550
if not self.stacked:

pandas/tests/extension/test_masked.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
be added to the array-specific tests in `pandas/tests/arrays/`.
1414
1515
"""
16+
import warnings
17+
1618
import numpy as np
1719
import pytest
1820

@@ -186,7 +188,14 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
186188

187189
if sdtype.kind in "iu":
188190
if op_name in ("__rtruediv__", "__truediv__", "__div__"):
189-
expected = expected.fillna(np.nan).astype("Float64")
191+
with warnings.catch_warnings():
192+
warnings.filterwarnings(
193+
"ignore",
194+
"Downcasting object dtype arrays",
195+
category=FutureWarning,
196+
)
197+
filled = expected.fillna(np.nan)
198+
expected = filled.astype("Float64")
190199
else:
191200
# combine method result in 'biggest' (int64) dtype
192201
expected = expected.astype(sdtype)

pandas/tests/frame/indexing/test_where.py

+2
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def test_where_upcasting(self):
9696

9797
tm.assert_series_equal(result, expected)
9898

99+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
99100
def test_where_alignment(self, where_frame, float_string_frame):
100101
# aligning
101102
def _check_align(df, cond, other, check_dtypes=True):
@@ -170,6 +171,7 @@ def test_where_invalid(self):
170171
with pytest.raises(ValueError, match=msg):
171172
df.mask(0)
172173

174+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
173175
def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
174176
# where inplace
175177

pandas/tests/frame/methods/test_fillna.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,9 @@ def test_fillna_dtype_conversion(self):
360360
expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5])
361361
tm.assert_series_equal(result, expected)
362362

363-
result = df.fillna(1)
363+
msg = "Downcasting object dtype arrays"
364+
with tm.assert_produces_warning(FutureWarning, match=msg):
365+
result = df.fillna(1)
364366
expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
365367
tm.assert_frame_equal(result, expected)
366368

@@ -817,7 +819,8 @@ def test_fillna_nones_inplace():
817819
[[None, None], [None, None]],
818820
columns=["A", "B"],
819821
)
820-
with tm.assert_produces_warning(False):
822+
msg = "Downcasting object dtype arrays"
823+
with tm.assert_produces_warning(FutureWarning, match=msg):
821824
df.fillna(value={"A": 1, "B": 2}, inplace=True)
822825

823826
expected = DataFrame([[1, 2], [1, 2]], columns=["A", "B"])

pandas/tests/frame/test_arithmetic.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1254,7 +1254,9 @@ def test_operators_none_as_na(self, op):
12541254

12551255
# since filling converts dtypes from object, changed expected to be
12561256
# object
1257-
filled = df.fillna(np.nan)
1257+
msg = "Downcasting object dtype arrays"
1258+
with tm.assert_produces_warning(FutureWarning, match=msg):
1259+
filled = df.fillna(np.nan)
12581260
result = op(df, 3)
12591261
expected = op(filled, 3).astype(object)
12601262
expected[pd.isna(expected)] = np.nan
@@ -1265,10 +1267,14 @@ def test_operators_none_as_na(self, op):
12651267
expected[pd.isna(expected)] = np.nan
12661268
tm.assert_frame_equal(result, expected)
12671269

1268-
result = op(df, df.fillna(7))
1270+
msg = "Downcasting object dtype arrays"
1271+
with tm.assert_produces_warning(FutureWarning, match=msg):
1272+
result = op(df, df.fillna(7))
12691273
tm.assert_frame_equal(result, expected)
12701274

1271-
result = op(df.fillna(7), df)
1275+
msg = "Downcasting object dtype arrays"
1276+
with tm.assert_produces_warning(FutureWarning, match=msg):
1277+
result = op(df.fillna(7), df)
12721278
tm.assert_frame_equal(result, expected)
12731279

12741280
@pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])

pandas/tests/frame/test_logical_ops.py

+1
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ def _check_unary_op(op):
151151

152152
_check_unary_op(operator.inv) # TODO: belongs elsewhere
153153

154+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
154155
def test_logical_with_nas(self):
155156
d = DataFrame({"a": [np.nan, False], "b": [True, True]})
156157

pandas/tests/frame/test_reductions.py

+1
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,7 @@ def test_any_all_mixed_float(self, opname, axis, bool_only, float_string_frame):
11551155
def test_any_all_bool_with_na(self, opname, axis, bool_frame_with_na):
11561156
getattr(bool_frame_with_na, opname)(axis=axis, bool_only=False)
11571157

1158+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
11581159
@pytest.mark.parametrize("opname", ["any", "all"])
11591160
def test_any_all_bool_frame(self, opname, bool_frame_with_na):
11601161
# GH#12863: numpy gives back non-boolean data for object type

pandas/tests/frame/test_stack_unstack.py

+2
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,7 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack):
11841184
)
11851185
tm.assert_series_equal(result, expected)
11861186

1187+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
11871188
@pytest.mark.parametrize(
11881189
"index, columns",
11891190
[
@@ -1194,6 +1195,7 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack):
11941195
)
11951196
def test_stack_multi_columns_non_unique_index(self, index, columns, future_stack):
11961197
# GH-28301
1198+
11971199
df = DataFrame(index=index, columns=columns).fillna(1)
11981200
stacked = df.stack(future_stack=future_stack)
11991201
new_index = MultiIndex.from_tuples(stacked.index.to_numpy())

pandas/tests/groupby/test_function.py

+1
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,7 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
15341534
method(*args, **kwargs)
15351535

15361536

1537+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
15371538
@pytest.mark.parametrize("dtype", [bool, int, float, object])
15381539
def test_deprecate_numeric_only_series(dtype, groupby_func, request):
15391540
# GH#46560

pandas/tests/series/methods/test_reindex.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,19 @@ def test_reindex_inference():
152152
# inference of new dtype
153153
s = Series([True, False, False, True], index=list("abcd"))
154154
new_index = "agc"
155-
result = s.reindex(list(new_index)).ffill()
155+
msg = "Downcasting object dtype arrays on"
156+
with tm.assert_produces_warning(FutureWarning, match=msg):
157+
result = s.reindex(list(new_index)).ffill()
156158
expected = Series([True, True, False], index=list(new_index))
157159
tm.assert_series_equal(result, expected)
158160

159161

160162
def test_reindex_downcasting():
161163
# GH4618 shifted series downcasting
162164
s = Series(False, index=range(5))
163-
result = s.shift(1).bfill()
165+
msg = "Downcasting object dtype arrays on"
166+
with tm.assert_produces_warning(FutureWarning, match=msg):
167+
result = s.shift(1).bfill()
164168
expected = Series(False, index=range(5))
165169
tm.assert_series_equal(result, expected)
166170

pandas/tests/series/test_api.py

+1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def test_series_datetimelike_attribute_access_invalid(self):
203203
with pytest.raises(AttributeError, match=msg):
204204
ser.weekday
205205

206+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
206207
@pytest.mark.parametrize(
207208
"kernel, has_numeric_only",
208209
[

pandas/tests/series/test_arithmetic.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -639,10 +639,12 @@ def test_comparison_operators_with_nas(self, comparison_op):
639639
result = comparison_op(ser, val)
640640
expected = comparison_op(ser.dropna(), val).reindex(ser.index)
641641

642-
if comparison_op is operator.ne:
643-
expected = expected.fillna(True).astype(bool)
644-
else:
645-
expected = expected.fillna(False).astype(bool)
642+
msg = "Downcasting object dtype arrays"
643+
with tm.assert_produces_warning(FutureWarning, match=msg):
644+
if comparison_op is operator.ne:
645+
expected = expected.fillna(True).astype(bool)
646+
else:
647+
expected = expected.fillna(False).astype(bool)
646648

647649
tm.assert_series_equal(result, expected)
648650

pandas/tests/series/test_logical_ops.py

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616

1717
class TestSeriesLogicalOps:
18+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
1819
@pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
1920
def test_bool_operators_with_nas(self, bool_op):
2021
# boolean &, |, ^ should work with object arrays and propagate NAs

0 commit comments

Comments
 (0)