Skip to content

Commit bc7912c

Browse files
committed
DEPR: fillna downcasting from object dtype
1 parent b379f99 commit bc7912c

File tree

19 files changed

+117
-23
lines changed

19 files changed

+117
-23
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ Deprecations
375375
- Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`)
376376
- Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
377377
- Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False returning -1; in a future version this will raise ``ValueError`` (:issue:`33941`, :issue:`33942`)
378+
- Deprecated downcasting of the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases; call ``result.infer_objects(copy=False)`` on the result instead (:issue:`??`)
378379
-
379380

380381
.. ---------------------------------------------------------------------------

pandas/core/generic.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -10174,7 +10174,14 @@ def _where(
1017410174

1017510175
# make sure we are boolean
1017610176
fill_value = bool(inplace)
10177-
cond = cond.fillna(fill_value)
10177+
with warnings.catch_warnings():
10178+
warnings.filterwarnings(
10179+
"ignore",
10180+
"Downcasting object dtype arrays",
10181+
category=FutureWarning,
10182+
)
10183+
cond = cond.fillna(fill_value)
10184+
cond = cond.infer_objects(copy=False)
1017810185

1017910186
msg = "Boolean array expected for the condition, not {dtype}"
1018010187

pandas/core/internals/blocks.py

+30-6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
cast,
1111
final,
1212
)
13+
import warnings
1314

1415
import numpy as np
1516

@@ -41,6 +42,7 @@
4142
)
4243
from pandas.errors import AbstractMethodError
4344
from pandas.util._decorators import cache_readonly
45+
from pandas.util._exceptions import find_stack_level
4446
from pandas.util._validators import validate_bool_kwarg
4547

4648
from pandas.core.dtypes.astype import (
@@ -455,7 +457,11 @@ def coerce_to_target_dtype(self, other) -> Block:
455457

456458
@final
457459
def _maybe_downcast(
458-
self, blocks: list[Block], downcast=None, using_cow: bool = False
460+
self,
461+
blocks: list[Block],
462+
downcast=None,
463+
using_cow: bool = False,
464+
caller: str | None = None,
459465
) -> list[Block]:
460466
if downcast is False:
461467
return blocks
@@ -467,9 +473,23 @@ def _maybe_downcast(
467473
# but ATM it breaks too much existing code.
468474
# split and convert the blocks
469475

470-
return extend_blocks(
476+
casted = extend_blocks(
471477
[blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
472478
)
479+
if caller == "fillna":
480+
if len(casted) != len(blocks) or not all(
481+
x.dtype == y.dtype for x, y in zip(casted, blocks)
482+
):
483+
# GH#11537
484+
warnings.warn(
485+
"Downcasting object dtype arrays on .fillna, .ffill, .bfill "
486+
"is deprecated and will change in a future version. "
487+
"Call result.infer_objects(copy=False) instead.",
488+
FutureWarning,
489+
stacklevel=find_stack_level(),
490+
)
491+
492+
return casted
473493

474494
if downcast is None:
475495
return blocks
@@ -1349,7 +1369,9 @@ def fillna(
13491369
else:
13501370
# GH#45423 consistent downcasting on no-ops.
13511371
nb = self.copy(deep=not using_cow)
1352-
nbs = nb._maybe_downcast([nb], downcast=downcast, using_cow=using_cow)
1372+
nbs = nb._maybe_downcast(
1373+
[nb], downcast=downcast, using_cow=using_cow, caller="fillna"
1374+
)
13531375
return nbs
13541376

13551377
if limit is not None:
@@ -1367,7 +1389,9 @@ def fillna(
13671389
# different behavior in _maybe_downcast.
13681390
return extend_blocks(
13691391
[
1370-
blk._maybe_downcast([blk], downcast=downcast, using_cow=using_cow)
1392+
blk._maybe_downcast(
1393+
[blk], downcast=downcast, using_cow=using_cow, caller="fillna"
1394+
)
13711395
for blk in nbs
13721396
]
13731397
)
@@ -1408,7 +1432,7 @@ def pad_or_backfill(
14081432
data = extract_array(new_values, extract_numpy=True)
14091433

14101434
nb = self.make_block_same_class(data, refs=refs)
1411-
return nb._maybe_downcast([nb], downcast, using_cow)
1435+
return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna")
14121436

14131437
@final
14141438
def interpolate(
@@ -1941,7 +1965,7 @@ def fillna(
19411965
refs = None
19421966
new_values = self.values.fillna(value=value, method=None, limit=limit)
19431967
nb = self.make_block_same_class(new_values, refs=refs)
1944-
return nb._maybe_downcast([nb], downcast, using_cow=using_cow)
1968+
return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")
19451969

19461970
@cache_readonly
19471971
def shape(self) -> Shape:

pandas/io/formats/xml.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
TYPE_CHECKING,
1010
Any,
1111
)
12+
import warnings
1213

1314
from pandas.errors import AbstractMethodError
1415
from pandas.util._decorators import doc
@@ -202,7 +203,13 @@ def process_dataframe(self) -> dict[int | str, dict[str, Any]]:
202203
df = df.reset_index()
203204

204205
if self.na_rep is not None:
205-
df = df.fillna(self.na_rep)
206+
with warnings.catch_warnings():
207+
warnings.filterwarnings(
208+
"ignore",
209+
"Downcasting object dtype arrays",
210+
category=FutureWarning,
211+
)
212+
df = df.fillna(self.na_rep)
206213

207214
return df.to_dict(orient="index")
208215

pandas/io/json/_json.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -1216,7 +1216,16 @@ def _try_convert_data(
12161216
if not self.dtype:
12171217
if all(notna(data)):
12181218
return data, False
1219-
return data.fillna(np.nan), True
1219+
1220+
with warnings.catch_warnings():
1221+
warnings.filterwarnings(
1222+
"ignore",
1223+
"Downcasting object dtype arrays",
1224+
category=FutureWarning,
1225+
)
1226+
filled = data.fillna(np.nan)
1227+
1228+
return filled, True
12201229

12211230
elif self.dtype is True:
12221231
pass

pandas/io/stata.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -2912,7 +2912,14 @@ def _prepare_data(self) -> np.recarray:
29122912
for i, col in enumerate(data):
29132913
typ = typlist[i]
29142914
if typ <= self._max_string_length:
2915-
data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,))
2915+
with warnings.catch_warnings():
2916+
warnings.filterwarnings(
2917+
"ignore",
2918+
"Downcasting object dtype arrays",
2919+
category=FutureWarning,
2920+
)
2921+
dc = data[col].fillna("")
2922+
data[col] = dc.apply(_pad_bytes, args=(typ,))
29162923
stype = f"S{typ}"
29172924
dtypes[col] = stype
29182925
data[col] = data[col].astype(stype)

pandas/plotting/_matplotlib/core.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1538,7 +1538,13 @@ def _kind(self) -> Literal["area"]:
15381538

15391539
def __init__(self, data, **kwargs) -> None:
15401540
kwargs.setdefault("stacked", True)
1541-
data = data.fillna(value=0)
1541+
with warnings.catch_warnings():
1542+
warnings.filterwarnings(
1543+
"ignore",
1544+
"Downcasting object dtype arrays",
1545+
category=FutureWarning,
1546+
)
1547+
data = data.fillna(value=0)
15421548
LinePlot.__init__(self, data, **kwargs)
15431549

15441550
if not self.stacked:

pandas/tests/extension/test_integer.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
be added to the array-specific tests in `pandas/tests/arrays/`.
1414
1515
"""
16+
import warnings
17+
1618
import numpy as np
1719
import pytest
1820

@@ -136,7 +138,14 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
136138
expected = self._combine(s, other, op)
137139

138140
if op_name in ("__rtruediv__", "__truediv__", "__div__"):
139-
expected = expected.fillna(np.nan).astype("Float64")
141+
with warnings.catch_warnings():
142+
warnings.filterwarnings(
143+
"ignore",
144+
"Downcasting object dtype arrays",
145+
category=FutureWarning,
146+
)
147+
filled = expected.fillna(np.nan)
148+
expected = filled.astype("Float64")
140149
else:
141150
# combine method result in 'biggest' (int64) dtype
142151
expected = expected.astype(sdtype)

pandas/tests/frame/indexing/test_where.py

+2
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ def test_where_upcasting(self):
9494

9595
tm.assert_series_equal(result, expected)
9696

97+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
9798
def test_where_alignment(self, where_frame, float_string_frame):
9899
# aligning
99100
def _check_align(df, cond, other, check_dtypes=True):
@@ -164,6 +165,7 @@ def test_where_invalid(self):
164165
with pytest.raises(ValueError, match=msg):
165166
df.mask(0)
166167

168+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
167169
def test_where_set(self, where_frame, float_string_frame):
168170
# where inplace
169171

pandas/tests/frame/methods/test_fillna.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,9 @@ def test_fillna_dtype_conversion(self):
360360
expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5])
361361
tm.assert_series_equal(result, expected)
362362

363-
result = df.fillna(1)
363+
msg = "Downcasting object dtype arrays"
364+
with tm.assert_produces_warning(FutureWarning, match=msg):
365+
result = df.fillna(1)
364366
expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
365367
tm.assert_frame_equal(result, expected)
366368

@@ -817,7 +819,8 @@ def test_fillna_nones_inplace():
817819
[[None, None], [None, None]],
818820
columns=["A", "B"],
819821
)
820-
with tm.assert_produces_warning(False):
822+
msg = "Downcasting object dtype arrays"
823+
with tm.assert_produces_warning(FutureWarning, match=msg):
821824
df.fillna(value={"A": 1, "B": 2}, inplace=True)
822825

823826
expected = DataFrame([[1, 2], [1, 2]], columns=["A", "B"])

pandas/tests/frame/test_arithmetic.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1242,7 +1242,9 @@ def test_operators_none_as_na(self, op):
12421242

12431243
# since filling converts dtypes from object, changed expected to be
12441244
# object
1245-
filled = df.fillna(np.nan)
1245+
msg = "Downcasting object dtype arrays"
1246+
with tm.assert_produces_warning(FutureWarning, match=msg):
1247+
filled = df.fillna(np.nan)
12461248
result = op(df, 3)
12471249
expected = op(filled, 3).astype(object)
12481250
expected[pd.isna(expected)] = np.nan
@@ -1253,10 +1255,14 @@ def test_operators_none_as_na(self, op):
12531255
expected[pd.isna(expected)] = np.nan
12541256
tm.assert_frame_equal(result, expected)
12551257

1256-
result = op(df, df.fillna(7))
1258+
msg = "Downcasting object dtype arrays"
1259+
with tm.assert_produces_warning(FutureWarning, match=msg):
1260+
result = op(df, df.fillna(7))
12571261
tm.assert_frame_equal(result, expected)
12581262

1259-
result = op(df.fillna(7), df)
1263+
msg = "Downcasting object dtype arrays"
1264+
with tm.assert_produces_warning(FutureWarning, match=msg):
1265+
result = op(df.fillna(7), df)
12601266
tm.assert_frame_equal(result, expected)
12611267

12621268
@pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])

pandas/tests/frame/test_logical_ops.py

+1
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ def _check_unary_op(op):
151151

152152
_check_unary_op(operator.inv) # TODO: belongs elsewhere
153153

154+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
154155
def test_logical_with_nas(self):
155156
d = DataFrame({"a": [np.nan, False], "b": [True, True]})
156157

pandas/tests/frame/test_reductions.py

+1
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,7 @@ def test_any_all_mixed_float(self, opname, axis, bool_only, float_string_frame):
11521152
def test_any_all_bool_with_na(self, opname, axis, bool_frame_with_na):
11531153
getattr(bool_frame_with_na, opname)(axis=axis, bool_only=False)
11541154

1155+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
11551156
@pytest.mark.parametrize("opname", ["any", "all"])
11561157
def test_any_all_bool_frame(self, opname, bool_frame_with_na):
11571158
# GH#12863: numpy gives back non-boolean data for object type

pandas/tests/frame/test_stack_unstack.py

+2
Original file line numberDiff line numberDiff line change
@@ -1126,6 +1126,7 @@ def test_stack_preserve_categorical_dtype_values(self):
11261126
)
11271127
tm.assert_series_equal(result, expected)
11281128

1129+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
11291130
@pytest.mark.parametrize(
11301131
"index, columns",
11311132
[
@@ -1136,6 +1137,7 @@ def test_stack_preserve_categorical_dtype_values(self):
11361137
)
11371138
def test_stack_multi_columns_non_unique_index(self, index, columns):
11381139
# GH-28301
1140+
11391141
df = DataFrame(index=index, columns=columns).fillna(1)
11401142
stacked = df.stack()
11411143
new_index = MultiIndex.from_tuples(stacked.index.to_numpy())

pandas/tests/groupby/test_function.py

+1
Original file line numberDiff line numberDiff line change
@@ -1505,6 +1505,7 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
15051505
method(*args, **kwargs)
15061506

15071507

1508+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
15081509
@pytest.mark.parametrize("dtype", [bool, int, float, object])
15091510
def test_deprecate_numeric_only_series(dtype, groupby_func, request):
15101511
# GH#46560

pandas/tests/series/methods/test_reindex.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -157,15 +157,19 @@ def test_reindex_inference():
157157
# inference of new dtype
158158
s = Series([True, False, False, True], index=list("abcd"))
159159
new_index = "agc"
160-
result = s.reindex(list(new_index)).ffill()
160+
msg = "Downcasting object dtype arrays on"
161+
with tm.assert_produces_warning(FutureWarning, match=msg):
162+
result = s.reindex(list(new_index)).ffill()
161163
expected = Series([True, True, False], index=list(new_index))
162164
tm.assert_series_equal(result, expected)
163165

164166

165167
def test_reindex_downcasting():
166168
# GH4618 shifted series downcasting
167169
s = Series(False, index=range(0, 5))
168-
result = s.shift(1).bfill()
170+
msg = "Downcasting object dtype arrays on"
171+
with tm.assert_produces_warning(FutureWarning, match=msg):
172+
result = s.shift(1).bfill()
169173
expected = Series(False, index=range(0, 5))
170174
tm.assert_series_equal(result, expected)
171175

pandas/tests/series/test_api.py

+1
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ def test_series_datetimelike_attribute_access_invalid(self):
205205
with pytest.raises(AttributeError, match=msg):
206206
ser.weekday
207207

208+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
208209
@pytest.mark.parametrize(
209210
"kernel, has_numeric_only",
210211
[

pandas/tests/series/test_arithmetic.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -639,10 +639,12 @@ def test_comparison_operators_with_nas(self, comparison_op):
639639
result = comparison_op(ser, val)
640640
expected = comparison_op(ser.dropna(), val).reindex(ser.index)
641641

642-
if comparison_op is operator.ne:
643-
expected = expected.fillna(True).astype(bool)
644-
else:
645-
expected = expected.fillna(False).astype(bool)
642+
msg = "Downcasting object dtype arrays"
643+
with tm.assert_produces_warning(FutureWarning, match=msg):
644+
if comparison_op is operator.ne:
645+
expected = expected.fillna(True).astype(bool)
646+
else:
647+
expected = expected.fillna(False).astype(bool)
646648

647649
tm.assert_series_equal(result, expected)
648650

pandas/tests/series/test_logical_ops.py

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616

1717
class TestSeriesLogicalOps:
18+
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
1819
@pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
1920
def test_bool_operators_with_nas(self, bool_op):
2021
# boolean &, |, ^ should work with object arrays and propagate NAs

0 commit comments

Comments
 (0)