Skip to content

Commit 7c257d4

Browse files
authored
BUG: consistent downcast on fillna no-ops (#45673)
1 parent 9a98aca commit 7c257d4

File tree

5 files changed

+53
-16
lines changed

5 files changed

+53
-16
lines changed

doc/source/whatsnew/v1.5.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ Indexing
278278

279279
Missing
280280
^^^^^^^
281-
-
281+
- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`)
282282
-
283283

284284
MultiIndex

pandas/core/internals/array_manager.py

+6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from pandas._libs import (
1717
NaT,
18+
algos as libalgos,
1819
lib,
1920
)
2021
from pandas._typing import (
@@ -382,6 +383,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
382383
)
383384

384385
def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
386+
387+
if limit is not None:
388+
# Do this validation even if we go through one of the no-op paths
389+
limit = libalgos.validate_limit(None, limit=limit)
390+
385391
return self.apply_with_block(
386392
"fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
387393
)

pandas/core/internals/blocks.py

+21-15
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
from pandas._libs import (
1919
Timestamp,
20-
algos as libalgos,
2120
internals as libinternals,
2221
lib,
2322
writers,
@@ -448,36 +447,42 @@ def _split_op_result(self, result: ArrayLike) -> list[Block]:
448447
return [nb]
449448

450449
def fillna(
451-
self, value, limit=None, inplace: bool = False, downcast=None
450+
self, value, limit: int | None = None, inplace: bool = False, downcast=None
452451
) -> list[Block]:
453452
"""
454453
fillna on the block with the value. If we fail, then convert to
455454
ObjectBlock and try again
456455
"""
456+
# Caller is responsible for validating limit; if int it is strictly positive
457457
inplace = validate_bool_kwarg(inplace, "inplace")
458458

459-
mask = isna(self.values)
460-
mask, noop = validate_putmask(self.values, mask)
461-
462-
if limit is not None:
463-
limit = libalgos.validate_limit(None, limit=limit)
464-
mask[mask.cumsum(self.ndim - 1) > limit] = False
465-
466459
if not self._can_hold_na:
460+
# can short-circuit the isna call
461+
noop = True
462+
else:
463+
mask = isna(self.values)
464+
mask, noop = validate_putmask(self.values, mask)
465+
466+
if noop:
467+
# we can't process the value, but nothing to do
467468
if inplace:
469+
# Arbitrarily imposing the convention that we ignore downcast
470+
# on no-op when inplace=True
468471
return [self]
469472
else:
470-
return [self.copy()]
473+
# GH#45423 consistent downcasting on no-ops.
474+
nb = self.copy()
475+
nbs = nb._maybe_downcast([nb], downcast=downcast)
476+
return nbs
477+
478+
if limit is not None:
479+
mask[mask.cumsum(self.ndim - 1) > limit] = False
471480

472481
if self._can_hold_element(value):
473482
nb = self if inplace else self.copy()
474483
putmask_inplace(nb.values, mask, value)
475484
return nb._maybe_downcast([nb], downcast)
476485

477-
if noop:
478-
# we can't process the value, but nothing to do
479-
return [self] if inplace else [self.copy()]
480-
481486
elif self.ndim == 1 or self.shape[0] == 1:
482487
blk = self.coerce_to_target_dtype(value)
483488
# bc we have already cast, inplace=True may avoid an extra copy
@@ -1449,8 +1454,9 @@ def putmask(self, mask, new) -> list[Block]:
14491454
return [self]
14501455

14511456
def fillna(
1452-
self, value, limit=None, inplace: bool = False, downcast=None
1457+
self, value, limit: int | None = None, inplace: bool = False, downcast=None
14531458
) -> list[Block]:
1459+
# Caller is responsible for validating limit; if int it is strictly positive
14541460

14551461
try:
14561462
new_values = self.values.fillna(value=value, limit=limit)

pandas/core/internals/managers.py

+6
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import numpy as np
1515

1616
from pandas._libs import (
17+
algos as libalgos,
1718
internals as libinternals,
1819
lib,
1920
)
@@ -368,6 +369,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
368369
return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)
369370

370371
def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
372+
373+
if limit is not None:
374+
# Do this validation even if we go through one of the no-op paths
375+
limit = libalgos.validate_limit(None, limit=limit)
376+
371377
return self.apply(
372378
"fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
373379
)

pandas/tests/frame/methods/test_fillna.py

+19
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,25 @@ def test_fillna_downcast_false(self, frame_or_series):
250250
result = obj.fillna("", downcast=False)
251251
tm.assert_equal(result, obj)
252252

253+
def test_fillna_downcast_noop(self, frame_or_series):
254+
# GH#45423
255+
# Two relevant paths:
256+
# 1) not _can_hold_na (e.g. integer)
257+
# 2) _can_hold_na + noop + not can_hold_element
258+
259+
obj = frame_or_series([1, 2, 3], dtype=np.int64)
260+
res = obj.fillna("foo", downcast=np.dtype(np.int32))
261+
expected = obj.astype(np.int32)
262+
tm.assert_equal(res, expected)
263+
264+
obj2 = obj.astype(np.float64)
265+
res2 = obj2.fillna("foo", downcast="infer")
266+
expected2 = obj # get back int64
267+
tm.assert_equal(res2, expected2)
268+
269+
res3 = obj2.fillna("foo", downcast=np.dtype(np.int32))
270+
tm.assert_equal(res3, expected)
271+
253272
@pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]])
254273
def test_fillna_dictlike_value_duplicate_colnames(self, columns):
255274
# GH#43476

0 commit comments

Comments
 (0)