diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 919ed926f8195..f274c5c4c665e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -278,7 +278,7 @@ Indexing Missing ^^^^^^^ -- +- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`) - MultiIndex diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 80e1d0bede2cd..c7ec4f35e0ff1 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -15,6 +15,7 @@ from pandas._libs import ( NaT, + algos as libalgos, lib, ) from pandas._typing import ( @@ -382,6 +383,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: ) def fillna(self: T, value, limit, inplace: bool, downcast) -> T: + + if limit is not None: + # Do this validation even if we go through one of the no-op paths + limit = libalgos.validate_limit(None, limit=limit) + return self.apply_with_block( "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast ) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d5bae63976e63..d0c7e69567169 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -17,7 +17,6 @@ from pandas._libs import ( Timestamp, - algos as libalgos, internals as libinternals, lib, writers, @@ -447,36 +446,42 @@ def _split_op_result(self, result: ArrayLike) -> list[Block]: return [nb] def fillna( - self, value, limit=None, inplace: bool = False, downcast=None + self, value, limit: int | None = None, inplace: bool = False, downcast=None ) -> list[Block]: """ fillna on the block with the value. If we fail, then convert to ObjectBlock and try again """ + # Caller is responsible for validating limit; if int it is strictly positive inplace = validate_bool_kwarg(inplace, "inplace") - mask = isna(self.values) - mask, noop = validate_putmask(self.values, mask) - - if limit is not None: - limit = libalgos.validate_limit(None, limit=limit) - mask[mask.cumsum(self.ndim - 1) > limit] = False - if not self._can_hold_na: + # can short-circuit the isna call + noop = True + else: + mask = isna(self.values) + mask, noop = validate_putmask(self.values, mask) + + if noop: + # we can't process the value, but nothing to do if inplace: + # Arbitrarily imposing the convention that we ignore downcast + # on no-op when inplace=True return [self] else: - return [self.copy()] + # GH#45423 consistent downcasting on no-ops. + nb = self.copy() + nbs = nb._maybe_downcast([nb], downcast=downcast) + return nbs + + if limit is not None: + mask[mask.cumsum(self.ndim - 1) > limit] = False if self._can_hold_element(value): nb = self if inplace else self.copy() putmask_inplace(nb.values, mask, value) return nb._maybe_downcast([nb], downcast) - if noop: - # we can't process the value, but nothing to do - return [self] if inplace else [self.copy()] - elif self.ndim == 1 or self.shape[0] == 1: blk = self.coerce_to_target_dtype(value) # bc we have already cast, inplace=True may avoid an extra copy @@ -1448,8 +1453,9 @@ def putmask(self, mask, new) -> list[Block]: return [self] def fillna( - self, value, limit=None, inplace: bool = False, downcast=None + self, value, limit: int | None = None, inplace: bool = False, downcast=None ) -> list[Block]: + # Caller is responsible for validating limit; if int it is strictly positive try: new_values = self.values.fillna(value=value, limit=limit) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 1d1d955c9024a..6297a7578ccd4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -14,6 +14,7 @@ import numpy as np from pandas._libs import ( + algos as libalgos, internals as libinternals, lib, ) @@ -368,6 +369,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) def fillna(self: T, value, limit, inplace: bool, downcast) -> T: + + if limit is not None: + # Do this validation even if we go through one of the no-op paths + limit = libalgos.validate_limit(None, limit=limit) + return self.apply( "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast ) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 21a45d9ee1f20..77ae9fb4c7eff 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -250,6 +250,25 @@ def test_fillna_downcast_false(self, frame_or_series): result = obj.fillna("", downcast=False) tm.assert_equal(result, obj) + def test_fillna_downcast_noop(self, frame_or_series): + # GH#45423 + # Two relevant paths: + # 1) not _can_hold_na (e.g. integer) + # 2) _can_hold_na + noop + not can_hold_element + + obj = frame_or_series([1, 2, 3], dtype=np.int64) + res = obj.fillna("foo", downcast=np.dtype(np.int32)) + expected = obj.astype(np.int32) + tm.assert_equal(res, expected) + + obj2 = obj.astype(np.float64) + res2 = obj2.fillna("foo", downcast="infer") + expected2 = obj # get back int64 + tm.assert_equal(res2, expected2) + + res3 = obj2.fillna("foo", downcast=np.dtype(np.int32)) + tm.assert_equal(res3, expected) + @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) def test_fillna_dictlike_value_duplicate_colnames(self, columns): # GH#43476