BUG: consistent downcast on fillna no-ops (#45673)

jbrockmendel · web-flow · commit 7c257d475856 · 2022-01-31T19:03:48.000-05:00
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -278,7 +278,7 @@ Indexing
 
 Missing
 ^^^^^^^
--
+- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`)
 -
 
 MultiIndex
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -15,6 +15,7 @@
 
 from pandas._libs import (
     NaT,
+    algos as libalgos,
     lib,
 )
 from pandas._typing import (
@@ -382,6 +383,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
         )
 
     def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
+
+        if limit is not None:
+            # Do this validation even if we go through one of the no-op paths
+            limit = libalgos.validate_limit(None, limit=limit)
+
         return self.apply_with_block(
             "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
         )
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -17,7 +17,6 @@
 
 from pandas._libs import (
     Timestamp,
-    algos as libalgos,
     internals as libinternals,
     lib,
     writers,
@@ -448,36 +447,42 @@ def _split_op_result(self, result: ArrayLike) -> list[Block]:
         return [nb]
 
     def fillna(
-        self, value, limit=None, inplace: bool = False, downcast=None
+        self, value, limit: int | None = None, inplace: bool = False, downcast=None
     ) -> list[Block]:
         """
         fillna on the block with the value. If we fail, then convert to
         ObjectBlock and try again
         """
+        # Caller is responsible for validating limit; if int it is strictly positive
         inplace = validate_bool_kwarg(inplace, "inplace")
 
-        mask = isna(self.values)
-        mask, noop = validate_putmask(self.values, mask)
-
-        if limit is not None:
-            limit = libalgos.validate_limit(None, limit=limit)
-            mask[mask.cumsum(self.ndim - 1) > limit] = False
-
         if not self._can_hold_na:
+            # can short-circuit the isna call
+            noop = True
+        else:
+            mask = isna(self.values)
+            mask, noop = validate_putmask(self.values, mask)
+
+        if noop:
+            # we can't process the value, but nothing to do
             if inplace:
+                # Arbitrarily imposing the convention that we ignore downcast
+                #  on no-op when inplace=True
                 return [self]
             else:
-                return [self.copy()]
+                # GH#45423 consistent downcasting on no-ops.
+                nb = self.copy()
+                nbs = nb._maybe_downcast([nb], downcast=downcast)
+                return nbs
+
+        if limit is not None:
+            mask[mask.cumsum(self.ndim - 1) > limit] = False
 
         if self._can_hold_element(value):
             nb = self if inplace else self.copy()
             putmask_inplace(nb.values, mask, value)
             return nb._maybe_downcast([nb], downcast)
 
-        if noop:
-            # we can't process the value, but nothing to do
-            return [self] if inplace else [self.copy()]
-
         elif self.ndim == 1 or self.shape[0] == 1:
             blk = self.coerce_to_target_dtype(value)
             # bc we have already cast, inplace=True may avoid an extra copy
@@ -1449,8 +1454,9 @@ def putmask(self, mask, new) -> list[Block]:
         return [self]
 
     def fillna(
-        self, value, limit=None, inplace: bool = False, downcast=None
+        self, value, limit: int | None = None, inplace: bool = False, downcast=None
     ) -> list[Block]:
+        # Caller is responsible for validating limit; if int it is strictly positive
 
         try:
             new_values = self.values.fillna(value=value, limit=limit)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -14,6 +14,7 @@
 import numpy as np
 
 from pandas._libs import (
+    algos as libalgos,
     internals as libinternals,
     lib,
 )
@@ -368,6 +369,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
         return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)
 
     def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
+
+        if limit is not None:
+            # Do this validation even if we go through one of the no-op paths
+            limit = libalgos.validate_limit(None, limit=limit)
+
         return self.apply(
             "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
         )
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
@@ -250,6 +250,25 @@ def test_fillna_downcast_false(self, frame_or_series):
         result = obj.fillna("", downcast=False)
         tm.assert_equal(result, obj)
 
+    def test_fillna_downcast_noop(self, frame_or_series):
+        # GH#45423
+        # Two relevant paths:
+        #  1) not _can_hold_na (e.g. integer)
+        #  2) _can_hold_na + noop + not can_hold_element
+
+        obj = frame_or_series([1, 2, 3], dtype=np.int64)
+        res = obj.fillna("foo", downcast=np.dtype(np.int32))
+        expected = obj.astype(np.int32)
+        tm.assert_equal(res, expected)
+
+        obj2 = obj.astype(np.float64)
+        res2 = obj2.fillna("foo", downcast="infer")
+        expected2 = obj  # get back int64
+        tm.assert_equal(res2, expected2)
+
+        res3 = obj2.fillna("foo", downcast=np.dtype(np.int32))
+        tm.assert_equal(res3, expected)
+
     @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]])
     def test_fillna_dictlike_value_duplicate_colnames(self, columns):
         # GH#43476

Original file line number	Diff line number	Diff line change
`@@ -278,7 +278,7 @@ Indexing`
`278`	`278`
`279`	`279`	`Missing`
`280`	`280`	`^^^^^^^`
`281`		`--`
	`281`	+- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`)
`282`	`282`	`-`
`283`	`283`
`284`	`284`	`MultiIndex`
Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,7 @@`
`15`	`15`
`16`	`16`	`from pandas._libs import (`
`17`	`17`	`NaT,`
	`18`	`+ algos as libalgos,`
`18`	`19`	`lib,`
`19`	`20`	`)`
`20`	`21`	`from pandas._typing import (`
`@@ -382,6 +383,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:`
`382`	`383`	`)`
`383`	`384`
`384`	`385`	`def fillna(self: T, value, limit, inplace: bool, downcast) -> T:`
	`386`	`+`
	`387`	`+ if limit is not None:`
	`388`	`+ # Do this validation even if we go through one of the no-op paths`
	`389`	`+ limit = libalgos.validate_limit(None, limit=limit)`
	`390`	`+`
`385`	`391`	`return self.apply_with_block(`
`386`	`392`	`"fillna", value=value, limit=limit, inplace=inplace, downcast=downcast`
`387`	`393`	`)`