Skip to content

BUG: consistent downcast on fillna no-ops #45673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ Indexing

Missing
^^^^^^^
-
- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`)
-

MultiIndex
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from pandas._libs import (
NaT,
algos as libalgos,
lib,
)
from pandas._typing import (
Expand Down Expand Up @@ -382,6 +383,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
)

def fillna(self: T, value, limit, inplace: bool, downcast) -> T:

if limit is not None:
# Do this validation even if we go through one of the no-op paths
limit = libalgos.validate_limit(None, limit=limit)

return self.apply_with_block(
"fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
)
Expand Down
36 changes: 21 additions & 15 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from pandas._libs import (
Timestamp,
algos as libalgos,
internals as libinternals,
lib,
writers,
Expand Down Expand Up @@ -447,36 +446,42 @@ def _split_op_result(self, result: ArrayLike) -> list[Block]:
return [nb]

def fillna(
self, value, limit=None, inplace: bool = False, downcast=None
self, value, limit: int | None = None, inplace: bool = False, downcast=None
) -> list[Block]:
"""
fillna on the block with the value. If we fail, then convert to
ObjectBlock and try again
"""
# Caller is responsible for validating limit; if int it is strictly positive
inplace = validate_bool_kwarg(inplace, "inplace")

mask = isna(self.values)
mask, noop = validate_putmask(self.values, mask)

if limit is not None:
limit = libalgos.validate_limit(None, limit=limit)
mask[mask.cumsum(self.ndim - 1) > limit] = False

if not self._can_hold_na:
# can short-circuit the isna call
noop = True
else:
mask = isna(self.values)
mask, noop = validate_putmask(self.values, mask)

if noop:
# we can't process the value, but nothing to do
if inplace:
# Arbitrarily imposing the convention that we ignore downcast
# on no-op when inplace=True
return [self]
else:
return [self.copy()]
# GH#45423 consistent downcasting on no-ops.
nb = self.copy()
nbs = nb._maybe_downcast([nb], downcast=downcast)
return nbs

if limit is not None:
mask[mask.cumsum(self.ndim - 1) > limit] = False

if self._can_hold_element(value):
nb = self if inplace else self.copy()
putmask_inplace(nb.values, mask, value)
return nb._maybe_downcast([nb], downcast)

if noop:
# we can't process the value, but nothing to do
return [self] if inplace else [self.copy()]

elif self.ndim == 1 or self.shape[0] == 1:
blk = self.coerce_to_target_dtype(value)
# bc we have already cast, inplace=True may avoid an extra copy
Expand Down Expand Up @@ -1448,8 +1453,9 @@ def putmask(self, mask, new) -> list[Block]:
return [self]

def fillna(
self, value, limit=None, inplace: bool = False, downcast=None
self, value, limit: int | None = None, inplace: bool = False, downcast=None
) -> list[Block]:
# Caller is responsible for validating limit; if int it is strictly positive

try:
new_values = self.values.fillna(value=value, limit=limit)
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import numpy as np

from pandas._libs import (
algos as libalgos,
internals as libinternals,
lib,
)
Expand Down Expand Up @@ -368,6 +369,11 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)

def fillna(self: T, value, limit, inplace: bool, downcast) -> T:

if limit is not None:
# Do this validation even if we go through one of the no-op paths
limit = libalgos.validate_limit(None, limit=limit)

return self.apply(
"fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
)
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,25 @@ def test_fillna_downcast_false(self, frame_or_series):
result = obj.fillna("", downcast=False)
tm.assert_equal(result, obj)

def test_fillna_downcast_noop(self, frame_or_series):
# GH#45423
# Two relevant paths:
# 1) not _can_hold_na (e.g. integer)
# 2) _can_hold_na + noop + not can_hold_element

obj = frame_or_series([1, 2, 3], dtype=np.int64)
res = obj.fillna("foo", downcast=np.dtype(np.int32))
expected = obj.astype(np.int32)
tm.assert_equal(res, expected)

obj2 = obj.astype(np.float64)
res2 = obj2.fillna("foo", downcast="infer")
expected2 = obj # get back int64
tm.assert_equal(res2, expected2)

res3 = obj2.fillna("foo", downcast=np.dtype(np.int32))
tm.assert_equal(res3, expected)

@pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]])
def test_fillna_dictlike_value_duplicate_colnames(self, columns):
# GH#43476
Expand Down