From d4fdb66e32f841829ac0677ba87cceba2bb0f055 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Dec 2021 15:05:20 -0800 Subject: [PATCH 1/2] BUG: Series[object].fillna ignoring downcast='infer' --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/core/generic.py | 5 +++ pandas/core/internals/blocks.py | 8 ++--- pandas/tests/series/methods/test_fillna.py | 36 ++++++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index ccad93d83eb5b..a2a5660a3404e 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -732,6 +732,8 @@ Missing - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with ``inplace=True`` not writing to the underlying array(s) in-place (:issue:`44749`) - Bug in :meth:`Index.fillna` incorrectly returning an un-filled :class:`Index` when NA values are present and ``downcast`` argument is specified. This now raises ``NotImplementedError`` instead; do not pass ``downcast`` argument (:issue:`44873`) - Bug in :meth:`DataFrame.dropna` changing :class:`Index` even if no entries were dropped (:issue:`41965`) +- Bug in :meth:`Series.fillna` with an object-dtype incorrectly ignoring ``downcast="infer"`` (:issue:`44241`) +- MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc15c846b1907..c5f461cadb517 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6399,6 +6399,11 @@ def fillna( else: if self.ndim == 1: if isinstance(value, (dict, ABCSeries)): + if not len(value): + # test_fillna_nonscalar + if inplace: + return + return self.copy() value = create_series_with_explicit_dtype( value, dtype_if_empty=object ) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3f50cb2c601b5..9558b82d95fde 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -518,11 +518,11 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]: def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]: if self.dtype == _dtype_obj: - # TODO: why is behavior different for object dtype? - if downcast is not None: - return blocks - + # GH#44241 We downcast regardless of the argument; + # respecting 'downcast=None' may be worthwhile at some point, + # but ATM it breaks too much existing code. # split and convert the blocks + return extend_blocks( [blk.convert(datetime=True, numeric=False) for blk in blocks] ) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 160eb4774d1a0..19f61a0a2d6fc 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -189,6 +189,42 @@ def test_fillna_downcast(self): expected = Series([1, 0]) tm.assert_series_equal(result, expected) + def test_fillna_downcast_infer_objects_to_numeric(self): + # GH#44241 if we have object-dtype, 'downcast="infer"' should + # _actually_ infer + + arr = np.arange(5).astype(object) + arr[3] = np.nan + + ser = Series(arr) + + res = ser.fillna(3, downcast="infer") + expected = Series(np.arange(5), dtype=np.int64) + tm.assert_series_equal(res, expected) + + res = ser.ffill(downcast="infer") + expected = Series([0, 1, 2, 2, 4], dtype=np.int64) + tm.assert_series_equal(res, expected) + + res = ser.bfill(downcast="infer") + expected = Series([0, 1, 2, 4, 4], dtype=np.int64) + tm.assert_series_equal(res, expected) + + # with a non-round float present, we will downcast to float64 + ser[2] = 2.5 + + expected = Series([0, 1, 2.5, 3, 4], dtype=np.float64) + res = ser.fillna(3, downcast="infer") + tm.assert_series_equal(res, expected) + + res = ser.ffill(downcast="infer") + expected = Series([0, 1, 2.5, 2.5, 4], dtype=np.float64) + tm.assert_series_equal(res, expected) + + res = ser.bfill(downcast="infer") + expected = Series([0, 1, 2.5, 4, 4], dtype=np.float64) + tm.assert_series_equal(res, expected) + def test_timedelta_fillna(self, frame_or_series): # GH#3371 ser = Series( From 5c26503b7cc6b578e2aab0891ed8a5cd061ba347 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Dec 2021 17:54:02 -0800 Subject: [PATCH 2/2] mypy fixup --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c5f461cadb517..7efbb299727e8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6402,7 +6402,7 @@ def fillna( if not len(value): # test_fillna_nonscalar if inplace: - return + return None return self.copy() value = create_series_with_explicit_dtype( value, dtype_if_empty=object