From 5ab2cdf015dd89b4b1964928817aee85314be6af Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Thu, 18 Mar 2021 20:55:42 -0400 Subject: [PATCH 1/5] BUG: fillna other missing vals --- pandas/core/generic.py | 6 +++++- pandas/core/internals/blocks.py | 1 + pandas/tests/frame/methods/test_fillna.py | 13 +++++++++++++ pandas/tests/series/methods/test_fillna.py | 11 +++++++++++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7138995d1b018..231aa35adf0e3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6524,6 +6524,11 @@ def fillna( value, dtype_if_empty=object ) value = value.reindex(self.index, copy=False) + print(value) + # GH-40498: Indices to not apply fillna to are marked with NaN, + # but that will still cause other missing values to be replaced with NaN, + value.loc[self.isna() & value.isna()] = self + # print(value) value = value._values elif not is_list_like(value): pass @@ -6533,7 +6538,6 @@ def fillna( "or Series, but you passed a " f'"{type(value).__name__}"' ) - new_data = self._mgr.fillna( value=value, limit=limit, inplace=inplace, downcast=downcast ) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3fd1ebaca19f0..69f2cc454536b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -72,6 +72,7 @@ is_valid_na_for_dtype, isna, na_value_for_dtype, + notna, ) import pandas.core.algorithms as algos diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 564481d01abc8..253b34639faf2 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -153,6 +153,19 @@ def test_fillna_tzaware_different_column(self): ) tm.assert_frame_equal(result, expected) + def test_other_missing_vals_not_modified( + self, unique_nulls_fixture, unique_nulls_fixture2 + ): + # GH-40498 + missing_val1, missing_val2 = unique_nulls_fixture, unique_nulls_fixture2 + df = DataFrame( + {"A": [1, missing_val1, missing_val2], "B": [2, missing_val1, missing_val2]} + ) + filler = {"A": {1: 0}, "B": {2: 0}} + result = df.fillna(filler) + expected = DataFrame({"A": [1, 0, missing_val2], "B": [2, missing_val1, 0]}) + tm.assert_frame_equal(result, expected) + def test_na_actions_categorical(self): cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index cf6b357d0a418..7e40ec509ddb2 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -9,6 +9,7 @@ import pytz from pandas import ( + NA, Categorical, DataFrame, DatetimeIndex, @@ -620,6 +621,16 @@ def test_fillna_numeric_inplace(self): expected = x.fillna(value=0) tm.assert_series_equal(y, expected) + def test_fillna_does_not_modify_other_missing_vals( + self, unique_nulls_fixture, unique_nulls_fixture2 + ): + # GH-40498 + missing_val1, missing_val2 = unique_nulls_fixture, unique_nulls_fixture2 + ser = Series([1, missing_val1, missing_val2, ""]) + result = ser.fillna({2: 0}) + expected = Series([1, missing_val1, 0, ""]) + tm.assert_series_equal(result, expected) + # --------------------------------------------------------------- # CategoricalDtype From 70411067a4a35d174a79fc16f0517cb659e6746c Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Thu, 18 Mar 2021 21:09:47 -0400 Subject: [PATCH 2/5] Add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/generic.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9c8968f7f8223..b1decefc13df1 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -541,6 +541,7 @@ Missing - Bug in :class:`Grouper` now correctly propagates ``dropna`` argument and :meth:`DataFrameGroupBy.transform` now correctly handles missing values for ``dropna=True`` (:issue:`35612`) - Bug in :func:`isna`, and :meth:`Series.isna`, :meth:`Index.isna`, :meth:`DataFrame.isna` (and the corresponding ``notna`` functions) not recognizing ``Decimal("NaN")`` objects (:issue:`39409`) +- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` overwriting missing values in indices skipped by the ``value`` argument (:issue:`40498`) - MultiIndex diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 231aa35adf0e3..64ee2f2645e6b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6524,11 +6524,9 @@ def fillna( value, dtype_if_empty=object ) value = value.reindex(self.index, copy=False) - print(value) # GH-40498: Indices to not apply fillna to are marked with NaN, - # but that will still cause other missing values to be replaced with NaN, + # but that will still cause other missing values to be replaced with NaN value.loc[self.isna() & value.isna()] = self - # print(value) value = value._values elif not is_list_like(value): pass From 3e869344ecfa708d6f8da9856591fdf18fd1aa89 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Thu, 18 Mar 2021 21:13:36 -0400 Subject: [PATCH 3/5] precommit fixup --- pandas/core/generic.py | 3 ++- pandas/core/internals/blocks.py | 1 - pandas/tests/series/methods/test_fillna.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 64ee2f2645e6b..8b111cea82bb7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6525,7 +6525,8 @@ def fillna( ) value = value.reindex(self.index, copy=False) # GH-40498: Indices to not apply fillna to are marked with NaN, - # but that will still cause other missing values to be replaced with NaN + # but that will still cause other missing values to be replaced + # with NaN (which is problematic if those aren't NaN) value.loc[self.isna() & value.isna()] = self value = value._values elif not is_list_like(value): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 69f2cc454536b..3fd1ebaca19f0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -72,7 +72,6 @@ is_valid_na_for_dtype, isna, na_value_for_dtype, - notna, ) import pandas.core.algorithms as algos diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 7e40ec509ddb2..d7b873ff14357 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -9,7 +9,6 @@ import pytz from pandas import ( - NA, Categorical, DataFrame, DatetimeIndex, From 39893a7a6d30be6956f7c118793cf6ec26104a28 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Thu, 18 Mar 2021 21:14:59 -0400 Subject: [PATCH 4/5] Fix whitespace --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8b111cea82bb7..3a8404d5d6a67 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6524,6 +6524,7 @@ def fillna( value, dtype_if_empty=object ) value = value.reindex(self.index, copy=False) + # GH-40498: Indices to not apply fillna to are marked with NaN, # but that will still cause other missing values to be replaced # with NaN (which is problematic if those aren't NaN) @@ -6537,6 +6538,7 @@ def fillna( "or Series, but you passed a " f'"{type(value).__name__}"' ) + new_data = self._mgr.fillna( value=value, limit=limit, inplace=inplace, downcast=downcast ) From d365360fa4aec25c7272140f4005b8ac8b71f0ab Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Thu, 18 Mar 2021 21:15:36 -0400 Subject: [PATCH 5/5] Fix whitespace --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3a8404d5d6a67..5d95412f4b1ee 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6538,7 +6538,7 @@ def fillna( "or Series, but you passed a " f'"{type(value).__name__}"' ) - + new_data = self._mgr.fillna( value=value, limit=limit, inplace=inplace, downcast=downcast )