From 6ed5f08fcdd6e90addf83bbadf6e6c384f590f81 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 15 Aug 2019 14:26:58 +0100 Subject: [PATCH 1/6] Ensure that :func: in :class: only replaces null values --- doc/source/whatsnew/v0.25.1.rst | 2 ++ pandas/core/arrays/categorical.py | 6 ++---- pandas/tests/series/test_missing.py | 22 ++++++++++++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 21f1fa7ddec1f..323e73f225717 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -21,6 +21,8 @@ Other enhancements Bug fixes ~~~~~~~~~ +- Bug in :func:`fill_na` in :class:`Categorical` would replace all values, not just those that are NaN (:issue:`26215`) + Categorical ^^^^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index bbbeb812d1fe9..1f65d9e553d5e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1824,7 +1824,6 @@ def fillna(self, value=None, method=None, limit=None): # pad / bfill if method is not None: - values = self.to_dense().reshape(-1, len(self)) values = interpolate_2d(values, method, 0, None, value).astype( self.categories.dtype @@ -1838,10 +1837,9 @@ def fillna(self, value=None, method=None, limit=None): if isinstance(value, ABCSeries): if not value[~value.isin(self.categories)].isna().all(): raise ValueError("fill value must be in categories") - values_codes = _get_codes_for_values(value, self.categories) - indexer = np.where(values_codes != -1) - codes[indexer] = values_codes[values_codes != -1] + indexer = np.where(codes == -1) + codes[indexer] = values_codes[codes == -1] # If value is not a dict or Series it should be a scalar elif is_hashable(value): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index f1b84acf68755..e5d33001b7b80 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -578,6 +578,28 @@ def test_fillna_categorical(self, fill_value, expected_output): exp = Series(Categorical(expected_output, categories=["a", "b"])) tm.assert_series_equal(s.fillna(fill_value), exp) + @pytest.mark.parametrize( + "new_categories, fill_value, expected_output", + [ + ( + ["c", "d", "e"], + Series(["a", "b", "c", "d", "e"]), + ["a", "b", "b", "d", "e"], + ) + ], + ) + def test_fillna_categorical_with_new_categories( + self, new_categories, fill_value, expected_output + ): + # GH 26215 + data = ["a", np.nan, "b", np.nan, np.nan] + s = Series(Categorical(data, categories=["a", "b"])) + s.cat.add_categories(new_categories, inplace=True) + exp = Series( + Categorical(expected_output, categories=["a", "b"] + new_categories) + ) + tm.assert_series_equal(s.fillna(fill_value), exp) + def test_fillna_categorical_raise(self): data = ["a", np.nan, "b", np.nan, np.nan] s = Series(Categorical(data, categories=["a", "b"])) From 90bfabcadd84f70f031027c08263e1b4bb2b422d Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 15 Aug 2019 15:05:39 +0100 Subject: [PATCH 2/6] Remove argument new_categories from test function, add test case where fill value series values are in different order --- pandas/tests/series/test_missing.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index e5d33001b7b80..41c2e899c21ef 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -579,25 +579,17 @@ def test_fillna_categorical(self, fill_value, expected_output): tm.assert_series_equal(s.fillna(fill_value), exp) @pytest.mark.parametrize( - "new_categories, fill_value, expected_output", + "fill_value, expected_output", [ - ( - ["c", "d", "e"], - Series(["a", "b", "c", "d", "e"]), - ["a", "b", "b", "d", "e"], - ) + (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]), + (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]), ], ) - def test_fillna_categorical_with_new_categories( - self, new_categories, fill_value, expected_output - ): + def test_fillna_categorical_with_new_categories(self, fill_value, expected_output): # GH 26215 data = ["a", np.nan, "b", np.nan, np.nan] - s = Series(Categorical(data, categories=["a", "b"])) - s.cat.add_categories(new_categories, inplace=True) - exp = Series( - Categorical(expected_output, categories=["a", "b"] + new_categories) - ) + s = Series(Categorical(data, categories=["a", "b", "c", "d", "e"])) + exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"])) tm.assert_series_equal(s.fillna(fill_value), exp) def test_fillna_categorical_raise(self): From e088eaba1ba16618c9aa05c587b900975ba33931 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Thu, 15 Aug 2019 20:22:36 +0100 Subject: [PATCH 3/6] Reformat whatsnew entry --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 323e73f225717..047f3c2e19cb2 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -21,7 +21,7 @@ Other enhancements Bug fixes ~~~~~~~~~ -- Bug in :func:`fill_na` in :class:`Categorical` would replace all values, not just those that are NaN (:issue:`26215`) +- Bug in :func:`fill_na` in :meth:`Categorical.fillna` would replace all values, not just those that are NaN (:issue:`26215`) Categorical From 550302ab498e662b0305c72928a6200bdb9753b8 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Thu, 15 Aug 2019 20:27:21 +0100 Subject: [PATCH 4/6] Clean up --- pandas/core/arrays/categorical.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 1f65d9e553d5e..a895da6184eeb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1824,6 +1824,7 @@ def fillna(self, value=None, method=None, limit=None): # pad / bfill if method is not None: + values = self.to_dense().reshape(-1, len(self)) values = interpolate_2d(values, method, 0, None, value).astype( self.categories.dtype @@ -1837,9 +1838,10 @@ def fillna(self, value=None, method=None, limit=None): if isinstance(value, ABCSeries): if not value[~value.isin(self.categories)].isna().all(): raise ValueError("fill value must be in categories") + values_codes = _get_codes_for_values(value, self.categories) indexer = np.where(codes == -1) - codes[indexer] = values_codes[codes == -1] + codes[indexer] = values_codes[indexer] # If value is not a dict or Series it should be a scalar elif is_hashable(value): From 9e0885794574205588203e5269a7848b160e0a77 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Thu, 15 Aug 2019 22:10:42 +0100 Subject: [PATCH 5/6] Add test case in which categories of fill values are in different order to categories of origin series --- pandas/tests/series/test_missing.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 41c2e899c21ef..ddd2c566f4cda 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -583,6 +583,14 @@ def test_fillna_categorical(self, fill_value, expected_output): [ (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]), (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]), + ( + Series( + Categorical( + ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"] + ) + ), + ["a", "d", "b", "d", "a"], + ), ], ) def test_fillna_categorical_with_new_categories(self, fill_value, expected_output): From a679508f2cf7c3b056106cd92fab6671f72cff66 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 16 Aug 2019 09:35:44 +0100 Subject: [PATCH 6/6] Correct typo and reformat whatsnew entry --- doc/source/whatsnew/v0.25.1.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 047f3c2e19cb2..647bc750907ce 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -21,13 +21,11 @@ Other enhancements Bug fixes ~~~~~~~~~ -- Bug in :func:`fill_na` in :meth:`Categorical.fillna` would replace all values, not just those that are NaN (:issue:`26215`) - Categorical ^^^^^^^^^^^ -- +- Bug in :meth:`Categorical.fillna` would replace all values, not just those that are ``NaN`` (:issue:`26215`) - -