From a40577751f0b42664d3f3d727d99f56e512214d8 Mon Sep 17 00:00:00 2001 From: John Liekezer Date: Fri, 19 Aug 2016 19:46:43 +0300 Subject: [PATCH] BUG: Dataframe.fillna with np.nan for dtype=category(GH 14021) --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/core/categorical.py | 5 ++++- pandas/tests/test_categorical.py | 25 +++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index cc3cc631b9575..7ba832c39f759 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1061,6 +1061,7 @@ Bug Fixes - Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another python package called apiclient (:issue:`13454`) - Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`) - Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`) +- Bug where ``DataFrame(..., dtype='category').fillna(value=np.nan)`` raises ``KeyError`` (:issue:`14021`) - Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`) - Bug in invalid frequency offset string like "D1", "-2-3H" may not raise ``ValueError (:issue:`13930`) - Bug in ``concat`` and ``groupby`` for hierarchical frames with ``RangeIndex`` levels (:issue:`13542`). 
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 6ea0a5e96672d..f91ca0feed34e 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1444,7 +1444,10 @@ def fillna(self, value=None, method=None, limit=None): mask = values == -1 if mask.any(): values = values.copy() - values[mask] = self.categories.get_loc(value) + if isnull(value): + values[mask] = -1 + else: + values[mask] = self.categories.get_loc(value) return self._constructor(values, categories=self.categories, ordered=self.ordered, fastpath=True) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index b630e0914259e..bba0bbfb41503 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4100,6 +4100,31 @@ def f(): res = df.fillna("a") tm.assert_frame_equal(res, df_exp) + # GH 14021 + cat = Categorical([np.nan, 2, np.nan]) + val = Categorical([np.nan, np.nan, np.nan]) + df = DataFrame({"cats": cat, "vals": val}) + res = df.fillna(df.median()) + v_exp = [np.nan, np.nan, np.nan] + df_exp = pd.DataFrame({"cats": [2, 2, 2], "vals": v_exp}, + dtype='category') + tm.assert_frame_equal(res, df_exp) + + idx = pd.DatetimeIndex(['2011-01-01 09:00', '2016-01-01 23:45', + '2011-01-01 09:00', pd.NaT, pd.NaT]) + df = DataFrame({'a': pd.Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + + idx = pd.PeriodIndex(['2011-01', '2011-01', '2011-01', + pd.NaT, pd.NaT], freq='M') + df = DataFrame({'a': pd.Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + + idx = pd.TimedeltaIndex(['1 days', '2 days', + '1 days', pd.NaT, pd.NaT]) + df = pd.DataFrame({'a': pd.Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + def test_astype_to_other(self): s = self.cat['value_group']