|
10 | 10 |
|
11 | 11 | from pandas.compat import lrange
|
12 | 12 | from pandas import (DataFrame, Series, Timestamp,
|
13 |
| - date_range) |
| 13 | + date_range, Categorical) |
14 | 14 | import pandas as pd
|
15 | 15 |
|
16 | 16 | from pandas.util.testing import assert_series_equal, assert_frame_equal
|
@@ -270,6 +270,81 @@ def test_fillna(self):
|
270 | 270 | pd.Timestamp('2012-11-11 00:00:00+01:00')]})
|
271 | 271 | assert_frame_equal(df.fillna(method='bfill'), exp)
|
272 | 272 |
|
| 273 | + def test_na_actions_categorical(self): |
| 274 | + |
| 275 | + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) |
| 276 | + vals = ["a", "b", np.nan, "d"] |
| 277 | + df = DataFrame({"cats": cat, "vals": vals}) |
| 278 | + cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3]) |
| 279 | + vals2 = ["a", "b", "b", "d"] |
| 280 | + df_exp_fill = DataFrame({"cats": cat2, "vals": vals2}) |
| 281 | + cat3 = Categorical([1, 2, 3], categories=[1, 2, 3]) |
| 282 | + vals3 = ["a", "b", np.nan] |
| 283 | + df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3}) |
| 284 | + cat4 = Categorical([1, 2], categories=[1, 2, 3]) |
| 285 | + vals4 = ["a", "b"] |
| 286 | + df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4}) |
| 287 | + |
| 288 | + # fillna |
| 289 | + res = df.fillna(value={"cats": 3, "vals": "b"}) |
| 290 | + tm.assert_frame_equal(res, df_exp_fill) |
| 291 | + |
| 292 | + with tm.assert_raises_regex(ValueError, "fill value must be " |
| 293 | + "in categories"): |
| 294 | + df.fillna(value={"cats": 4, "vals": "c"}) |
| 295 | + |
| 296 | + res = df.fillna(method='pad') |
| 297 | + tm.assert_frame_equal(res, df_exp_fill) |
| 298 | + |
| 299 | + # dropna |
| 300 | + res = df.dropna(subset=["cats"]) |
| 301 | + tm.assert_frame_equal(res, df_exp_drop_cats) |
| 302 | + |
| 303 | + res = df.dropna() |
| 304 | + tm.assert_frame_equal(res, df_exp_drop_all) |
| 305 | + |
| 306 | + # make sure that fillna takes missing values into account |
| 307 | + c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) |
| 308 | + df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]}) |
| 309 | + |
| 310 | + cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) |
| 311 | + df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) |
| 312 | + |
| 313 | + res = df.fillna("a") |
| 314 | + tm.assert_frame_equal(res, df_exp) |
| 315 | + |
| 316 | + def test_fillna_categorical_nan(self): |
| 317 | + # GH 14021 |
| 318 | + # np.nan should always be a valid filler |
| 319 | + cat = Categorical([np.nan, 2, np.nan]) |
| 320 | + val = Categorical([np.nan, np.nan, np.nan]) |
| 321 | + df = DataFrame({"cats": cat, "vals": val}) |
| 322 | + res = df.fillna(df.median()) |
| 323 | + v_exp = [np.nan, np.nan, np.nan] |
| 324 | + df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, |
| 325 | + dtype='category') |
| 326 | + tm.assert_frame_equal(res, df_exp) |
| 327 | + |
| 328 | + result = df.cats.fillna(np.nan) |
| 329 | + tm.assert_series_equal(result, df.cats) |
| 330 | + result = df.vals.fillna(np.nan) |
| 331 | + tm.assert_series_equal(result, df.vals) |
| 332 | + |
| 333 | + idx = pd.DatetimeIndex(['2011-01-01 09:00', '2016-01-01 23:45', |
| 334 | + '2011-01-01 09:00', pd.NaT, pd.NaT]) |
| 335 | + df = DataFrame({'a': Categorical(idx)}) |
| 336 | + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) |
| 337 | + |
| 338 | + idx = pd.PeriodIndex(['2011-01', '2011-01', '2011-01', |
| 339 | + pd.NaT, pd.NaT], freq='M') |
| 340 | + df = DataFrame({'a': Categorical(idx)}) |
| 341 | + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) |
| 342 | + |
| 343 | + idx = pd.TimedeltaIndex(['1 days', '2 days', |
| 344 | + '1 days', pd.NaT, pd.NaT]) |
| 345 | + df = DataFrame({'a': Categorical(idx)}) |
| 346 | + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) |
| 347 | + |
273 | 348 | def test_fillna_downcast(self):
|
274 | 349 | # GH 15277
|
275 | 350 | # infer int64 from float64
|
@@ -489,7 +564,7 @@ def test_fillna_invalid_value(self):
|
489 | 564 | # tuple
|
490 | 565 | pytest.raises(TypeError, self.frame.fillna, (1, 2))
|
491 | 566 | # frame with series
|
492 |
| - pytest.raises(ValueError, self.frame.iloc[:, 0].fillna, self.frame) |
| 567 | + pytest.raises(TypeError, self.frame.iloc[:, 0].fillna, self.frame) |
493 | 568 |
|
494 | 569 | def test_fillna_col_reordering(self):
|
495 | 570 | cols = ["COL." + str(i) for i in range(5, 0, -1)]
|
|
0 commit comments