@@ -187,17 +187,21 @@ def f():
187
187
cat = pd .Categorical ([np .nan , 1. , 2. , 3. ])
188
188
self .assertTrue (com .is_float_dtype (cat .categories ))
189
189
190
+ # Deprecating NaNs in categories (GH #10748)
190
191
# preserve int as far as possible by converting to object if NaN is in categories
191
- cat = pd .Categorical ([np .nan , 1 , 2 , 3 ], categories = [np .nan , 1 , 2 , 3 ])
192
+ with tm .assert_produces_warning (FutureWarning ):
193
+ cat = pd .Categorical ([np .nan , 1 , 2 , 3 ], categories = [np .nan , 1 , 2 , 3 ])
192
194
self .assertTrue (com .is_object_dtype (cat .categories ))
193
195
# This doesn't work -> this would probably need some kind of "remember the original type"
194
196
# feature to try to cast the array interface result to...
195
197
#vals = np.asarray(cat[cat.notnull()])
196
198
#self.assertTrue(com.is_integer_dtype(vals))
197
- cat = pd .Categorical ([np .nan ,"a" , "b" , "c" ], categories = [np .nan ,"a" , "b" , "c" ])
199
+ with tm .assert_produces_warning (FutureWarning ):
200
+ cat = pd .Categorical ([np .nan ,"a" , "b" , "c" ], categories = [np .nan ,"a" , "b" , "c" ])
198
201
self .assertTrue (com .is_object_dtype (cat .categories ))
199
202
# but don't do it for floats
200
- cat = pd .Categorical ([np .nan , 1. , 2. , 3. ], categories = [np .nan , 1. , 2. , 3. ])
203
+ with tm .assert_produces_warning (FutureWarning ):
204
+ cat = pd .Categorical ([np .nan , 1. , 2. , 3. ], categories = [np .nan , 1. , 2. , 3. ])
201
205
self .assertTrue (com .is_float_dtype (cat .categories ))
202
206
203
207
@@ -465,17 +469,19 @@ def test_describe(self):
465
469
tm .assert_frame_equal (desc , expected )
466
470
467
471
# NA as a category
468
- cat = pd .Categorical (["a" ,"c" ,"c" ,np .nan ], categories = ["b" ,"a" ,"c" ,np .nan ])
469
- result = cat .describe ()
472
+ with tm .assert_produces_warning (FutureWarning ):
473
+ cat = pd .Categorical (["a" ,"c" ,"c" ,np .nan ], categories = ["b" ,"a" ,"c" ,np .nan ])
474
+ result = cat .describe ()
470
475
471
476
expected = DataFrame ([[0 ,0 ],[1 ,0.25 ],[2 ,0.5 ],[1 ,0.25 ]],
472
477
columns = ['counts' ,'freqs' ],
473
478
index = Index (['b' ,'a' ,'c' ,np .nan ],name = 'categories' ))
474
479
tm .assert_frame_equal (result ,expected )
475
480
476
481
# NA as an unused category
477
- cat = pd .Categorical (["a" ,"c" ,"c" ], categories = ["b" ,"a" ,"c" ,np .nan ])
478
- result = cat .describe ()
482
+ with tm .assert_produces_warning (FutureWarning ):
483
+ cat = pd .Categorical (["a" ,"c" ,"c" ], categories = ["b" ,"a" ,"c" ,np .nan ])
484
+ result = cat .describe ()
479
485
480
486
expected = DataFrame ([[0 ,0 ],[1 ,1 / 3. ],[2 ,2 / 3. ],[0 ,0 ]],
481
487
columns = ['counts' ,'freqs' ],
@@ -827,29 +833,37 @@ def test_nan_handling(self):
827
833
self .assert_numpy_array_equal (c ._codes , np .array ([0 ,- 1 ,- 1 ,0 ]))
828
834
829
835
# If categories have nan included, the code should point to that instead
830
- c = Categorical (["a" ,"b" ,np .nan ,"a" ], categories = ["a" ,"b" ,np .nan ])
831
- self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],dtype = np .object_ ))
832
- self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,2 ,0 ]))
836
+ with tm .assert_produces_warning (FutureWarning ):
837
+ c = Categorical (["a" ,"b" ,np .nan ,"a" ], categories = ["a" ,"b" ,np .nan ])
838
+ self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],
839
+ dtype = np .object_ ))
840
+ self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,2 ,0 ]))
833
841
c [1 ] = np .nan
834
- self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],dtype = np .object_ ))
835
- self .assert_numpy_array_equal (c ._codes , np .array ([0 ,2 ,2 ,0 ]))
842
+ self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],
843
+ dtype = np .object_ ))
844
+ self .assert_numpy_array_equal (c ._codes , np .array ([0 ,2 ,2 ,0 ]))
836
845
837
846
# Changing categories should also make the replaced category np.nan
838
847
c = Categorical (["a" ,"b" ,"c" ,"a" ])
839
- c .categories = ["a" ,"b" ,np .nan ]
840
- self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],dtype = np .object_ ))
841
- self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,2 ,0 ]))
848
+ with tm .assert_produces_warning (FutureWarning ):
849
+ c .categories = ["a" ,"b" ,np .nan ]
850
+ self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],
851
+ dtype = np .object_ ))
852
+ self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,2 ,0 ]))
842
853
843
854
# Adding nan to categories should make assigned nan point to the category!
844
855
c = Categorical (["a" ,"b" ,np .nan ,"a" ])
845
856
self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ]))
846
857
self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,- 1 ,0 ]))
847
- c .set_categories (["a" ,"b" ,np .nan ], rename = True , inplace = True )
848
- self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],dtype = np .object_ ))
849
- self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,- 1 ,0 ]))
858
+ with tm .assert_produces_warning (FutureWarning ):
859
+ c .set_categories (["a" ,"b" ,np .nan ], rename = True , inplace = True )
860
+ self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],
861
+ dtype = np .object_ ))
862
+ self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,- 1 ,0 ]))
850
863
c [1 ] = np .nan
851
- self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],dtype = np .object_ ))
852
- self .assert_numpy_array_equal (c ._codes , np .array ([0 ,2 ,- 1 ,0 ]))
864
+ self .assert_numpy_array_equal (c .categories , np .array (["a" ,"b" ,np .nan ],
865
+ dtype = np .object_ ))
866
+ self .assert_numpy_array_equal (c ._codes , np .array ([0 ,2 ,- 1 ,0 ]))
853
867
854
868
# Remove null categories (GH 10156)
855
869
cases = [
@@ -861,11 +875,13 @@ def test_nan_handling(self):
861
875
null_values = [np .nan , None , pd .NaT ]
862
876
863
877
for with_null , without in cases :
864
- base = Categorical ([], with_null )
878
+ with tm .assert_produces_warning (FutureWarning ):
879
+ base = Categorical ([], with_null )
865
880
expected = Categorical ([], without )
866
881
867
- for nullval in null_values :
868
- result = base .remove_categories (nullval )
882
+ with tm .assert_produces_warning (FutureWarning ):
883
+ for nullval in null_values :
884
+ result = base .remove_categories (nullval )
869
885
self .assert_categorical_equal (result , expected )
870
886
871
887
# Different null values are indistinguishable
@@ -880,14 +896,16 @@ def test_isnull(self):
880
896
res = c .isnull ()
881
897
self .assert_numpy_array_equal (res , exp )
882
898
883
- c = Categorical (["a" ,"b" ,np .nan ], categories = ["a" ,"b" ,np .nan ])
899
+ with tm .assert_produces_warning (FutureWarning ):
900
+ c = Categorical (["a" ,"b" ,np .nan ], categories = ["a" ,"b" ,np .nan ])
884
901
res = c .isnull ()
885
902
self .assert_numpy_array_equal (res , exp )
886
903
887
904
# test both nan in categories and as -1
888
905
exp = np .array ([True , False , True ])
889
906
c = Categorical (["a" ,"b" ,np .nan ])
890
- c .set_categories (["a" ,"b" ,np .nan ], rename = True , inplace = True )
907
+ with tm .assert_produces_warning (FutureWarning ):
908
+ c .set_categories (["a" ,"b" ,np .nan ], rename = True , inplace = True )
891
909
c [0 ] = np .nan
892
910
res = c .isnull ()
893
911
self .assert_numpy_array_equal (res , exp )
@@ -1087,31 +1105,36 @@ def test_set_item_nan(self):
1087
1105
1088
1106
# if nan in categories, the proper code should be set!
1089
1107
cat = pd .Categorical ([1 ,2 ,3 , np .nan ], categories = [1 ,2 ,3 ])
1090
- cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1108
+ with tm .assert_produces_warning (FutureWarning ):
1109
+ cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1091
1110
cat [1 ] = np .nan
1092
1111
exp = np .array ([0 ,3 ,2 ,- 1 ])
1093
1112
self .assert_numpy_array_equal (cat .codes , exp )
1094
1113
1095
1114
cat = pd .Categorical ([1 ,2 ,3 , np .nan ], categories = [1 ,2 ,3 ])
1096
- cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1115
+ with tm .assert_produces_warning (FutureWarning ):
1116
+ cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1097
1117
cat [1 :3 ] = np .nan
1098
1118
exp = np .array ([0 ,3 ,3 ,- 1 ])
1099
1119
self .assert_numpy_array_equal (cat .codes , exp )
1100
1120
1101
1121
cat = pd .Categorical ([1 ,2 ,3 , np .nan ], categories = [1 ,2 ,3 ])
1102
- cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1122
+ with tm .assert_produces_warning (FutureWarning ):
1123
+ cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1103
1124
cat [1 :3 ] = [np .nan , 1 ]
1104
1125
exp = np .array ([0 ,3 ,0 ,- 1 ])
1105
1126
self .assert_numpy_array_equal (cat .codes , exp )
1106
1127
1107
1128
cat = pd .Categorical ([1 ,2 ,3 , np .nan ], categories = [1 ,2 ,3 ])
1108
- cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1129
+ with tm .assert_produces_warning (FutureWarning ):
1130
+ cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1109
1131
cat [1 :3 ] = [np .nan , np .nan ]
1110
1132
exp = np .array ([0 ,3 ,3 ,- 1 ])
1111
1133
self .assert_numpy_array_equal (cat .codes , exp )
1112
1134
1113
1135
cat = pd .Categorical ([1 ,2 , np .nan , 3 ], categories = [1 ,2 ,3 ])
1114
- cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1136
+ with tm .assert_produces_warning (FutureWarning ):
1137
+ cat .set_categories ([1 ,2 ,3 , np .nan ], rename = True , inplace = True )
1115
1138
cat [pd .isnull (cat )] = np .nan
1116
1139
exp = np .array ([0 ,1 ,3 ,2 ])
1117
1140
self .assert_numpy_array_equal (cat .codes , exp )
@@ -1555,14 +1578,16 @@ def test_nan_handling(self):
1555
1578
self .assert_numpy_array_equal (s .values .codes , np .array ([0 ,1 ,- 1 ,0 ]))
1556
1579
1557
1580
# If categories have nan included, the label should point to that instead
1558
- s2 = Series (Categorical (["a" ,"b" ,np .nan ,"a" ], categories = ["a" ,"b" ,np .nan ]))
1581
+ with tm .assert_produces_warning (FutureWarning ):
1582
+ s2 = Series (Categorical (["a" ,"b" ,np .nan ,"a" ], categories = ["a" ,"b" ,np .nan ]))
1559
1583
self .assert_numpy_array_equal (s2 .cat .categories ,
1560
1584
np .array (["a" ,"b" ,np .nan ], dtype = np .object_ ))
1561
1585
self .assert_numpy_array_equal (s2 .values .codes , np .array ([0 ,1 ,2 ,0 ]))
1562
1586
1563
1587
# Changing categories should also make the replaced category np.nan
1564
1588
s3 = Series (Categorical (["a" ,"b" ,"c" ,"a" ]))
1565
- s3 .cat .categories = ["a" ,"b" ,np .nan ]
1589
+ with tm .assert_produces_warning (FutureWarning ):
1590
+ s3 .cat .categories = ["a" ,"b" ,np .nan ]
1566
1591
self .assert_numpy_array_equal (s3 .cat .categories ,
1567
1592
np .array (["a" ,"b" ,np .nan ], dtype = np .object_ ))
1568
1593
self .assert_numpy_array_equal (s3 .values .codes , np .array ([0 ,1 ,2 ,0 ]))
@@ -2415,28 +2440,32 @@ def test_value_counts_with_nan(self):
2415
2440
s .value_counts (dropna = False , sort = False ),
2416
2441
pd .Series ([2 , 1 , 3 ], index = ["a" , "b" , np .nan ]))
2417
2442
2418
- s = pd .Series (pd .Categorical (["a" , "b" , "a" ], categories = ["a" , "b" , np .nan ]))
2419
- tm .assert_series_equal (
2420
- s .value_counts (dropna = True ),
2421
- pd .Series ([2 , 1 ], index = ["a" , "b" ]))
2422
- tm .assert_series_equal (
2423
- s .value_counts (dropna = False ),
2424
- pd .Series ([2 , 1 , 0 ], index = ["a" , "b" , np .nan ]))
2443
+ with tm .assert_produces_warning (FutureWarning ):
2444
+ s = pd .Series (pd .Categorical (["a" , "b" , "a" ], categories = ["a" , "b" , np .nan ]))
2445
+ tm .assert_series_equal (
2446
+ s .value_counts (dropna = True ),
2447
+ pd .Series ([2 , 1 ], index = ["a" , "b" ]))
2448
+ tm .assert_series_equal (
2449
+ s .value_counts (dropna = False ),
2450
+ pd .Series ([2 , 1 , 0 ], index = ["a" , "b" , np .nan ]))
2425
2451
2426
- s = pd .Series (pd .Categorical (["a" , "b" , None , "a" , None , None ], categories = ["a" , "b" , np .nan ]))
2427
- tm .assert_series_equal (
2428
- s .value_counts (dropna = True ),
2429
- pd .Series ([2 , 1 ], index = ["a" , "b" ]))
2430
- tm .assert_series_equal (
2431
- s .value_counts (dropna = False ),
2432
- pd .Series ([3 , 2 , 1 ], index = [np .nan , "a" , "b" ]))
2452
+ with tm .assert_produces_warning (FutureWarning ):
2453
+ s = pd .Series (pd .Categorical (["a" , "b" , None , "a" , None , None ],
2454
+ categories = ["a" , "b" , np .nan ]))
2455
+ tm .assert_series_equal (
2456
+ s .value_counts (dropna = True ),
2457
+ pd .Series ([2 , 1 ], index = ["a" , "b" ]))
2458
+ tm .assert_series_equal (
2459
+ s .value_counts (dropna = False ),
2460
+ pd .Series ([3 , 2 , 1 ], index = [np .nan , "a" , "b" ]))
2433
2461
2434
2462
def test_groupby (self ):
2435
2463
2436
2464
cats = Categorical (["a" , "a" , "a" , "b" , "b" , "b" , "c" , "c" , "c" ], categories = ["a" ,"b" ,"c" ,"d" ], ordered = True )
2437
2465
data = DataFrame ({"a" :[1 ,1 ,1 ,2 ,2 ,2 ,3 ,4 ,5 ], "b" :cats })
2438
2466
2439
- expected = DataFrame ({ 'a' : Series ([1 ,2 ,4 ,np .nan ],index = Index (['a' ,'b' ,'c' ,'d' ],name = 'b' )) })
2467
+ expected = DataFrame ({'a' : Series ([1 , 2 , 4 , np .nan ],
2468
+ index = Index (['a' , 'b' , 'c' , 'd' ], name = 'b' ))})
2440
2469
result = data .groupby ("b" ).mean ()
2441
2470
tm .assert_frame_equal (result , expected )
2442
2471
@@ -3454,11 +3483,13 @@ def f():
3454
3483
3455
3484
# make sure that fillna takes both missing values and NA categories into account
3456
3485
c = Categorical (["a" ,"b" ,np .nan ])
3457
- c .set_categories (["a" ,"b" ,np .nan ], rename = True , inplace = True )
3486
+ with tm .assert_produces_warning (FutureWarning ):
3487
+ c .set_categories (["a" ,"b" ,np .nan ], rename = True , inplace = True )
3458
3488
c [0 ] = np .nan
3459
3489
df = pd .DataFrame ({"cats" :c , "vals" :[1 ,2 ,3 ]})
3460
3490
df_exp = pd .DataFrame ({"cats" : Categorical (["a" ,"b" ,"a" ]), "vals" : [1 ,2 ,3 ]})
3461
- res = df .fillna ("a" )
3491
+ with tm .assert_produces_warning (FutureWarning ):
3492
+ res = df .fillna ("a" )
3462
3493
tm .assert_frame_equal (res , df_exp )
3463
3494
3464
3495
0 commit comments