@@ -160,12 +160,6 @@ def f():
160
160
161
161
self .assertRaises (ValueError , f )
162
162
163
- def f ():
164
- with tm .assert_produces_warning (FutureWarning ):
165
- Categorical ([1 , 2 ], [1 , 2 , np .nan , np .nan ])
166
-
167
- self .assertRaises (ValueError , f )
168
-
169
163
# The default should be unordered
170
164
c1 = Categorical (["a" , "b" , "c" , "a" ])
171
165
self .assertFalse (c1 .ordered )
@@ -222,29 +216,19 @@ def f():
222
216
cat = pd .Categorical ([np .nan , 1. , 2. , 3. ])
223
217
self .assertTrue (is_float_dtype (cat .categories ))
224
218
225
- # Deprecating NaNs in categories (GH #10748)
226
- # preserve int as far as possible by converting to object if NaN is in
227
- # categories
228
- with tm .assert_produces_warning (FutureWarning ):
229
- cat = pd .Categorical ([np .nan , 1 , 2 , 3 ],
230
- categories = [np .nan , 1 , 2 , 3 ])
231
- self .assertTrue (is_object_dtype (cat .categories ))
232
-
233
219
# This doesn't work -> this would probably need some kind of "remember
234
220
# the original type" feature to try to cast the array interface result
235
221
# to...
236
222
237
223
# vals = np.asarray(cat[cat.notnull()])
238
224
# self.assertTrue(is_integer_dtype(vals))
239
- with tm .assert_produces_warning (FutureWarning ):
240
- cat = pd .Categorical ([np .nan , "a" , "b" , "c" ],
241
- categories = [np .nan , "a" , "b" , "c" ])
242
- self .assertTrue (is_object_dtype (cat .categories ))
243
- # but don't do it for floats
244
- with tm .assert_produces_warning (FutureWarning ):
245
- cat = pd .Categorical ([np .nan , 1. , 2. , 3. ],
246
- categories = [np .nan , 1. , 2. , 3. ])
247
- self .assertTrue (is_float_dtype (cat .categories ))
225
+
226
+ # Cannot have NaN in categories
227
+ def f ():
228
+ pd .Categorical ([np .nan , "a" , "b" , "c" ],
229
+ categories = [np .nan , "a" , "b" , "c" ])
230
+
231
+ self .assertRaises (ValueError , f )
248
232
249
233
# corner cases
250
234
cat = pd .Categorical ([1 ])
@@ -418,6 +402,12 @@ def f():
418
402
419
403
self .assertRaises (ValueError , f )
420
404
405
+ # NaN categories included
406
+ def f ():
407
+ Categorical .from_codes ([0 , 1 , 2 ], ["a" , "b" , np .nan ])
408
+
409
+ self .assertRaises (ValueError , f )
410
+
421
411
# too negative
422
412
def f ():
423
413
Categorical .from_codes ([- 2 , 1 , 2 ], ["a" , "b" , "c" ])
@@ -649,30 +639,6 @@ def test_describe(self):
649
639
name = 'categories' ))
650
640
tm .assert_frame_equal (desc , expected )
651
641
652
- # NA as a category
653
- with tm .assert_produces_warning (FutureWarning ):
654
- cat = pd .Categorical (["a" , "c" , "c" , np .nan ],
655
- categories = ["b" , "a" , "c" , np .nan ])
656
- result = cat .describe ()
657
-
658
- expected = DataFrame ([[0 , 0 ], [1 , 0.25 ], [2 , 0.5 ], [1 , 0.25 ]],
659
- columns = ['counts' , 'freqs' ],
660
- index = pd .CategoricalIndex (['b' , 'a' , 'c' , np .nan ],
661
- name = 'categories' ))
662
- tm .assert_frame_equal (result , expected , check_categorical = False )
663
-
664
- # NA as an unused category
665
- with tm .assert_produces_warning (FutureWarning ):
666
- cat = pd .Categorical (["a" , "c" , "c" ],
667
- categories = ["b" , "a" , "c" , np .nan ])
668
- result = cat .describe ()
669
-
670
- exp_idx = pd .CategoricalIndex (
671
- ['b' , 'a' , 'c' , np .nan ], name = 'categories' )
672
- expected = DataFrame ([[0 , 0 ], [1 , 1 / 3. ], [2 , 2 / 3. ], [0 , 0 ]],
673
- columns = ['counts' , 'freqs' ], index = exp_idx )
674
- tm .assert_frame_equal (result , expected , check_categorical = False )
675
-
676
642
def test_print (self ):
677
643
expected = ["[a, b, b, a, a, c, c, c]" ,
678
644
"Categories (3, object): [a < b < c]" ]
@@ -1119,90 +1085,18 @@ def test_nan_handling(self):
1119
1085
self .assert_numpy_array_equal (c ._codes ,
1120
1086
np .array ([0 , - 1 , - 1 , 0 ], dtype = np .int8 ))
1121
1087
1122
- # If categories have nan included, the code should point to that
1123
- # instead
1124
- with tm .assert_produces_warning (FutureWarning ):
1125
- c = Categorical (["a" , "b" , np .nan , "a" ],
1126
- categories = ["a" , "b" , np .nan ])
1127
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1128
- self .assert_numpy_array_equal (c ._codes ,
1129
- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
1130
- c [1 ] = np .nan
1131
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1132
- self .assert_numpy_array_equal (c ._codes ,
1133
- np .array ([0 , 2 , 2 , 0 ], dtype = np .int8 ))
1134
-
1135
- # Changing categories should also make the replaced category np.nan
1136
- c = Categorical (["a" , "b" , "c" , "a" ])
1137
- with tm .assert_produces_warning (FutureWarning ):
1138
- c .categories = ["a" , "b" , np .nan ] # noqa
1139
-
1140
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1141
- self .assert_numpy_array_equal (c ._codes ,
1142
- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
1143
-
1144
1088
# NaN values are never added to the categories; they are encoded
1145
1089
# with the code -1
1146
1090
c = Categorical (["a" , "b" , np .nan , "a" ])
1147
1091
self .assert_index_equal (c .categories , Index (["a" , "b" ]))
1148
1092
self .assert_numpy_array_equal (c ._codes ,
1149
1093
np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
1150
- with tm .assert_produces_warning (FutureWarning ):
1151
- c .set_categories (["a" , "b" , np .nan ], rename = True , inplace = True )
1152
-
1153
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1154
- self .assert_numpy_array_equal (c ._codes ,
1155
- np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
1156
- c [1 ] = np .nan
1157
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1158
- self .assert_numpy_array_equal (c ._codes ,
1159
- np .array ([0 , 2 , - 1 , 0 ], dtype = np .int8 ))
1160
-
1161
- # Remove null categories (GH 10156)
1162
- cases = [([1.0 , 2.0 , np .nan ], [1.0 , 2.0 ]),
1163
- (['a' , 'b' , None ], ['a' , 'b' ]),
1164
- ([pd .Timestamp ('2012-05-01' ), pd .NaT ],
1165
- [pd .Timestamp ('2012-05-01' )])]
1166
-
1167
- null_values = [np .nan , None , pd .NaT ]
1168
-
1169
- for with_null , without in cases :
1170
- with tm .assert_produces_warning (FutureWarning ):
1171
- base = Categorical ([], with_null )
1172
- expected = Categorical ([], without )
1173
-
1174
- for nullval in null_values :
1175
- result = base .remove_categories (nullval )
1176
- self .assert_categorical_equal (result , expected )
1177
-
1178
- # Different null values are indistinguishable
1179
- for i , j in [(0 , 1 ), (0 , 2 ), (1 , 2 )]:
1180
- nulls = [null_values [i ], null_values [j ]]
1181
-
1182
- def f ():
1183
- with tm .assert_produces_warning (FutureWarning ):
1184
- Categorical ([], categories = nulls )
1185
-
1186
- self .assertRaises (ValueError , f )
1187
1094
1188
1095
def test_isnull (self ):
1189
1096
exp = np .array ([False , False , True ])
1190
1097
c = Categorical (["a" , "b" , np .nan ])
1191
1098
res = c .isnull ()
1192
- self .assert_numpy_array_equal (res , exp )
1193
-
1194
- with tm .assert_produces_warning (FutureWarning ):
1195
- c = Categorical (["a" , "b" , np .nan ], categories = ["a" , "b" , np .nan ])
1196
- res = c .isnull ()
1197
- self .assert_numpy_array_equal (res , exp )
1198
1099
1199
- # test both nan in categories and as -1
1200
- exp = np .array ([True , False , True ])
1201
- c = Categorical (["a" , "b" , np .nan ])
1202
- with tm .assert_produces_warning (FutureWarning ):
1203
- c .set_categories (["a" , "b" , np .nan ], rename = True , inplace = True )
1204
- c [0 ] = np .nan
1205
- res = c .isnull ()
1206
1100
self .assert_numpy_array_equal (res , exp )
1207
1101
1208
1102
def test_codes_immutable (self ):
@@ -1487,45 +1381,10 @@ def test_slicing_directly(self):
1487
1381
1488
1382
def test_set_item_nan (self ):
1489
1383
cat = pd .Categorical ([1 , 2 , 3 ])
1490
- exp = pd .Categorical ([1 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1491
- cat [1 ] = np .nan
1492
- tm .assert_categorical_equal (cat , exp )
1493
-
1494
- # if nan in categories, the proper code should be set!
1495
- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1496
- with tm .assert_produces_warning (FutureWarning ):
1497
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1498
1384
cat [1 ] = np .nan
1499
- exp = np .array ([0 , 3 , 2 , - 1 ], dtype = np .int8 )
1500
- self .assert_numpy_array_equal (cat .codes , exp )
1501
-
1502
- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1503
- with tm .assert_produces_warning (FutureWarning ):
1504
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1505
- cat [1 :3 ] = np .nan
1506
- exp = np .array ([0 , 3 , 3 , - 1 ], dtype = np .int8 )
1507
- self .assert_numpy_array_equal (cat .codes , exp )
1508
-
1509
- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1510
- with tm .assert_produces_warning (FutureWarning ):
1511
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1512
- cat [1 :3 ] = [np .nan , 1 ]
1513
- exp = np .array ([0 , 3 , 0 , - 1 ], dtype = np .int8 )
1514
- self .assert_numpy_array_equal (cat .codes , exp )
1515
-
1516
- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1517
- with tm .assert_produces_warning (FutureWarning ):
1518
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1519
- cat [1 :3 ] = [np .nan , np .nan ]
1520
- exp = np .array ([0 , 3 , 3 , - 1 ], dtype = np .int8 )
1521
- self .assert_numpy_array_equal (cat .codes , exp )
1522
1385
1523
- cat = pd .Categorical ([1 , 2 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1524
- with tm .assert_produces_warning (FutureWarning ):
1525
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1526
- cat [pd .isnull (cat )] = np .nan
1527
- exp = np .array ([0 , 1 , 3 , 2 ], dtype = np .int8 )
1528
- self .assert_numpy_array_equal (cat .codes , exp )
1386
+ exp = pd .Categorical ([1 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1387
+ tm .assert_categorical_equal (cat , exp )
1529
1388
1530
1389
def test_shift (self ):
1531
1390
# GH 9416
@@ -2026,33 +1885,12 @@ def test_sideeffects_free(self):
2026
1885
2027
1886
def test_nan_handling (self ):
2028
1887
2029
- # Nans are represented as -1 in labels
1888
+ # NaNs are represented as -1 in labels
2030
1889
s = Series (Categorical (["a" , "b" , np .nan , "a" ]))
2031
1890
self .assert_index_equal (s .cat .categories , Index (["a" , "b" ]))
2032
1891
self .assert_numpy_array_equal (s .values .codes ,
2033
1892
np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
2034
1893
2035
- # If categories have nan included, the label should point to that
2036
- # instead
2037
- with tm .assert_produces_warning (FutureWarning ):
2038
- s2 = Series (Categorical (["a" , "b" , np .nan , "a" ],
2039
- categories = ["a" , "b" , np .nan ]))
2040
-
2041
- exp_cat = Index (["a" , "b" , np .nan ])
2042
- self .assert_index_equal (s2 .cat .categories , exp_cat )
2043
- self .assert_numpy_array_equal (s2 .values .codes ,
2044
- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
2045
-
2046
- # Changing categories should also make the replaced category np.nan
2047
- s3 = Series (Categorical (["a" , "b" , "c" , "a" ]))
2048
- with tm .assert_produces_warning (FutureWarning , check_stacklevel = False ):
2049
- s3 .cat .categories = ["a" , "b" , np .nan ]
2050
-
2051
- exp_cat = Index (["a" , "b" , np .nan ])
2052
- self .assert_index_equal (s3 .cat .categories , exp_cat )
2053
- self .assert_numpy_array_equal (s3 .values .codes ,
2054
- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
2055
-
2056
1894
def test_cat_accessor (self ):
2057
1895
s = Series (Categorical (["a" , "b" , np .nan , "a" ]))
2058
1896
self .assert_index_equal (s .cat .categories , Index (["a" , "b" ]))
0 commit comments