@@ -160,12 +160,6 @@ def f():
160
160
161
161
self .assertRaises (ValueError , f )
162
162
163
- def f ():
164
- with tm .assert_produces_warning (FutureWarning ):
165
- Categorical ([1 , 2 ], [1 , 2 , np .nan , np .nan ])
166
-
167
- self .assertRaises (ValueError , f )
168
-
169
163
# The default should be unordered
170
164
c1 = Categorical (["a" , "b" , "c" , "a" ])
171
165
self .assertFalse (c1 .ordered )
@@ -222,29 +216,21 @@ def f():
222
216
cat = pd .Categorical ([np .nan , 1. , 2. , 3. ])
223
217
self .assertTrue (is_float_dtype (cat .categories ))
224
218
225
- # Deprecating NaNs in categoires (GH #10748)
226
- # preserve int as far as possible by converting to object if NaN is in
227
- # categories
228
- with tm .assert_produces_warning (FutureWarning ):
229
- cat = pd .Categorical ([np .nan , 1 , 2 , 3 ],
230
- categories = [np .nan , 1 , 2 , 3 ])
231
- self .assertTrue (is_object_dtype (cat .categories ))
232
-
233
219
# This doesn't work -> this would probably need some kind of "remember
234
220
# the original type" feature to try to cast the array interface result
235
221
# to...
236
222
237
223
# vals = np.asarray(cat[cat.notnull()])
238
224
# self.assertTrue(is_integer_dtype(vals))
239
- with tm . assert_produces_warning ( FutureWarning ):
240
- cat = pd . Categorical ([ np . nan , "a" , "b" , "c" ],
241
- categories = [ np . nan , "a" , "b" , "c" ])
242
- self . assertTrue ( is_object_dtype ( cat . categories ))
243
- # but don't do it for floats
244
- with tm . assert_produces_warning ( FutureWarning ):
245
- cat = pd . Categorical ([ np . nan , 1. , 2. , 3. ],
246
- categories = [ np . nan , 1. , 2. , 3. ] )
247
- self .assertTrue ( is_float_dtype ( cat . categories ) )
225
+
226
+ # Cannot have NaN in categories
227
+ def f ( null_value ):
228
+ pd . Categorical ([ null_value , "a" , "b" , "c" ],
229
+ categories = [ null_value , "a" , "b" , "c" ])
230
+
231
+ self . assertRaises ( ValueError , f , np . nan )
232
+ self . assertRaises ( ValueError , f , pd . NaT )
233
+ self .assertRaises ( ValueError , f , None )
248
234
249
235
# corner cases
250
236
cat = pd .Categorical ([1 ])
@@ -418,6 +404,12 @@ def f():
418
404
419
405
self .assertRaises (ValueError , f )
420
406
407
+ # NaN categories included
408
+ def f ():
409
+ Categorical .from_codes ([0 , 1 , 2 ], ["a" , "b" , np .nan ])
410
+
411
+ self .assertRaises (ValueError , f )
412
+
421
413
# too negative
422
414
def f ():
423
415
Categorical .from_codes ([- 2 , 1 , 2 ], ["a" , "b" , "c" ])
@@ -649,30 +641,6 @@ def test_describe(self):
649
641
name = 'categories' ))
650
642
tm .assert_frame_equal (desc , expected )
651
643
652
- # NA as a category
653
- with tm .assert_produces_warning (FutureWarning ):
654
- cat = pd .Categorical (["a" , "c" , "c" , np .nan ],
655
- categories = ["b" , "a" , "c" , np .nan ])
656
- result = cat .describe ()
657
-
658
- expected = DataFrame ([[0 , 0 ], [1 , 0.25 ], [2 , 0.5 ], [1 , 0.25 ]],
659
- columns = ['counts' , 'freqs' ],
660
- index = pd .CategoricalIndex (['b' , 'a' , 'c' , np .nan ],
661
- name = 'categories' ))
662
- tm .assert_frame_equal (result , expected , check_categorical = False )
663
-
664
- # NA as an unused category
665
- with tm .assert_produces_warning (FutureWarning ):
666
- cat = pd .Categorical (["a" , "c" , "c" ],
667
- categories = ["b" , "a" , "c" , np .nan ])
668
- result = cat .describe ()
669
-
670
- exp_idx = pd .CategoricalIndex (
671
- ['b' , 'a' , 'c' , np .nan ], name = 'categories' )
672
- expected = DataFrame ([[0 , 0 ], [1 , 1 / 3. ], [2 , 2 / 3. ], [0 , 0 ]],
673
- columns = ['counts' , 'freqs' ], index = exp_idx )
674
- tm .assert_frame_equal (result , expected , check_categorical = False )
675
-
676
644
def test_print (self ):
677
645
expected = ["[a, b, b, a, a, c, c, c]" ,
678
646
"Categories (3, object): [a < b < c]" ]
@@ -1119,90 +1087,18 @@ def test_nan_handling(self):
1119
1087
self .assert_numpy_array_equal (c ._codes ,
1120
1088
np .array ([0 , - 1 , - 1 , 0 ], dtype = np .int8 ))
1121
1089
1122
- # If categories have nan included, the code should point to that
1123
- # instead
1124
- with tm .assert_produces_warning (FutureWarning ):
1125
- c = Categorical (["a" , "b" , np .nan , "a" ],
1126
- categories = ["a" , "b" , np .nan ])
1127
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1128
- self .assert_numpy_array_equal (c ._codes ,
1129
- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
1130
- c [1 ] = np .nan
1131
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1132
- self .assert_numpy_array_equal (c ._codes ,
1133
- np .array ([0 , 2 , 2 , 0 ], dtype = np .int8 ))
1134
-
1135
- # Changing categories should also make the replaced category np.nan
1136
- c = Categorical (["a" , "b" , "c" , "a" ])
1137
- with tm .assert_produces_warning (FutureWarning ):
1138
- c .categories = ["a" , "b" , np .nan ] # noqa
1139
-
1140
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1141
- self .assert_numpy_array_equal (c ._codes ,
1142
- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
1143
-
1144
1090
# Adding nan to categories should make assigned nan point to the
1145
1091
# category!
1146
1092
c = Categorical (["a" , "b" , np .nan , "a" ])
1147
1093
self .assert_index_equal (c .categories , Index (["a" , "b" ]))
1148
1094
self .assert_numpy_array_equal (c ._codes ,
1149
1095
np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
1150
- with tm .assert_produces_warning (FutureWarning ):
1151
- c .set_categories (["a" , "b" , np .nan ], rename = True , inplace = True )
1152
-
1153
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1154
- self .assert_numpy_array_equal (c ._codes ,
1155
- np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
1156
- c [1 ] = np .nan
1157
- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1158
- self .assert_numpy_array_equal (c ._codes ,
1159
- np .array ([0 , 2 , - 1 , 0 ], dtype = np .int8 ))
1160
-
1161
- # Remove null categories (GH 10156)
1162
- cases = [([1.0 , 2.0 , np .nan ], [1.0 , 2.0 ]),
1163
- (['a' , 'b' , None ], ['a' , 'b' ]),
1164
- ([pd .Timestamp ('2012-05-01' ), pd .NaT ],
1165
- [pd .Timestamp ('2012-05-01' )])]
1166
-
1167
- null_values = [np .nan , None , pd .NaT ]
1168
-
1169
- for with_null , without in cases :
1170
- with tm .assert_produces_warning (FutureWarning ):
1171
- base = Categorical ([], with_null )
1172
- expected = Categorical ([], without )
1173
-
1174
- for nullval in null_values :
1175
- result = base .remove_categories (nullval )
1176
- self .assert_categorical_equal (result , expected )
1177
-
1178
- # Different null values are indistinguishable
1179
- for i , j in [(0 , 1 ), (0 , 2 ), (1 , 2 )]:
1180
- nulls = [null_values [i ], null_values [j ]]
1181
-
1182
- def f ():
1183
- with tm .assert_produces_warning (FutureWarning ):
1184
- Categorical ([], categories = nulls )
1185
-
1186
- self .assertRaises (ValueError , f )
1187
1096
1188
1097
def test_isnull (self ):
1189
1098
exp = np .array ([False , False , True ])
1190
1099
c = Categorical (["a" , "b" , np .nan ])
1191
1100
res = c .isnull ()
1192
- self .assert_numpy_array_equal (res , exp )
1193
-
1194
- with tm .assert_produces_warning (FutureWarning ):
1195
- c = Categorical (["a" , "b" , np .nan ], categories = ["a" , "b" , np .nan ])
1196
- res = c .isnull ()
1197
- self .assert_numpy_array_equal (res , exp )
1198
1101
1199
- # test both nan in categories and as -1
1200
- exp = np .array ([True , False , True ])
1201
- c = Categorical (["a" , "b" , np .nan ])
1202
- with tm .assert_produces_warning (FutureWarning ):
1203
- c .set_categories (["a" , "b" , np .nan ], rename = True , inplace = True )
1204
- c [0 ] = np .nan
1205
- res = c .isnull ()
1206
1102
self .assert_numpy_array_equal (res , exp )
1207
1103
1208
1104
def test_codes_immutable (self ):
@@ -1487,45 +1383,10 @@ def test_slicing_directly(self):
1487
1383
1488
1384
def test_set_item_nan (self ):
1489
1385
cat = pd .Categorical ([1 , 2 , 3 ])
1490
- exp = pd .Categorical ([1 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1491
1386
cat [1 ] = np .nan
1492
- tm .assert_categorical_equal (cat , exp )
1493
1387
1494
- # if nan in categories, the proper code should be set!
1495
- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1496
- with tm .assert_produces_warning (FutureWarning ):
1497
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1498
- cat [1 ] = np .nan
1499
- exp = np .array ([0 , 3 , 2 , - 1 ], dtype = np .int8 )
1500
- self .assert_numpy_array_equal (cat .codes , exp )
1501
-
1502
- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1503
- with tm .assert_produces_warning (FutureWarning ):
1504
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1505
- cat [1 :3 ] = np .nan
1506
- exp = np .array ([0 , 3 , 3 , - 1 ], dtype = np .int8 )
1507
- self .assert_numpy_array_equal (cat .codes , exp )
1508
-
1509
- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1510
- with tm .assert_produces_warning (FutureWarning ):
1511
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1512
- cat [1 :3 ] = [np .nan , 1 ]
1513
- exp = np .array ([0 , 3 , 0 , - 1 ], dtype = np .int8 )
1514
- self .assert_numpy_array_equal (cat .codes , exp )
1515
-
1516
- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1517
- with tm .assert_produces_warning (FutureWarning ):
1518
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1519
- cat [1 :3 ] = [np .nan , np .nan ]
1520
- exp = np .array ([0 , 3 , 3 , - 1 ], dtype = np .int8 )
1521
- self .assert_numpy_array_equal (cat .codes , exp )
1522
-
1523
- cat = pd .Categorical ([1 , 2 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1524
- with tm .assert_produces_warning (FutureWarning ):
1525
- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1526
- cat [pd .isnull (cat )] = np .nan
1527
- exp = np .array ([0 , 1 , 3 , 2 ], dtype = np .int8 )
1528
- self .assert_numpy_array_equal (cat .codes , exp )
1388
+ exp = pd .Categorical ([1 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1389
+ tm .assert_categorical_equal (cat , exp )
1529
1390
1530
1391
def test_shift (self ):
1531
1392
# GH 9416
@@ -2026,33 +1887,12 @@ def test_sideeffects_free(self):
2026
1887
2027
1888
def test_nan_handling (self ):
2028
1889
2029
- # Nans are represented as -1 in labels
1890
+ # NaNs are represented as -1 in labels
2030
1891
s = Series (Categorical (["a" , "b" , np .nan , "a" ]))
2031
1892
self .assert_index_equal (s .cat .categories , Index (["a" , "b" ]))
2032
1893
self .assert_numpy_array_equal (s .values .codes ,
2033
1894
np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
2034
1895
2035
- # If categories have nan included, the label should point to that
2036
- # instead
2037
- with tm .assert_produces_warning (FutureWarning ):
2038
- s2 = Series (Categorical (["a" , "b" , np .nan , "a" ],
2039
- categories = ["a" , "b" , np .nan ]))
2040
-
2041
- exp_cat = Index (["a" , "b" , np .nan ])
2042
- self .assert_index_equal (s2 .cat .categories , exp_cat )
2043
- self .assert_numpy_array_equal (s2 .values .codes ,
2044
- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
2045
-
2046
- # Changing categories should also make the replaced category np.nan
2047
- s3 = Series (Categorical (["a" , "b" , "c" , "a" ]))
2048
- with tm .assert_produces_warning (FutureWarning , check_stacklevel = False ):
2049
- s3 .cat .categories = ["a" , "b" , np .nan ]
2050
-
2051
- exp_cat = Index (["a" , "b" , np .nan ])
2052
- self .assert_index_equal (s3 .cat .categories , exp_cat )
2053
- self .assert_numpy_array_equal (s3 .values .codes ,
2054
- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
2055
-
2056
1896
def test_cat_accessor (self ):
2057
1897
s = Series (Categorical (["a" , "b" , np .nan , "a" ]))
2058
1898
self .assert_index_equal (s .cat .categories , Index (["a" , "b" ]))
0 commit comments