@@ -217,6 +217,16 @@ def test_describe(self):
217
217
).set_index ('levels' )
218
218
tm .assert_frame_equal (desc , expected )
219
219
220
+ # check unused levels
221
+ cat = self .factor .copy ()
222
+ cat .levels = ["a" ,"b" ,"c" ,"d" ]
223
+ desc = cat .describe ()
224
+ expected = DataFrame .from_dict (dict (counts = [3 , 2 , 3 , np .nan ],
225
+ freqs = [3 / 8. , 2 / 8. , 3 / 8. , np .nan ],
226
+ levels = ['a' , 'b' , 'c' , 'd' ])
227
+ ).set_index ('levels' )
228
+ tm .assert_frame_equal (desc , expected )
229
+
220
230
# check an integer one
221
231
desc = Categorical ([1 ,2 ,3 ,1 ,2 ,3 ,3 ,2 ,1 ,1 ,1 ]).describe ()
222
232
expected = DataFrame .from_dict (dict (counts = [5 , 3 , 3 ],
@@ -226,6 +236,29 @@ def test_describe(self):
226
236
).set_index ('levels' )
227
237
tm .assert_frame_equal (desc , expected )
228
238
239
+ # https://github.com/pydata/pandas/issues/3678
240
+ # describe should work with NaN
241
+ cat = pd .Categorical ([np .nan ,1 , 2 , 2 ])
242
+ desc = cat .describe ()
243
+ expected = DataFrame .from_dict (dict (counts = [1 , 2 , 1 ],
244
+ freqs = [1 / 4. , 2 / 4. , 1 / 4. ],
245
+ levels = [1 ,2 ,np .nan ]
246
+ )
247
+ ).set_index ('levels' )
248
+ tm .assert_frame_equal (desc , expected )
249
+
250
+ # when NaN is both an explicit level and the "not available" marker, describe() should list two NaN rows
251
+ cat = pd .Categorical ([np .nan ,1 , 2 , 2 ])
252
+ cat .levels = [1 ,2 ,np .nan ]
253
+ desc = cat .describe ()
254
+ expected = DataFrame .from_dict (dict (counts = [1 , 2 , np .nan , 1 ],
255
+ freqs = [1 / 4. , 2 / 4. , np .nan , 1 / 4. ],
256
+ levels = [1 ,2 ,np .nan ,np .nan ]
257
+ )
258
+ ).set_index ('levels' )
259
+ tm .assert_frame_equal (desc , expected )
260
+
261
+
229
262
def test_print (self ):
230
263
expected = [" a" , " b" , " b" , " a" , " a" , " c" , " c" , " c" ,
231
264
"Levels (3, object): [a < b < c]" ]
0 commit comments