7
7
import numpy as np
8
8
import pandas as pd
9
9
10
- from pandas import (Categorical , Index , Series , DataFrame , PeriodIndex , Timestamp )
10
+ from pandas import (Categorical , Index , Series , DataFrame , PeriodIndex ,
11
+ Timestamp , _np_version_under1p7 )
11
12
12
13
import pandas .core .common as com
13
14
import pandas .compat as compat
@@ -345,12 +346,12 @@ def test_remove_unused_levels(self):
345
346
346
347
def test_nan_handling (self ):
347
348
348
- # Nans are represented as -1 in labels
349
+ # NaNs are represented as -1 in codes
349
350
c = Categorical (["a" ,"b" ,np .nan ,"a" ])
350
351
self .assert_numpy_array_equal (c .levels , np .array (["a" ,"b" ]))
351
352
self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,- 1 ,0 ]))
352
353
353
- # If levels have nan included, the label should point to that instead
354
+ # If the levels include NaN, the code should point to that level instead
354
355
c = Categorical (["a" ,"b" ,np .nan ,"a" ], levels = ["a" ,"b" ,np .nan ])
355
356
self .assert_numpy_array_equal (c .levels , np .array (["a" ,"b" ,np .nan ],dtype = np .object_ ))
356
357
self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,2 ,0 ]))
@@ -361,6 +362,36 @@ def test_nan_handling(self):
361
362
self .assert_numpy_array_equal (c .levels , np .array (["a" ,"b" ,np .nan ],dtype = np .object_ ))
362
363
self .assert_numpy_array_equal (c ._codes , np .array ([0 ,1 ,2 ,0 ]))
363
364
365
+ def test_codes_immutable (self ):
366
+
367
+ # The public codes are read-only; NaN is encoded as -1
368
+ c = Categorical (["a" ,"b" ,"c" ,"a" , np .nan ])
369
+ exp = np .array ([0 ,1 ,2 ,0 , - 1 ])
370
+ self .assert_numpy_array_equal (c .codes , exp )
371
+
372
+ # Rebinding the codes attribute should raise ValueError
373
+ def f ():
374
+ c .codes = np .array ([0 ,1 ,2 ,0 ,1 ])
375
+ self .assertRaises (ValueError , f )
376
+
377
+ # In-place changes to the array returned by codes should raise
378
+ # NumPy older than 1.7 (e.g. 1.6.1) raises RuntimeError rather than ValueError
379
+ codes = c .codes
380
+ def f ():
381
+ codes [4 ] = 1
382
+ if _np_version_under1p7 :
383
+ self .assertRaises (RuntimeError , f )
384
+ else :
385
+ self .assertRaises (ValueError , f )
386
+
387
+ # Reading codes must not freeze the Categorical: item assignment and the private _codes array stay writable
388
+ c [4 ] = "a"
389
+ exp = np .array ([0 ,1 ,2 ,0 , 0 ])
390
+ self .assert_numpy_array_equal (c .codes , exp )
391
+ c ._codes [4 ] = 2
392
+ exp = np .array ([0 ,1 ,2 ,0 , 2 ])
393
+ self .assert_numpy_array_equal (c .codes , exp )
394
+
364
395
365
396
def test_min_max (self ):
366
397
@@ -549,6 +580,19 @@ def test_creation_astype(self):
549
580
res = s .astype ('category' )
550
581
tm .assert_series_equal (res , exp )
551
582
583
+ df = pd .DataFrame ({"cats" :[1 ,2 ,3 ,4 ,5 ,6 ], "vals" :[1 ,2 ,3 ,4 ,5 ,6 ]})
584
+ cats = Categorical ([1 ,2 ,3 ,4 ,5 ,6 ])
585
+ exp_df = pd .DataFrame ({"cats" :cats , "vals" :[1 ,2 ,3 ,4 ,5 ,6 ]})
586
+ df ["cats" ] = df ["cats" ].astype ("category" )
587
+ tm .assert_frame_equal (exp_df , df )
588
+
589
+
590
+ df = pd .DataFrame ({"cats" :['a' , 'b' , 'b' , 'a' , 'a' , 'd' ], "vals" :[1 ,2 ,3 ,4 ,5 ,6 ]})
591
+ cats = Categorical (['a' , 'b' , 'b' , 'a' , 'a' , 'd' ])
592
+ exp_df = pd .DataFrame ({"cats" :cats , "vals" :[1 ,2 ,3 ,4 ,5 ,6 ]})
593
+ df ["cats" ] = df ["cats" ].astype ("category" )
594
+ tm .assert_frame_equal (exp_df , df )
595
+
552
596
def test_sideeffects_free (self ):
553
597
554
598
# Passing a categorical to a Series and then changing values in either the series or the
0 commit comments