@@ -2174,51 +2174,86 @@ def test_basic(self):
2174
2174
result = x .person_name .loc [0 ]
2175
2175
assert result == expected
2176
2176
2177
def test_series_creation_astype(self):
    """Check that ``astype`` to category matches direct construction.

    Covers a Series of strings, a Series of ints, a single DataFrame
    column converted in place, and conversion through an explicit
    ``CategoricalDtype`` (with and without a categories list).
    """
    # string labels
    labels = list('abca')
    exp = Series(Categorical(labels))
    res = Series(labels).astype('category')
    tm.assert_series_equal(res, exp)

    # integer labels
    labels = [1, 2, 3, 1]
    exp = Series(Categorical(labels))
    res = Series(labels).astype('category')
    tm.assert_series_equal(res, exp)

    # converting one integer column of a frame; "vals" stays untouched
    labels_int = [1, 2, 3, 4, 5, 6]
    exp = DataFrame({"cats": Categorical(labels_int), "vals": labels_int})
    res = DataFrame({"cats": labels_int, "vals": labels_int})
    res["cats"] = res["cats"].astype("category")
    tm.assert_frame_equal(res, exp)

    # converting one string column of a frame
    labels_str = list('abbaad')
    exp = DataFrame({"cats": Categorical(labels_str), "vals": labels_int})
    res = DataFrame({"cats": labels_str, "vals": labels_int})
    res["cats"] = res["cats"].astype("category")
    tm.assert_frame_equal(res, exp)

    # with keywords
    labels = list('abca')
    s = Series(labels)
    exp = Series(Categorical(labels, ordered=True))
    res = s.astype(CategoricalDtype(None, ordered=True))
    tm.assert_series_equal(res, exp)

    # explicit categories: a superset of the values actually present
    cats = list('abcdef')
    exp = Series(Categorical(labels, categories=cats, ordered=True))
    res = s.astype(CategoricalDtype(cats, ordered=True))
    tm.assert_series_equal(res, exp)
2215
2211
2212
def test_frame_creation_astype(self):
    """Check whole-frame ``DataFrame.astype`` to a categorical dtype.

    Each scenario is exercised twice: once with the ``'category'`` string
    (plus keywords where relevant) and once with an equivalent
    ``CategoricalDtype`` instance. In every expected frame both columns
    are built with the same ``categories`` list.
    """
    # GH 12860
    # No keywords: the expected columns share the categories 'a'..'e',
    # i.e. every value that occurs in either column.
    cats = list('abcde')
    x = Categorical(list('abcd'), categories=cats)
    y = Categorical(list('bcde'), categories=cats)
    exp = DataFrame({'x': x, 'y': y})

    data = {'x': list('abcd'), 'y': list('bcde')}
    res = DataFrame(data).astype('category')
    tm.assert_frame_equal(res, exp)

    res = DataFrame(data).astype(CategoricalDtype())
    tm.assert_frame_equal(res, exp)

    # categories keyword
    # 'c' is absent from the requested categories, so the expected
    # columns hold NaN wherever the input data has 'c'.
    cats = list('abdef')
    x = Categorical(['a', 'b', np.nan, 'd'], categories=cats)
    y = Categorical(['b', np.nan, 'd', 'e'], categories=cats)
    exp = DataFrame({'x': x, 'y': y})

    res = DataFrame(data).astype('category', categories=cats)
    tm.assert_frame_equal(res, exp)

    res = DataFrame(data).astype(CategoricalDtype(categories=cats))
    tm.assert_frame_equal(res, exp)

    # ordered keyword
    # NOTE(review): the expected category order [1, 2, 3, 4, 0] looks like
    # order of first appearance across columns x then y -- confirm against
    # the astype implementation.
    cats = [1, 2, 3, 4, 0]
    x = Categorical(range(1, 5), categories=cats, ordered=True)
    y = Categorical(range(4), categories=cats, ordered=True)
    exp = DataFrame({'x': x, 'y': y})

    data = {'x': range(1, 5), 'y': range(4)}
    res = DataFrame(data).astype('category', ordered=True)
    tm.assert_frame_equal(res, exp)

    res = DataFrame(data).astype(CategoricalDtype(ordered=True))
    tm.assert_frame_equal(res, exp)
2250
+
2216
2251
@pytest.mark.parametrize('columns', [['x'], ['x', 'y'], ['x', 'y', 'z']])
def test_empty_astype(self, columns):
    """Check ``astype('category')`` on a frame that has columns but no rows.

    The result is compared against a frame whose columns are each an
    empty ``Categorical``, for one, two and three column labels.
    """
    # GH 18004
    exp = DataFrame({c: Categorical([]) for c in columns}, index=[])
    res = DataFrame(columns=columns).astype('category')
    tm.assert_frame_equal(res, exp)
2222
2257
2223
2258
def test_construction_series (self ):
2224
2259
0 commit comments