@@ -2162,50 +2162,86 @@ def test_basic(self):
2162
2162
result = x .person_name .loc [0 ]
2163
2163
assert result == expected
2164
2164
2165
- def test_creation_astype (self ):
2166
- l = ["a" , "b" , "c" , "a" ]
2167
- s = Series (l )
2168
- exp = Series (Categorical (l ))
2169
- res = s .astype ('category' )
2165
+ def test_series_creation_astype (self ):
2166
+ labels = list ('abca' )
2167
+ exp = Series (Categorical (labels ))
2168
+ res = Series (labels ).astype ('category' )
2170
2169
tm .assert_series_equal (res , exp )
2171
2170
2172
- l = [1 , 2 , 3 , 1 ]
2173
- s = Series (l )
2174
- exp = Series (Categorical (l ))
2175
- res = s .astype ('category' )
2171
+ labels = [1 , 2 , 3 , 1 ]
2172
+ exp = Series (Categorical (labels ))
2173
+ res = Series (labels ).astype ('category' )
2176
2174
tm .assert_series_equal (res , exp )
2177
2175
2178
- df = DataFrame ({"cats" : [1 , 2 , 3 , 4 , 5 , 6 ],
2179
- "vals" : [1 , 2 , 3 , 4 , 5 , 6 ]})
2180
- cats = Categorical ([1 , 2 , 3 , 4 , 5 , 6 ])
2181
- exp_df = DataFrame ({"cats" : cats , "vals" : [1 , 2 , 3 , 4 , 5 , 6 ]})
2182
- df ["cats" ] = df ["cats" ].astype ("category" )
2183
- tm .assert_frame_equal (exp_df , df )
2176
+ labels_int = [1 , 2 , 3 , 4 , 5 , 6 ]
2177
+ exp = DataFrame ({"cats" : Categorical (labels_int ), "vals" : labels_int })
2178
+ res = DataFrame ({"cats" : labels_int , "vals" : labels_int })
2179
+ res ["cats" ] = res ["cats" ].astype ("category" )
2180
+ tm .assert_frame_equal (res , exp )
2184
2181
2185
- df = DataFrame ({"cats" : ['a' , 'b' , 'b' , 'a' , 'a' , 'd' ],
2186
- "vals" : [1 , 2 , 3 , 4 , 5 , 6 ]})
2187
- cats = Categorical (['a' , 'b' , 'b' , 'a' , 'a' , 'd' ])
2188
- exp_df = DataFrame ({"cats" : cats , "vals" : [1 , 2 , 3 , 4 , 5 , 6 ]})
2189
- df ["cats" ] = df ["cats" ].astype ("category" )
2190
- tm .assert_frame_equal (exp_df , df )
2182
+ labels_str = list ('abbaad' )
2183
+ exp = DataFrame ({"cats" : Categorical (labels_str ), "vals" : labels_int })
2184
+ res = DataFrame ({"cats" : labels_str , "vals" : labels_int })
2185
+ res ["cats" ] = res ["cats" ].astype ("category" )
2186
+ tm .assert_frame_equal (res , exp )
2191
2187
2192
2188
# with keywords
2193
- l = [ "a" , "b" , "c" , "a" ]
2194
- s = Series (l )
2195
- exp = Series (Categorical (l , ordered = True ))
2189
+ labels = list ( 'abca' )
2190
+ s = Series (labels )
2191
+ exp = Series (Categorical (labels , ordered = True ))
2196
2192
res = s .astype (CategoricalDtype (None , ordered = True ))
2197
2193
tm .assert_series_equal (res , exp )
2198
2194
2199
- exp = Series (Categorical (l , categories = list ('abcdef' ), ordered = True ))
2200
- res = s .astype (CategoricalDtype (list ('abcdef' ), ordered = True ))
2195
+ cats = list ('abcdef' )
2196
+ exp = Series (Categorical (labels , categories = cats , ordered = True ))
2197
+ res = s .astype (CategoricalDtype (cats , ordered = True ))
2201
2198
tm .assert_series_equal (res , exp )
2202
2199
2200
+ def test_frame_creation_astype (self ):
2201
+ # GH 12860
2202
+ cats = list ('abcde' )
2203
+ x = Categorical (list ('abcd' ), categories = cats )
2204
+ y = Categorical (list ('bcde' ), categories = cats )
2205
+ exp = DataFrame ({'x' : x , 'y' : y })
2206
+
2207
+ data = {'x' : list ('abcd' ), 'y' : list ('bcde' )}
2208
+ res = DataFrame (data ).astype ('category' )
2209
+ tm .assert_frame_equal (res , exp )
2210
+
2211
+ res = DataFrame (data ).astype (CategoricalDtype ())
2212
+ tm .assert_frame_equal (res , exp )
2213
+
2214
+ # categories keyword
2215
+ cats = list ('abdef' )
2216
+ x = Categorical (['a' , 'b' , np .nan , 'd' ], categories = cats )
2217
+ y = Categorical (['b' , np .nan , 'd' , 'e' ], categories = cats )
2218
+ exp = DataFrame ({'x' : x , 'y' : y })
2219
+
2220
+ res = DataFrame (data ).astype ('category' , categories = cats )
2221
+ tm .assert_frame_equal (res , exp )
2222
+
2223
+ res = DataFrame (data ).astype (CategoricalDtype (categories = cats ))
2224
+ tm .assert_frame_equal (res , exp )
2225
+
2226
+ # ordered keyword
2227
+ cats = [1 , 2 , 3 , 4 , 0 ]
2228
+ x = Categorical (range (1 , 5 ), categories = cats , ordered = True )
2229
+ y = Categorical (range (4 ), categories = cats , ordered = True )
2230
+ exp = DataFrame ({'x' : x , 'y' : y })
2231
+
2232
+ data = {'x' : range (1 , 5 ), 'y' : range (4 )}
2233
+ res = DataFrame (data ).astype ('category' , ordered = True )
2234
+ tm .assert_frame_equal (res , exp )
2235
+
2236
+ res = DataFrame (data ).astype (CategoricalDtype (ordered = True ))
2237
+ tm .assert_frame_equal (res , exp )
2238
+
2203
2239
@pytest .mark .parametrize ('columns' , [['x' ], ['x' , 'y' ], ['x' , 'y' , 'z' ]])
2204
2240
def test_empty_astype (self , columns ):
2205
2241
# GH 18004
2206
- msg = '> 1 ndim Categorical are not supported at this time'
2207
- with tm . assert_raises_regex ( NotImplementedError , msg ):
2208
- DataFrame ( columns = columns ). astype ( 'category' )
2242
+ exp = DataFrame ({ c : Categorical ([]) for c in columns }, index = [])
2243
+ res = DataFrame ( columns = columns ). astype ( 'category' )
2244
+ tm . assert_frame_equal ( res , exp )
2209
2245
2210
2246
def test_construction_series (self ):
2211
2247
0 commit comments