10
10
11
11
from pandas .compat import long
12
12
13
- from pandas import DataFrame , MultiIndex , Series , Timestamp , compat , date_range
13
+ from pandas import (
14
+ CategoricalDtype , DataFrame , MultiIndex , Series , Timestamp , compat ,
15
+ date_range )
14
16
from pandas .tests .frame .common import TestData
15
17
import pandas .util .testing as tm
16
18
@@ -220,6 +222,12 @@ def test_to_records_with_categorical(self):
220
222
dtype = [("index" , "<i8" ), ("A" , "<U" ),
221
223
("B" , "<U" ), ("C" , "<U" )])),
222
224
225
+ # Pass in a dtype instance.
226
+ (dict (column_dtypes = np .dtype ('unicode' )),
227
+ np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
228
+ dtype = [("index" , "<i8" ), ("A" , "<U" ),
229
+ ("B" , "<U" ), ("C" , "<U" )])),
230
+
223
231
# Pass in a dictionary (name-only).
224
232
(dict (column_dtypes = {"A" : np .int8 , "B" : np .float32 , "C" : "<U2" }),
225
233
np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
@@ -249,6 +257,12 @@ def test_to_records_with_categorical(self):
249
257
dtype = [("index" , "<i8" ), ("A" , "i1" ),
250
258
("B" , "<f4" ), ("C" , "O" )])),
251
259
260
+ # Names / indices not in dtype mapping default to array dtype.
261
+ (dict (column_dtypes = {"A" : np .dtype ('int8' ), "B" : np .dtype ('float32' )}),
262
+ np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
263
+ dtype = [("index" , "<i8" ), ("A" , "i1" ),
264
+ ("B" , "<f4" ), ("C" , "O" )])),
265
+
252
266
# Mixture of everything.
253
267
(dict (column_dtypes = {"A" : np .int8 , "B" : np .float32 },
254
268
index_dtypes = "<U2" ),
@@ -258,17 +272,26 @@ def test_to_records_with_categorical(self):
258
272
259
273
# Invalid dtype values.
260
274
(dict (index = False , column_dtypes = list ()),
261
- "Invalid dtype \\ [\\ ] specified for column A" ),
275
+ ( ValueError , "Invalid dtype \\ [\\ ] specified for column A" ) ),
262
276
263
277
(dict (index = False , column_dtypes = {"A" : "int32" , "B" : 5 }),
264
- "Invalid dtype 5 specified for column B" ),
278
+ (ValueError , "Invalid dtype 5 specified for column B" )),
279
+
280
+ # NumPy can't handle extension array (EA) dtypes, so check that an error is raised.
281
+ (dict (index = False , column_dtypes = {"A" : "int32" ,
282
+ "B" : CategoricalDtype (['a' , 'b' ])}),
283
+ (ValueError , 'Invalid dtype category specified for column B' )),
284
+
285
+ # Check that bad types raise
286
+ (dict (index = False , column_dtypes = {"A" : "int32" , "B" : "foo" }),
287
+ (TypeError , 'data type "foo" not understood' )),
265
288
])
266
289
def test_to_records_dtype (self , kwargs , expected ):
267
290
# see gh-18146
268
291
df = DataFrame ({"A" : [1 , 2 ], "B" : [0.2 , 1.5 ], "C" : ["a" , "bc" ]})
269
292
270
- if isinstance (expected , str ):
271
- with pytest .raises (ValueError , match = expected ):
293
+ if not isinstance (expected , np . recarray ):
294
+ with pytest .raises (expected [ 0 ] , match = expected [ 1 ] ):
272
295
df .to_records (** kwargs )
273
296
else :
274
297
result = df .to_records (** kwargs )
0 commit comments