@@ -162,12 +162,17 @@ def test_all_nan(self):
162
162
tm .assert_series_equal (res , expected )
163
163
164
164
def test_type_check (self ):
165
- # GH 11776
166
- df = pd .DataFrame ({'a' : [1 , - 3.14 , 7 ], 'b' : ['4' , '5' , '6' ]})
167
- with tm .assert_raises_regex (TypeError , "1-d array" ):
165
+ # see gh-11776
166
+ df = pd .DataFrame ({"a" : [1 , - 3.14 , 7 ], "b" : ["4" , "5" , "6" ]})
167
+ error_ctx = tm .assert_raises_regex (TypeError , "1-d array" )
168
+
169
+ # Check default parameters.
170
+ with error_ctx :
168
171
to_numeric (df )
169
- for errors in ['ignore' , 'raise' , 'coerce' ]:
170
- with tm .assert_raises_regex (TypeError , "1-d array" ):
172
+
173
+ # Check each parameter value for `errors`.
174
+ for errors in ["ignore" , "raise" , "coerce" ]:
175
+ with error_ctx :
171
176
to_numeric (df , errors = errors )
172
177
173
178
def test_scalar (self ):
@@ -227,17 +232,17 @@ def test_str(self):
227
232
res = pd .to_numeric (idx .values )
228
233
tm .assert_numpy_array_equal (res , exp )
229
234
230
- def test_datetimelike (self ):
231
- for tz in [ None , 'US/Eastern' , 'Asia/Tokyo' ]:
232
- idx = pd . date_range ( '20130101' , periods = 3 , tz = tz , name = ' xxx' )
233
- res = pd .to_numeric (idx )
234
- tm .assert_index_equal (res , pd .Index (idx .asi8 , name = ' xxx' ))
235
+ def test_datetime_like (self , tz_naive_fixture ):
236
+ idx = pd . date_range ( "20130101" , periods = 3 ,
237
+ tz = tz_naive_fixture , name = " xxx" )
238
+ res = pd .to_numeric (idx )
239
+ tm .assert_index_equal (res , pd .Index (idx .asi8 , name = " xxx" ))
235
240
236
- res = pd .to_numeric (pd .Series (idx , name = ' xxx' ))
237
- tm .assert_series_equal (res , pd .Series (idx .asi8 , name = ' xxx' ))
241
+ res = pd .to_numeric (pd .Series (idx , name = " xxx" ))
242
+ tm .assert_series_equal (res , pd .Series (idx .asi8 , name = " xxx" ))
238
243
239
- res = pd .to_numeric (idx .values )
240
- tm .assert_numpy_array_equal (res , idx .asi8 )
244
+ res = pd .to_numeric (idx .values )
245
+ tm .assert_numpy_array_equal (res , idx .asi8 )
241
246
242
247
def test_timedelta (self ):
243
248
idx = pd .timedelta_range ('1 days' , periods = 3 , freq = 'D' , name = 'xxx' )
@@ -255,7 +260,7 @@ def test_period(self):
255
260
res = pd .to_numeric (idx )
256
261
tm .assert_index_equal (res , pd .Index (idx .asi8 , name = 'xxx' ))
257
262
258
- # ToDo : enable when we can support native PeriodDtype
263
+ # TODO : enable when we can support native PeriodDtype
259
264
# res = pd.to_numeric(pd.Series(idx, name='xxx'))
260
265
# tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx'))
261
266
@@ -271,116 +276,147 @@ def test_non_hashable(self):
271
276
with tm .assert_raises_regex (TypeError , "Invalid object type" ):
272
277
pd .to_numeric (s )
273
278
274
- def test_downcast (self ):
279
+ @pytest .mark .parametrize ("data" , [
280
+ ["1" , 2 , 3 ],
281
+ [1 , 2 , 3 ],
282
+ np .array (["1970-01-02" , "1970-01-03" ,
283
+ "1970-01-04" ], dtype = "datetime64[D]" )
284
+ ])
285
+ def test_downcast_basic (self , data ):
275
286
# see gh-13352
276
- mixed_data = ['1' , 2 , 3 ]
277
- int_data = [1 , 2 , 3 ]
278
- date_data = np .array (['1970-01-02' , '1970-01-03' ,
279
- '1970-01-04' ], dtype = 'datetime64[D]' )
280
-
281
- invalid_downcast = 'unsigned-integer'
282
- msg = 'invalid downcasting method provided'
287
+ invalid_downcast = "unsigned-integer"
288
+ msg = "invalid downcasting method provided"
283
289
284
- smallest_int_dtype = np . dtype ( np . typecodes [ 'Integer' ][ 0 ])
285
- smallest_uint_dtype = np . dtype ( np . typecodes [ 'UnsignedInteger' ][ 0 ] )
290
+ with tm . assert_raises_regex ( ValueError , msg ):
291
+ pd . to_numeric ( data , downcast = invalid_downcast )
286
292
287
- # support below np.float32 is rare and far between
288
- float_32_char = np .dtype (np .float32 ).char
289
- smallest_float_dtype = float_32_char
293
+ expected = np .array ([1 , 2 , 3 ], dtype = np .int64 )
290
294
291
- for data in ( mixed_data , int_data , date_data ):
292
- with tm . assert_raises_regex ( ValueError , msg ):
293
- pd . to_numeric ( data , downcast = invalid_downcast )
295
+ # Basic function tests.
296
+ res = pd . to_numeric ( data )
297
+ tm . assert_numpy_array_equal ( res , expected )
294
298
295
- expected = np .array ([1 , 2 , 3 ], dtype = np .int64 )
299
+ res = pd .to_numeric (data , downcast = None )
300
+ tm .assert_numpy_array_equal (res , expected )
296
301
297
- res = pd . to_numeric ( data )
298
- tm . assert_numpy_array_equal ( res , expected )
302
+ # Basic dtype support.
303
+ smallest_uint_dtype = np . dtype ( np . typecodes [ "UnsignedInteger" ][ 0 ] )
299
304
300
- res = pd .to_numeric (data , downcast = None )
301
- tm .assert_numpy_array_equal (res , expected )
305
+ # Support below np.float32 is rare and far between.
306
+ float_32_char = np .dtype (np .float32 ).char
307
+ smallest_float_dtype = float_32_char
302
308
303
- expected = np .array ([1 , 2 , 3 ], dtype = smallest_int_dtype )
309
+ expected = np .array ([1 , 2 , 3 ], dtype = smallest_uint_dtype )
310
+ res = pd .to_numeric (data , downcast = "unsigned" )
311
+ tm .assert_numpy_array_equal (res , expected )
304
312
305
- for signed_downcast in ( 'integer' , 'signed' ):
306
- res = pd .to_numeric (data , downcast = signed_downcast )
307
- tm .assert_numpy_array_equal (res , expected )
313
+ expected = np . array ([ 1 , 2 , 3 ], dtype = smallest_float_dtype )
314
+ res = pd .to_numeric (data , downcast = "float" )
315
+ tm .assert_numpy_array_equal (res , expected )
308
316
309
- expected = np .array ([1 , 2 , 3 ], dtype = smallest_uint_dtype )
310
- res = pd .to_numeric (data , downcast = 'unsigned' )
311
- tm .assert_numpy_array_equal (res , expected )
317
+ @pytest .mark .parametrize ("signed_downcast" , ["integer" , "signed" ])
318
+ @pytest .mark .parametrize ("data" , [
319
+ ["1" , 2 , 3 ],
320
+ [1 , 2 , 3 ],
321
+ np .array (["1970-01-02" , "1970-01-03" ,
322
+ "1970-01-04" ], dtype = "datetime64[D]" )
323
+ ])
324
+ def test_signed_downcast (self , data , signed_downcast ):
325
+ # see gh-13352
326
+ smallest_int_dtype = np .dtype (np .typecodes ["Integer" ][0 ])
327
+ expected = np .array ([1 , 2 , 3 ], dtype = smallest_int_dtype )
312
328
313
- expected = np .array ([1 , 2 , 3 ], dtype = smallest_float_dtype )
314
- res = pd .to_numeric (data , downcast = 'float' )
315
- tm .assert_numpy_array_equal (res , expected )
329
+ res = pd .to_numeric (data , downcast = signed_downcast )
330
+ tm .assert_numpy_array_equal (res , expected )
316
331
317
- # if we can't successfully cast the given
332
+ def test_ignore_downcast_invalid_data (self ):
333
+ # If we can't successfully cast the given
318
334
# data to a numeric dtype, do not bother
319
- # with the downcast parameter
320
- data = [' foo' , 2 , 3 ]
335
+ # with the downcast parameter.
336
+ data = [" foo" , 2 , 3 ]
321
337
expected = np .array (data , dtype = object )
322
- res = pd .to_numeric (data , errors = 'ignore' ,
323
- downcast = 'unsigned' )
338
+
339
+ res = pd .to_numeric (data , errors = "ignore" ,
340
+ downcast = "unsigned" )
324
341
tm .assert_numpy_array_equal (res , expected )
325
342
326
- # cannot cast to an unsigned integer because
327
- # we have a negative number
328
- data = ['-1' , 2 , 3 ]
343
+ def test_ignore_downcast_neg_to_unsigned (self ):
344
+ # Cannot cast to an unsigned integer
345
+ # because we have a negative number.
346
+ data = ["-1" , 2 , 3 ]
329
347
expected = np .array ([- 1 , 2 , 3 ], dtype = np .int64 )
330
- res = pd .to_numeric (data , downcast = 'unsigned' )
331
- tm .assert_numpy_array_equal (res , expected )
332
348
333
- # cannot cast to an integer (signed or unsigned)
334
- # because we have a float number
335
- data = (['1.1' , 2 , 3 ],
336
- [10000.0 , 20000 , 3000 , 40000.36 , 50000 , 50000.00 ])
337
- expected = (np .array ([1.1 , 2 , 3 ], dtype = np .float64 ),
338
- np .array ([10000.0 , 20000 , 3000 ,
339
- 40000.36 , 50000 , 50000.00 ], dtype = np .float64 ))
349
+ res = pd .to_numeric (data , downcast = "unsigned" )
350
+ tm .assert_numpy_array_equal (res , expected )
340
351
341
- for _data , _expected in zip (data , expected ):
342
- for downcast in ('integer' , 'signed' , 'unsigned' ):
343
- res = pd .to_numeric (_data , downcast = downcast )
344
- tm .assert_numpy_array_equal (res , _expected )
352
+ @pytest .mark .parametrize ("downcast" , ["integer" , "signed" , "unsigned" ])
353
+ @pytest .mark .parametrize ("data,expected" , [
354
+ (["1.1" , 2 , 3 ],
355
+ np .array ([1.1 , 2 , 3 ], dtype = np .float64 )),
356
+ ([10000.0 , 20000 , 3000 , 40000.36 , 50000 , 50000.00 ],
357
+ np .array ([10000.0 , 20000 , 3000 ,
358
+ 40000.36 , 50000 , 50000.00 ], dtype = np .float64 ))
359
+ ])
360
+ def test_ignore_downcast_cannot_convert_float (
361
+ self , data , expected , downcast ):
362
+ # Cannot cast to an integer (signed or unsigned)
363
+ # because we have a float number.
364
+ res = pd .to_numeric (data , downcast = downcast )
365
+ tm .assert_numpy_array_equal (res , expected )
345
366
367
+ @pytest .mark .parametrize ("downcast,expected_dtype" , [
368
+ ("integer" , np .int16 ),
369
+ ("signed" , np .int16 ),
370
+ ("unsigned" , np .uint16 )
371
+ ])
372
+ def test_downcast_not8bit (self , downcast , expected_dtype ):
346
373
# the smallest integer dtype need not be np.(u)int8
347
- data = ['256' , 257 , 258 ]
348
-
349
- for downcast , expected_dtype in zip (
350
- ['integer' , 'signed' , 'unsigned' ],
351
- [np .int16 , np .int16 , np .uint16 ]):
352
- expected = np .array ([256 , 257 , 258 ], dtype = expected_dtype )
353
- res = pd .to_numeric (data , downcast = downcast )
354
- tm .assert_numpy_array_equal (res , expected )
355
-
356
- def test_downcast_limits (self ):
357
- # Test the limits of each downcast. Bug: #14401.
358
-
359
- i = 'integer'
360
- u = 'unsigned'
361
- dtype_downcast_min_max = [
362
- ('int8' , i , [iinfo (np .int8 ).min , iinfo (np .int8 ).max ]),
363
- ('int16' , i , [iinfo (np .int16 ).min , iinfo (np .int16 ).max ]),
364
- ('int32' , i , [iinfo (np .int32 ).min , iinfo (np .int32 ).max ]),
365
- ('int64' , i , [iinfo (np .int64 ).min , iinfo (np .int64 ).max ]),
366
- ('uint8' , u , [iinfo (np .uint8 ).min , iinfo (np .uint8 ).max ]),
367
- ('uint16' , u , [iinfo (np .uint16 ).min , iinfo (np .uint16 ).max ]),
368
- ('uint32' , u , [iinfo (np .uint32 ).min , iinfo (np .uint32 ).max ]),
369
- ('uint64' , u , [iinfo (np .uint64 ).min , iinfo (np .uint64 ).max ]),
370
- ('int16' , i , [iinfo (np .int8 ).min , iinfo (np .int8 ).max + 1 ]),
371
- ('int32' , i , [iinfo (np .int16 ).min , iinfo (np .int16 ).max + 1 ]),
372
- ('int64' , i , [iinfo (np .int32 ).min , iinfo (np .int32 ).max + 1 ]),
373
- ('int16' , i , [iinfo (np .int8 ).min - 1 , iinfo (np .int16 ).max ]),
374
- ('int32' , i , [iinfo (np .int16 ).min - 1 , iinfo (np .int32 ).max ]),
375
- ('int64' , i , [iinfo (np .int32 ).min - 1 , iinfo (np .int64 ).max ]),
376
- ('uint16' , u , [iinfo (np .uint8 ).min , iinfo (np .uint8 ).max + 1 ]),
377
- ('uint32' , u , [iinfo (np .uint16 ).min , iinfo (np .uint16 ).max + 1 ]),
378
- ('uint64' , u , [iinfo (np .uint32 ).min , iinfo (np .uint32 ).max + 1 ])
379
- ]
380
-
381
- for dtype , downcast , min_max in dtype_downcast_min_max :
382
- series = pd .to_numeric (pd .Series (min_max ), downcast = downcast )
383
- assert series .dtype == dtype
374
+ data = ["256" , 257 , 258 ]
375
+
376
+ expected = np .array ([256 , 257 , 258 ], dtype = expected_dtype )
377
+ res = pd .to_numeric (data , downcast = downcast )
378
+ tm .assert_numpy_array_equal (res , expected )
379
+
380
+ @pytest .mark .parametrize ("dtype,downcast,min_max" , [
381
+ ("int8" , "integer" , [iinfo (np .int8 ).min ,
382
+ iinfo (np .int8 ).max ]),
383
+ ("int16" , "integer" , [iinfo (np .int16 ).min ,
384
+ iinfo (np .int16 ).max ]),
385
+ ('int32' , "integer" , [iinfo (np .int32 ).min ,
386
+ iinfo (np .int32 ).max ]),
387
+ ('int64' , "integer" , [iinfo (np .int64 ).min ,
388
+ iinfo (np .int64 ).max ]),
389
+ ('uint8' , "unsigned" , [iinfo (np .uint8 ).min ,
390
+ iinfo (np .uint8 ).max ]),
391
+ ('uint16' , "unsigned" , [iinfo (np .uint16 ).min ,
392
+ iinfo (np .uint16 ).max ]),
393
+ ('uint32' , "unsigned" , [iinfo (np .uint32 ).min ,
394
+ iinfo (np .uint32 ).max ]),
395
+ ('uint64' , "unsigned" , [iinfo (np .uint64 ).min ,
396
+ iinfo (np .uint64 ).max ]),
397
+ ('int16' , "integer" , [iinfo (np .int8 ).min ,
398
+ iinfo (np .int8 ).max + 1 ]),
399
+ ('int32' , "integer" , [iinfo (np .int16 ).min ,
400
+ iinfo (np .int16 ).max + 1 ]),
401
+ ('int64' , "integer" , [iinfo (np .int32 ).min ,
402
+ iinfo (np .int32 ).max + 1 ]),
403
+ ('int16' , "integer" , [iinfo (np .int8 ).min - 1 ,
404
+ iinfo (np .int16 ).max ]),
405
+ ('int32' , "integer" , [iinfo (np .int16 ).min - 1 ,
406
+ iinfo (np .int32 ).max ]),
407
+ ('int64' , "integer" , [iinfo (np .int32 ).min - 1 ,
408
+ iinfo (np .int64 ).max ]),
409
+ ('uint16' , "unsigned" , [iinfo (np .uint8 ).min ,
410
+ iinfo (np .uint8 ).max + 1 ]),
411
+ ('uint32' , "unsigned" , [iinfo (np .uint16 ).min ,
412
+ iinfo (np .uint16 ).max + 1 ]),
413
+ ('uint64' , "unsigned" , [iinfo (np .uint32 ).min ,
414
+ iinfo (np .uint32 ).max + 1 ])
415
+ ])
416
+ def test_downcast_limits (self , dtype , downcast , min_max ):
417
+ # see gh-14401: test the limits of each downcast.
418
+ series = pd .to_numeric (pd .Series (min_max ), downcast = downcast )
419
+ assert series .dtype == dtype
384
420
385
421
def test_coerce_uint64_conflict (self ):
386
422
# see gh-17007 and gh-17125
0 commit comments