4
4
from numpy import iinfo
5
5
import pytest
6
6
7
+ import pandas .compat as compat
8
+
7
9
import pandas as pd
8
10
from pandas import DataFrame , Index , Series , to_numeric
9
11
from pandas .util import testing as tm
10
12
11
13
14
@pytest.fixture(params=[
    None,
    "ignore",
    "raise",
    "coerce",
])
def errors(request):
    """
    Fixture cycling through the ``errors`` options used by these tests.

    ``None`` is included so that tests can exercise the call with the
    ``errors`` keyword left out entirely.
    """
    return request.param
17
+
18
+
19
@pytest.fixture(params=[
    True,
    False,
])
def signed(request):
    """
    Boolean fixture: whether the value under test should be negated.
    """
    return request.param
22
+
23
+
24
@pytest.fixture(
    params=[lambda x: x, str],
    ids=["identity", "str"],
)
def transform(request):
    """
    Fixture supplying a callable applied to a value before conversion.

    Either leaves the value untouched ("identity") or turns it into
    its string representation ("str").
    """
    return request.param
27
+
28
+
29
@pytest.fixture(params=[
    47393996303418497800,
    100000000000000000000,
])
def large_val(request):
    """
    Fixture providing positive integers larger than 2**64.
    """
    return request.param
35
+
36
+
37
@pytest.fixture(params=[
    True,
    False,
])
def multiple_elts(request):
    """
    Boolean fixture: whether the input list should hold a second element.
    """
    return request.param
40
+
41
+
42
@pytest.fixture(params=[
    (lambda x: Index(x, name="idx"), tm.assert_index_equal),
    (lambda x: Series(x, name="ser"), tm.assert_series_equal),
    (lambda x: np.array(Index(x).values), tm.assert_numpy_array_equal),
])
def transform_assert_equal(request):
    """
    Fixture pairing a container constructor with its equality checker.

    Each parameter is a ``(transform, assert_equal)`` tuple: the first
    item wraps raw data as an Index, a Series or a NumPy array, and the
    second is the ``tm`` assertion helper matching that container type.
    """
    return request.param
49
+
50
+
12
51
@pytest .mark .parametrize ("input_kwargs,result_kwargs" , [
13
52
(dict (), dict (dtype = np .int64 )),
14
53
(dict (errors = "coerce" , downcast = "integer" ), dict (dtype = np .int8 ))
@@ -172,7 +211,6 @@ def test_all_nan():
172
211
tm .assert_series_equal (result , expected )
173
212
174
213
175
- @pytest .mark .parametrize ("errors" , [None , "ignore" , "raise" , "coerce" ])
176
214
def test_type_check (errors ):
177
215
# see gh-11776
178
216
df = DataFrame ({"a" : [1 , - 3.14 , 7 ], "b" : ["4" , "5" , "6" ]})
@@ -183,11 +221,100 @@ def test_type_check(errors):
183
221
to_numeric (df , ** kwargs )
184
222
185
223
186
- @pytest .mark .parametrize ("val" , [
187
- 1 , 1.1 , "1" , "1.1" , - 1.5 , "-1.5"
188
- ])
189
- def test_scalar (val ):
190
- assert to_numeric (val ) == float (val )
224
@pytest.mark.parametrize("val", [1, 1.1, 20001])
def test_scalar(val, signed, transform):
    """
    A scalar, optionally negated and optionally passed through
    ``transform``, must convert to a value equal to ``float(val)``.
    """
    if signed:
        val = -val

    result = to_numeric(transform(val))
    assert result == float(val)
228
+
229
+
230
def test_really_large_scalar(large_val, signed, transform, errors):
    """
    Check ``to_numeric`` on a single very large scalar.

    Parameters
    ----------
    large_val : int
        Positive integer supplied by the ``large_val`` fixture.
    signed : bool
        If True, the value is negated before use.
    transform : callable
        Applied to the value before conversion (identity or ``str``).
    errors : {None, "ignore", "raise", "coerce"}
        Error mode; ``None`` means the keyword is not passed at all.

    A string input must raise ``ValueError`` when errors are raised
    (explicitly or by default). Otherwise the result is compared with
    the input itself, or with its float value when a string is coerced.
    """
    # see gh-24910
    kwargs = dict(errors=errors) if errors is not None else dict()
    val = -large_val if signed else large_val

    val = transform(val)
    val_is_string = isinstance(val, str)

    if val_is_string and errors in (None, "raise"):
        msg = "Integer out of range. at position 0"
        with pytest.raises(ValueError, match=msg):
            to_numeric(val, **kwargs)
    else:
        expected = float(val) if (errors == "coerce" and
                                  val_is_string) else val

        # The helper reports a mismatch by raising, so it is called
        # directly. Wrapping it in ``assert`` would make the test depend
        # on its return value and fail if the helper returns ``None``.
        tm.assert_almost_equal(to_numeric(val, **kwargs), expected)
246
+
247
+
248
def test_really_large_in_arr(large_val, signed, transform,
                             multiple_elts, errors):
    """
    Check ``to_numeric`` on a list whose first element is very large.

    The list holds the (optionally negated, optionally stringified)
    large value and, when ``multiple_elts`` is set, an unparseable
    string after it. Depending on ``errors`` the call must either raise
    ``ValueError`` or return an array matching the expected values and
    dtype.
    """
    # see gh-24910
    kwargs = {} if errors is None else dict(errors=errors)
    val = transform(-large_val if signed else large_val)

    extra_elt = "string"
    arr = [val, extra_elt] if multiple_elts else [val]

    val_is_string = isinstance(val, str)
    coercing = errors == "coerce"
    raising = errors in (None, "raise")

    if raising and (val_is_string or multiple_elts):
        # The large string at position 0 is reported before the
        # unparseable string at position 1.
        if val_is_string:
            msg = "Integer out of range. at position 0"
        else:
            msg = 'Unable to parse string "string" at position 1'

        with pytest.raises(ValueError, match=msg):
            to_numeric(arr, **kwargs)
        return

    result = to_numeric(arr, **kwargs)
    exp_val = float(val) if (coercing and val_is_string) else val

    if not multiple_elts:
        expected = [exp_val]
        numeric_types = (int, compat.long, float)
        exp_dtype = float if isinstance(exp_val, numeric_types) else object
    elif coercing:
        # The unparseable string is expected to become NaN.
        expected = [exp_val, np.nan]
        exp_dtype = float
    else:
        # The extra string is expected back unchanged in an object array.
        expected = [exp_val, extra_elt]
        exp_dtype = object

    tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
287
+
288
+
289
def test_really_large_in_arr_consistent(large_val, signed,
                                        multiple_elts, errors):
    """
    Check that large strings are handled the same way at any position.

    The list always ends with the large value as a string and, when
    ``multiple_elts`` is set, starts with the large value as an integer.
    """
    # see gh-24910
    #
    # Having already found that the result must hold floats is no
    # reason to be lenient on later elements that fail to be integers.
    kwargs = {} if errors is None else dict(errors=errors)
    str_val = str(-large_val if signed else large_val)
    arr = [large_val, str_val] if multiple_elts else [str_val]

    if errors in (None, "raise"):
        # The string is the last element: position 1 when an integer
        # precedes it, position 0 otherwise.
        index = int(multiple_elts)
        msg = "Integer out of range. at position {index}".format(index=index)

        with pytest.raises(ValueError, match=msg):
            to_numeric(arr, **kwargs)
        return

    result = to_numeric(arr, **kwargs)

    if errors == "coerce":
        expected, exp_dtype = [float(i) for i in arr], float
    else:
        expected, exp_dtype = arr, object

    tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
191
318
192
319
193
320
@pytest .mark .parametrize ("errors,checker" , [
@@ -205,15 +332,6 @@ def test_scalar_fail(errors, checker):
205
332
assert checker (to_numeric (scalar , errors = errors ))
206
333
207
334
208
- @pytest .fixture (params = [
209
- (lambda x : Index (x , name = "idx" ), tm .assert_index_equal ),
210
- (lambda x : Series (x , name = "ser" ), tm .assert_series_equal ),
211
- (lambda x : np .array (Index (x ).values ), tm .assert_numpy_array_equal )
212
- ])
213
- def transform_assert_equal (request ):
214
- return request .param
215
-
216
-
217
335
@pytest .mark .parametrize ("data" , [
218
336
[1 , 2 , 3 ],
219
337
[1. , np .nan , 3 , np .nan ]
0 commit comments