13
13
from pandas .core .dtypes .cast import maybe_downcast_numeric
14
14
from pandas .core .dtypes .common import (
15
15
ensure_object ,
16
+ is_bool_dtype ,
16
17
is_datetime_or_timedelta_dtype ,
17
18
is_decimal ,
18
19
is_integer_dtype ,
19
20
is_number ,
20
21
is_numeric_dtype ,
22
+ is_object_dtype ,
21
23
is_scalar ,
22
24
needs_i8_conversion ,
23
25
)
27
29
)
28
30
29
31
import pandas as pd
30
- from pandas .core .arrays . numeric import NumericArray
32
+ from pandas .core .arrays import BaseMaskedArray
31
33
32
34
33
35
def to_numeric (
34
36
arg ,
35
37
errors : DateTimeErrorChoices = "raise" ,
36
38
downcast : Literal ["integer" , "signed" , "unsigned" , "float" ] | None = None ,
39
+ use_nullable_dtypes : bool = False ,
37
40
):
38
41
"""
39
42
Convert argument to a numeric type.
@@ -47,7 +50,7 @@ def to_numeric(
47
50
numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
48
51
or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
49
52
passed in, it is very likely they will be converted to float so that
50
- they can stored in an `ndarray`. These warnings apply similarly to
53
+ they can be stored in an `ndarray`. These warnings apply similarly to
51
54
`Series` since it internally leverages `ndarray`.
52
55
53
56
Parameters
@@ -78,6 +81,10 @@ def to_numeric(
78
81
the dtype it is to be cast to, so if none of the dtypes
79
82
checked satisfy that specification, no downcasting will be
80
83
performed on the data.
84
+ use_nullable_dtypes : bool = False
85
+ Whether or not to use nullable dtypes as default when converting data. If
86
+ set to True, nullable dtypes are used for all dtypes that have a nullable
87
+ implementation, even if no nulls are present.
81
88
82
89
Returns
83
90
-------
@@ -178,11 +185,12 @@ def to_numeric(
178
185
# GH33013: for masked arrays (IntegerArray, BooleanArray & FloatingArray) extract non-null values for casting
179
186
# save mask to reconstruct the full array after casting
180
187
mask : npt .NDArray [np .bool_ ] | None = None
181
- if isinstance (values , NumericArray ):
188
+ if isinstance (values , BaseMaskedArray ):
182
189
mask = values ._mask
183
190
values = values ._data [~ mask ]
184
191
185
192
values_dtype = getattr (values , "dtype" , None )
193
+ new_mask : np .ndarray | None = None
186
194
if is_numeric_dtype (values_dtype ):
187
195
pass
188
196
elif is_datetime_or_timedelta_dtype (values_dtype ):
@@ -191,13 +199,23 @@ def to_numeric(
191
199
values = ensure_object (values )
192
200
coerce_numeric = errors not in ("ignore" , "raise" )
193
201
try :
194
- values , _ = lib .maybe_convert_numeric (
195
- values , set (), coerce_numeric = coerce_numeric
202
+ values , new_mask = lib .maybe_convert_numeric ( # type: ignore[call-overload]
203
+ values ,
204
+ set (),
205
+ coerce_numeric = coerce_numeric ,
206
+ convert_to_masked_nullable = use_nullable_dtypes ,
196
207
)
197
208
except (ValueError , TypeError ):
198
209
if errors == "raise" :
199
210
raise
200
211
212
+ if new_mask is not None :
213
+ # Drop the masked-out values; this is expected later anyway and enables
214
+ # downcasting
215
+ values = values [~ new_mask ]
216
+ elif use_nullable_dtypes and new_mask is None :
217
+ new_mask = np .zeros (values .shape , dtype = np .bool_ )
218
+
201
219
# attempt downcast only if the data has been successfully converted
202
220
# to a numerical dtype and if a downcast method has been specified
203
221
if downcast is not None and is_numeric_dtype (values .dtype ):
@@ -228,18 +246,31 @@ def to_numeric(
228
246
if values .dtype == dtype :
229
247
break
230
248
231
- # GH33013: for IntegerArray & FloatingArray need to reconstruct masked array
232
- if mask is not None :
249
+ # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
250
+ # masked array
251
+ if (mask is not None or new_mask is not None ) and not is_object_dtype (values .dtype ):
252
+ if mask is None :
253
+ mask = new_mask
254
+ else :
255
+ mask = mask .copy ()
256
+ assert isinstance (mask , np .ndarray )
233
257
data = np .zeros (mask .shape , dtype = values .dtype )
234
258
data [~ mask ] = values
235
259
236
260
from pandas .core .arrays import (
261
+ BooleanArray ,
237
262
FloatingArray ,
238
263
IntegerArray ,
239
264
)
240
265
241
- klass = IntegerArray if is_integer_dtype (data .dtype ) else FloatingArray
242
- values = klass (data , mask .copy ())
266
+ klass : type [IntegerArray ] | type [BooleanArray ] | type [FloatingArray ]
267
+ if is_integer_dtype (data .dtype ):
268
+ klass = IntegerArray
269
+ elif is_bool_dtype (data .dtype ):
270
+ klass = BooleanArray
271
+ else :
272
+ klass = FloatingArray
273
+ values = klass (data , mask )
243
274
244
275
if is_series :
245
276
return arg ._constructor (values , index = arg .index , name = arg .name )
0 commit comments