1
1
from __future__ import annotations
2
2
3
- from typing import overload
3
+ from typing import (
4
+ Optional ,
5
+ overload ,
6
+ )
4
7
import warnings
5
8
6
9
import numpy as np
32
35
is_integer_dtype ,
33
36
is_list_like ,
34
37
is_object_dtype ,
38
+ is_string_dtype ,
35
39
pandas_dtype ,
36
40
)
37
41
from pandas .core .dtypes .missing import isna
@@ -119,17 +123,20 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
119
123
return None
120
124
121
125
122
- def safe_cast (values , dtype , copy : bool ):
126
+ def safe_cast (values , dtype , inferred_type : str | None , copy : bool ):
123
127
"""
124
128
Safely cast the values to the dtype if they
125
129
are equivalent, meaning floats must be equivalent to the
126
130
ints.
127
-
128
131
"""
132
+ if inferred_type in ("string" , "unicode" ):
133
+ # casts from str are always safe since they raise
134
+ # a ValueError if the str cannot be parsed into an int
135
+ return values .astype (dtype , copy = copy )
136
+
129
137
try :
130
138
return values .astype (dtype , casting = "safe" , copy = copy )
131
139
except TypeError as err :
132
-
133
140
casted = values .astype (dtype , copy = copy )
134
141
if (casted == values ).all ():
135
142
return casted
@@ -143,7 +150,7 @@ def coerce_to_array(
143
150
values , dtype , mask = None , copy : bool = False
144
151
) -> tuple [np .ndarray , np .ndarray ]:
145
152
"""
146
- Coerce the input values array to numpy arrays with a mask
153
+ Coerce the input values array to numpy arrays with a mask.
147
154
148
155
Parameters
149
156
----------
@@ -187,7 +194,9 @@ def coerce_to_array(
187
194
return values , mask
188
195
189
196
values = np .array (values , copy = copy )
190
- if is_object_dtype (values ):
197
+ inferred_type = None
198
+ # note that `is_string_dtype` subsumes `is_object_dtype`
199
+ if is_string_dtype (values ):
191
200
inferred_type = lib .infer_dtype (values , skipna = True )
192
201
if inferred_type == "empty" :
193
202
values = np .empty (len (values ))
@@ -198,6 +207,8 @@ def coerce_to_array(
198
207
"mixed-integer" ,
199
208
"integer-na" ,
200
209
"mixed-integer-float" ,
210
+ "string" ,
211
+ "unicode" ,
201
212
]:
202
213
raise TypeError (f"{ values .dtype } cannot be converted to an IntegerDtype" )
203
214
@@ -230,9 +241,7 @@ def coerce_to_array(
230
241
if mask .any ():
231
242
values = values .copy ()
232
243
values [mask ] = 1
233
- values = safe_cast (values , dtype , copy = False )
234
- else :
235
- values = safe_cast (values , dtype , copy = False )
244
+ values = safe_cast (values , dtype , inferred_type , copy = False )
236
245
237
246
return values , mask
238
247
0 commit comments