32
32
is_integer_dtype ,
33
33
is_list_like ,
34
34
is_object_dtype ,
35
+ is_string_dtype ,
35
36
pandas_dtype ,
36
37
)
37
38
from pandas .core .dtypes .missing import isna
@@ -119,17 +120,20 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
119
120
return None
120
121
121
122
122
- def safe_cast (values , dtype , copy : bool ):
123
+ def safe_cast (values , dtype , inferred_type : str | None , copy : bool ):
123
124
"""
124
125
Safely cast the values to the dtype if they
125
126
are equivalent, meaning floats must be equivalent to the
126
127
ints.
127
-
128
128
"""
129
+ if inferred_type in ("string" , "unicode" ):
130
+ # casts from str are always safe since they raise
131
+ # a ValueError if the str cannot be parsed into an int
132
+ return values .astype (dtype , copy = copy )
133
+
129
134
try :
130
135
return values .astype (dtype , casting = "safe" , copy = copy )
131
136
except TypeError as err :
132
-
133
137
casted = values .astype (dtype , copy = copy )
134
138
if (casted == values ).all ():
135
139
return casted
@@ -143,7 +147,7 @@ def coerce_to_array(
143
147
values , dtype , mask = None , copy : bool = False
144
148
) -> tuple [np .ndarray , np .ndarray ]:
145
149
"""
146
- Coerce the input values array to numpy arrays with a mask
150
+ Coerce the input values array to numpy arrays with a mask.
147
151
148
152
Parameters
149
153
----------
@@ -187,7 +191,8 @@ def coerce_to_array(
187
191
return values , mask
188
192
189
193
values = np .array (values , copy = copy )
190
- if is_object_dtype (values ):
194
+ inferred_type = None
195
+ if is_object_dtype (values ) or is_string_dtype (values ):
191
196
inferred_type = lib .infer_dtype (values , skipna = True )
192
197
if inferred_type == "empty" :
193
198
values = np .empty (len (values ))
@@ -198,6 +203,8 @@ def coerce_to_array(
198
203
"mixed-integer" ,
199
204
"integer-na" ,
200
205
"mixed-integer-float" ,
206
+ "string" ,
207
+ "unicode" ,
201
208
]:
202
209
raise TypeError (f"{ values .dtype } cannot be converted to an IntegerDtype" )
203
210
@@ -230,9 +237,7 @@ def coerce_to_array(
230
237
if mask .any ():
231
238
values = values .copy ()
232
239
values [mask ] = 1
233
- values = safe_cast (values , dtype , copy = False )
234
- else :
235
- values = safe_cast (values , dtype , copy = False )
240
+ values = safe_cast (values , dtype , inferred_type , copy = False )
236
241
237
242
return values , mask
238
243
0 commit comments