Skip to content

Commit 1d5f7aa

Browse files
committed
BUG: permit str dtype -> IntegerDtype conversions
Resolves pandas-dev#25472, resolves pandas-dev#25288.
1 parent c021d33 commit 1d5f7aa

File tree

2 files changed

+25
-16
lines changed

2 files changed

+25
-16
lines changed

pandas/core/arrays/integer.py

+18 -9
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
from __future__ import annotations
22

3-
from typing import overload
3+
from typing import (
4+
Optional,
5+
overload,
6+
)
47
import warnings
58

69
import numpy as np
@@ -32,6 +35,7 @@
3235
is_integer_dtype,
3336
is_list_like,
3437
is_object_dtype,
38+
is_string_dtype,
3539
pandas_dtype,
3640
)
3741
from pandas.core.dtypes.missing import isna
@@ -119,17 +123,20 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
119123
return None
120124

121125

122-
def safe_cast(values, dtype, copy: bool):
126+
def safe_cast(values, dtype, inferred_type: str | None, copy: bool):
123127
"""
124128
Safely cast the values to the dtype if they
125129
are equivalent, meaning floats must be equivalent to the
126130
ints.
127-
128131
"""
132+
if inferred_type in ("string", "unicode"):
133+
# casts from str are always safe since they raise
134+
# a ValueError if the str cannot be parsed into an int
135+
return values.astype(dtype, copy=copy)
136+
129137
try:
130138
return values.astype(dtype, casting="safe", copy=copy)
131139
except TypeError as err:
132-
133140
casted = values.astype(dtype, copy=copy)
134141
if (casted == values).all():
135142
return casted
@@ -143,7 +150,7 @@ def coerce_to_array(
143150
values, dtype, mask=None, copy: bool = False
144151
) -> tuple[np.ndarray, np.ndarray]:
145152
"""
146-
Coerce the input values array to numpy arrays with a mask
153+
Coerce the input values array to numpy arrays with a mask.
147154
148155
Parameters
149156
----------
@@ -187,7 +194,9 @@ def coerce_to_array(
187194
return values, mask
188195

189196
values = np.array(values, copy=copy)
190-
if is_object_dtype(values):
197+
inferred_type = None
198+
# note that `is_string_dtype` subsumes `is_object_dtype`
199+
if is_string_dtype(values):
191200
inferred_type = lib.infer_dtype(values, skipna=True)
192201
if inferred_type == "empty":
193202
values = np.empty(len(values))
@@ -198,6 +207,8 @@ def coerce_to_array(
198207
"mixed-integer",
199208
"integer-na",
200209
"mixed-integer-float",
210+
"string",
211+
"unicode",
201212
]:
202213
raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
203214

@@ -230,9 +241,7 @@ def coerce_to_array(
230241
if mask.any():
231242
values = values.copy()
232243
values[mask] = 1
233-
values = safe_cast(values, dtype, copy=False)
234-
else:
235-
values = safe_cast(values, dtype, copy=False)
244+
values = safe_cast(values, dtype, inferred_type, copy=False)
236245

237246
return values, mask
238247

pandas/core/dtypes/common.py

+7 -7
Original file line number | Diff line number | Diff line change
@@ -143,14 +143,14 @@ def ensure_python_int(value: int | np.integer) -> int:
143143

144144

145145
def classes(*klasses) -> Callable:
146-
"""evaluate if the tipo is a subclass of the klasses"""
146+
"""Evaluate if the tipo is a subclass of the klasses."""
147147
return lambda tipo: issubclass(tipo, klasses)
148148

149149

150150
def classes_and_not_datetimelike(*klasses) -> Callable:
151151
"""
152-
evaluate if the tipo is a subclass of the klasses
153-
and not a datetimelike
152+
Evaluate if the tipo is a subclass of the klasses
153+
and not a datetimelike.
154154
"""
155155
return lambda tipo: (
156156
issubclass(tipo, klasses)
@@ -674,7 +674,7 @@ def is_integer_dtype(arr_or_dtype) -> bool:
674674
"""
675675
Check whether the provided array or dtype is of an integer dtype.
676676
677-
Unlike in `in_any_int_dtype`, timedelta64 instances will return False.
677+
Unlike in `is_any_int_dtype`, timedelta64 instances will return False.
678678
679679
The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
680680
as integer by this function.
@@ -726,7 +726,7 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool:
726726
"""
727727
Check whether the provided array or dtype is of a signed integer dtype.
728728
729-
Unlike in `in_any_int_dtype`, timedelta64 instances will return False.
729+
Unlike in `is_any_int_dtype`, timedelta64 instances will return False.
730730
731731
The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
732732
as integer by this function.
@@ -1521,7 +1521,7 @@ def is_complex_dtype(arr_or_dtype) -> bool:
15211521

15221522
def _is_dtype(arr_or_dtype, condition) -> bool:
15231523
"""
1524-
Return a boolean if the condition is satisfied for the arr_or_dtype.
1524+
Return true if the condition is satisfied for the arr_or_dtype.
15251525
15261526
Parameters
15271527
----------
@@ -1580,7 +1580,7 @@ def get_dtype(arr_or_dtype) -> DtypeObj:
15801580

15811581
def _is_dtype_type(arr_or_dtype, condition) -> bool:
15821582
"""
1583-
Return a boolean if the condition is satisfied for the arr_or_dtype.
1583+
Return true if the condition is satisfied for the arr_or_dtype.
15841584
15851585
Parameters
15861586
----------

0 commit comments

Comments
 (0)