Skip to content

Commit 4f601b1

Browse files
authored
REF: convert_dtypes return dtype objects (#41622)
1 parent 4b7b56c commit 4f601b1

File tree

2 files changed

+34
-32
lines changed

2 files changed

+34
-32
lines changed

pandas/core/dtypes/cast.py

+33 -28
Original file line number | Diff line number | Diff line change
@@ -1386,7 +1386,7 @@ def convert_dtypes(
13861386
convert_integer: bool = True,
13871387
convert_boolean: bool = True,
13881388
convert_floating: bool = True,
1389-
) -> Dtype:
1389+
) -> DtypeObj:
13901390
"""
13911391
Convert objects to best possible type, and optionally,
13921392
to types supporting ``pd.NA``.
@@ -1407,71 +1407,76 @@ def convert_dtypes(
14071407
14081408
Returns
14091409
-------
1410-
str, np.dtype, or ExtensionDtype
1411-
dtype
1412-
new dtype
1410+
np.dtype, or ExtensionDtype
14131411
"""
1414-
inferred_dtype: str | np.dtype | ExtensionDtype
1415-
# TODO: rule out str
1412+
inferred_dtype: str | DtypeObj
14161413

14171414
if (
14181415
convert_string or convert_integer or convert_boolean or convert_floating
14191416
) and isinstance(input_array, np.ndarray):
1420-
inferred_dtype = lib.infer_dtype(input_array)
14211417

1422-
if not convert_string and is_string_dtype(inferred_dtype):
1418+
if is_object_dtype(input_array.dtype):
1419+
inferred_dtype = lib.infer_dtype(input_array)
1420+
else:
14231421
inferred_dtype = input_array.dtype
14241422

1423+
if is_string_dtype(inferred_dtype):
1424+
if not convert_string:
1425+
inferred_dtype = input_array.dtype
1426+
else:
1427+
inferred_dtype = pandas_dtype("string")
1428+
return inferred_dtype
1429+
14251430
if convert_integer:
1426-
target_int_dtype = "Int64"
1431+
target_int_dtype = pandas_dtype("Int64")
14271432

14281433
if is_integer_dtype(input_array.dtype):
14291434
from pandas.core.arrays.integer import INT_STR_TO_DTYPE
14301435

14311436
inferred_dtype = INT_STR_TO_DTYPE.get(
14321437
input_array.dtype.name, target_int_dtype
14331438
)
1434-
if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
1435-
input_array.dtype
1436-
):
1437-
inferred_dtype = target_int_dtype
1438-
1439-
else:
1440-
if is_integer_dtype(inferred_dtype):
1441-
inferred_dtype = input_array.dtype
1439+
elif is_numeric_dtype(input_array.dtype):
1440+
# TODO: de-dup with maybe_cast_to_integer_array?
1441+
arr = input_array[notna(input_array)]
1442+
if (arr.astype(int) == arr).all():
1443+
inferred_dtype = target_int_dtype
1444+
else:
1445+
inferred_dtype = input_array.dtype
14421446

14431447
if convert_floating:
14441448
if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
14451449
input_array.dtype
14461450
):
14471451
from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
14481452

1449-
inferred_float_dtype = FLOAT_STR_TO_DTYPE.get(
1450-
input_array.dtype.name, "Float64"
1453+
inferred_float_dtype: DtypeObj = FLOAT_STR_TO_DTYPE.get(
1454+
input_array.dtype.name, pandas_dtype("Float64")
14511455
)
14521456
# if we could also convert to integer, check if all floats
14531457
# are actually integers
14541458
if convert_integer:
1459+
# TODO: de-dup with maybe_cast_to_integer_array?
14551460
arr = input_array[notna(input_array)]
14561461
if (arr.astype(int) == arr).all():
1457-
inferred_dtype = "Int64"
1462+
inferred_dtype = pandas_dtype("Int64")
14581463
else:
14591464
inferred_dtype = inferred_float_dtype
14601465
else:
14611466
inferred_dtype = inferred_float_dtype
1462-
else:
1463-
if is_float_dtype(inferred_dtype):
1464-
inferred_dtype = input_array.dtype
14651467

14661468
if convert_boolean:
14671469
if is_bool_dtype(input_array.dtype):
1468-
inferred_dtype = "boolean"
1469-
else:
1470-
if isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
1471-
inferred_dtype = input_array.dtype
1470+
inferred_dtype = pandas_dtype("boolean")
1471+
elif isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
1472+
inferred_dtype = pandas_dtype("boolean")
1473+
1474+
if isinstance(inferred_dtype, str):
1475+
# If we couldn't do anything else, then we retain the dtype
1476+
inferred_dtype = input_array.dtype
14721477

14731478
else:
1474-
inferred_dtype = input_array.dtype
1479+
return input_array.dtype
14751480

14761481
return inferred_dtype
14771482

pandas/core/series.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -5065,10 +5065,7 @@ def _convert_dtypes(
50655065
convert_boolean,
50665066
convert_floating,
50675067
)
5068-
try:
5069-
result = input_series.astype(inferred_dtype)
5070-
except TypeError:
5071-
result = input_series.copy()
5068+
result = input_series.astype(inferred_dtype)
50725069
else:
50735070
result = input_series.copy()
50745071
return result

0 commit comments

Comments
 (0)