@@ -1386,7 +1386,7 @@ def convert_dtypes(
1386
1386
convert_integer : bool = True ,
1387
1387
convert_boolean : bool = True ,
1388
1388
convert_floating : bool = True ,
1389
- ) -> Dtype :
1389
+ ) -> DtypeObj :
1390
1390
"""
1391
1391
Convert objects to best possible type, and optionally,
1392
1392
to types supporting ``pd.NA``.
@@ -1407,71 +1407,76 @@ def convert_dtypes(
1407
1407
1408
1408
Returns
1409
1409
-------
1410
- str, np.dtype, or ExtensionDtype
1411
- dtype
1412
- new dtype
1410
+ np.dtype, or ExtensionDtype
1413
1411
"""
1414
- inferred_dtype : str | np .dtype | ExtensionDtype
1415
- # TODO: rule out str
1412
+ inferred_dtype : str | DtypeObj
1416
1413
1417
1414
if (
1418
1415
convert_string or convert_integer or convert_boolean or convert_floating
1419
1416
) and isinstance (input_array , np .ndarray ):
1420
- inferred_dtype = lib .infer_dtype (input_array )
1421
1417
1422
- if not convert_string and is_string_dtype (inferred_dtype ):
1418
+ if is_object_dtype (input_array .dtype ):
1419
+ inferred_dtype = lib .infer_dtype (input_array )
1420
+ else :
1423
1421
inferred_dtype = input_array .dtype
1424
1422
1423
+ if is_string_dtype (inferred_dtype ):
1424
+ if not convert_string :
1425
+ inferred_dtype = input_array .dtype
1426
+ else :
1427
+ inferred_dtype = pandas_dtype ("string" )
1428
+ return inferred_dtype
1429
+
1425
1430
if convert_integer :
1426
- target_int_dtype = "Int64"
1431
+ target_int_dtype = pandas_dtype ( "Int64" )
1427
1432
1428
1433
if is_integer_dtype (input_array .dtype ):
1429
1434
from pandas .core .arrays .integer import INT_STR_TO_DTYPE
1430
1435
1431
1436
inferred_dtype = INT_STR_TO_DTYPE .get (
1432
1437
input_array .dtype .name , target_int_dtype
1433
1438
)
1434
- if not is_integer_dtype (input_array .dtype ) and is_numeric_dtype (
1435
- input_array .dtype
1436
- ):
1437
- inferred_dtype = target_int_dtype
1438
-
1439
- else :
1440
- if is_integer_dtype (inferred_dtype ):
1441
- inferred_dtype = input_array .dtype
1439
+ elif is_numeric_dtype (input_array .dtype ):
1440
+ # TODO: de-dup with maybe_cast_to_integer_array?
1441
+ arr = input_array [notna (input_array )]
1442
+ if (arr .astype (int ) == arr ).all ():
1443
+ inferred_dtype = target_int_dtype
1444
+ else :
1445
+ inferred_dtype = input_array .dtype
1442
1446
1443
1447
if convert_floating :
1444
1448
if not is_integer_dtype (input_array .dtype ) and is_numeric_dtype (
1445
1449
input_array .dtype
1446
1450
):
1447
1451
from pandas .core .arrays .floating import FLOAT_STR_TO_DTYPE
1448
1452
1449
- inferred_float_dtype = FLOAT_STR_TO_DTYPE .get (
1450
- input_array .dtype .name , "Float64"
1453
+ inferred_float_dtype : DtypeObj = FLOAT_STR_TO_DTYPE .get (
1454
+ input_array .dtype .name , pandas_dtype ( "Float64" )
1451
1455
)
1452
1456
# if we could also convert to integer, check if all floats
1453
1457
# are actually integers
1454
1458
if convert_integer :
1459
+ # TODO: de-dup with maybe_cast_to_integer_array?
1455
1460
arr = input_array [notna (input_array )]
1456
1461
if (arr .astype (int ) == arr ).all ():
1457
- inferred_dtype = "Int64"
1462
+ inferred_dtype = pandas_dtype ( "Int64" )
1458
1463
else :
1459
1464
inferred_dtype = inferred_float_dtype
1460
1465
else :
1461
1466
inferred_dtype = inferred_float_dtype
1462
- else :
1463
- if is_float_dtype (inferred_dtype ):
1464
- inferred_dtype = input_array .dtype
1465
1467
1466
1468
if convert_boolean :
1467
1469
if is_bool_dtype (input_array .dtype ):
1468
- inferred_dtype = "boolean"
1469
- else :
1470
- if isinstance (inferred_dtype , str ) and inferred_dtype == "boolean" :
1471
- inferred_dtype = input_array .dtype
1470
+ inferred_dtype = pandas_dtype ("boolean" )
1471
+ elif isinstance (inferred_dtype , str ) and inferred_dtype == "boolean" :
1472
+ inferred_dtype = pandas_dtype ("boolean" )
1473
+
1474
+ if isinstance (inferred_dtype , str ):
1475
+ # If we couldn't do anything else, then we retain the dtype
1476
+ inferred_dtype = input_array .dtype
1472
1477
1473
1478
else :
1474
- inferred_dtype = input_array .dtype
1479
+ return input_array .dtype
1475
1480
1476
1481
return inferred_dtype
1477
1482
0 commit comments