Skip to content

Commit d2faa7f

Browse files
authored
ENH: Add support for dtype string aliases to Series#astype (#556)
* Add support for nullable integer data types to Series#astype
* Add support for nullable float data types to Series#astype
* Add support for nullable boolean data type to Series#astype
* Add support for nullable string data type to Series#astype
* Refactor dtype arg type aliases and add missing dtype aliases
1 parent 611c961 commit d2faa7f

File tree

2 files changed

+116
-26
lines changed

2 files changed

+116
-26
lines changed

pandas-stubs/_typing.pyi

+43-11
Original file line numberDiff line numberDiff line change
@@ -78,41 +78,73 @@ NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | ob
7878
Dtype: TypeAlias = ExtensionDtype | NpDtype
7979
DtypeArg: TypeAlias = Dtype | dict[Any, Dtype]
8080
BooleanDtypeArg: TypeAlias = (
81-
type[bool] | type[np.bool_] | pd.BooleanDtype | Literal["bool"]
81+
# Builtin bool type and its string alias
82+
type[bool] # noqa: Y030
83+
| Literal["bool"]
84+
# Pandas nullable boolean type and its string alias
85+
| pd.BooleanDtype
86+
| Literal["boolean"]
87+
# Numpy bool type
88+
| type[np.bool_]
8289
)
8390
IntDtypeArg: TypeAlias = (
84-
Literal["int", "int32"]
85-
| type[int]
91+
# Builtin integer type and its string alias
92+
type[int] # noqa: Y030
93+
| Literal["int"]
94+
# Pandas nullable integer types and their string aliases
8695
| pd.Int8Dtype
8796
| pd.Int16Dtype
8897
| pd.Int32Dtype
8998
| pd.Int64Dtype
99+
| Literal["Int8", "Int16", "Int32", "Int64"]
100+
# Numpy signed integer types and their string aliases
101+
| type[np.byte]
90102
| type[np.int8]
91103
| type[np.int16]
92104
| type[np.int32]
93105
| type[np.int64]
106+
| type[np.intp]
107+
| Literal["byte", "int8", "int16", "int32", "int64", "intp"]
108+
# Numpy unsigned integer types and their string aliases
109+
| type[np.ubyte]
94110
| type[np.uint8]
95111
| type[np.uint16]
96112
| type[np.uint32]
97113
| type[np.uint64]
98-
| type[np.intp]
99114
| type[np.uintp]
100-
| type[np.byte]
101-
| type[np.ubyte]
115+
| Literal["ubyte", "uint8", "uint16", "uint32", "uint64", "uintp"]
116+
)
117+
StrDtypeArg: TypeAlias = (
118+
# Builtin str type and its string alias
119+
type[str] # noqa: Y030
120+
| Literal["str"]
121+
# Pandas nullable string type and its string alias
122+
| pd.StringDtype
123+
| Literal["string"]
102124
)
103-
StrDtypeArg: TypeAlias = type[str] | pd.StringDtype | Literal["str"]
104125
BytesDtypeArg: TypeAlias = type[bytes]
105126
FloatDtypeArg: TypeAlias = (
106-
pd.Float32Dtype
127+
# Builtin float type and its string alias
128+
type[float] # noqa: Y030
129+
| Literal["float"]
130+
# Pandas nullable float types and their string aliases
131+
| pd.Float32Dtype
107132
| pd.Float64Dtype
133+
| Literal["Float32", "Float64"]
134+
# Numpy float types and their string aliases
108135
| type[np.float16]
109136
| type[np.float32]
110137
| type[np.float64]
111-
| type[float]
112-
| Literal["float"]
138+
| Literal["float16", "float32", "float64"]
113139
)
114140
ComplexDtypeArg: TypeAlias = (
115-
type[np.complex64] | type[np.complex128] | type[complex] | Literal["complex"]
141+
# Builtin complex type and its string alias
142+
type[complex] # noqa: Y030
143+
| Literal["complex"]
144+
# Numpy complex types and their string aliases
145+
| type[np.complex64]
146+
| type[np.complex128]
147+
| Literal["complex64", "complex128"]
116148
)
117149
TimedeltaDtypeArg: TypeAlias = Literal["timedelta64[ns]"]
118150
TimestampDtypeArg: TypeAlias = Literal["datetime64[ns]"]

tests/test_series.py

+73-15
Original file line numberDiff line numberDiff line change
@@ -1449,32 +1449,81 @@ def test_updated_astype() -> None:
14491449
s = pd.Series([3, 4, 5])
14501450
s1 = pd.Series(True)
14511451

1452+
# Boolean types
1453+
1454+
# Builtin bool types
1455+
check(assert_type(s.astype(bool), "pd.Series[bool]"), pd.Series, np.bool_)
1456+
check(assert_type(s.astype("bool"), "pd.Series[bool]"), pd.Series, np.bool_)
1457+
# Pandas nullable boolean types
1458+
check(
1459+
assert_type(s1.astype(pd.BooleanDtype()), "pd.Series[bool]"),
1460+
pd.Series,
1461+
np.bool_,
1462+
)
1463+
check(assert_type(s1.astype("boolean"), "pd.Series[bool]"), pd.Series, np.bool_)
1464+
# Numpy bool type
1465+
check(assert_type(s.astype(np.bool_), "pd.Series[bool]"), pd.Series, np.bool_)
1466+
1467+
# Integer types
1468+
1469+
# Builtin integer types
14521470
check(assert_type(s.astype(int), "pd.Series[int]"), pd.Series, np.integer)
14531471
check(assert_type(s.astype("int"), "pd.Series[int]"), pd.Series, np.integer)
1454-
check(assert_type(s.astype("int32"), "pd.Series[int]"), pd.Series, np.int32)
1472+
# Pandas nullable integer types
14551473
check(assert_type(s.astype(pd.Int8Dtype()), "pd.Series[int]"), pd.Series, np.int8)
14561474
check(assert_type(s.astype(pd.Int16Dtype()), "pd.Series[int]"), pd.Series, np.int16)
14571475
check(assert_type(s.astype(pd.Int32Dtype()), "pd.Series[int]"), pd.Series, np.int32)
14581476
check(assert_type(s.astype(pd.Int64Dtype()), "pd.Series[int]"), pd.Series, np.int64)
1477+
check(assert_type(s.astype("Int8"), "pd.Series[int]"), pd.Series, np.int8)
1478+
check(assert_type(s.astype("Int16"), "pd.Series[int]"), pd.Series, np.int16)
1479+
check(assert_type(s.astype("Int32"), "pd.Series[int]"), pd.Series, np.int32)
1480+
check(assert_type(s.astype("Int64"), "pd.Series[int]"), pd.Series, np.int64)
1481+
# Numpy signed integer types
1482+
check(assert_type(s.astype(np.byte), "pd.Series[int]"), pd.Series, np.byte)
14591483
check(assert_type(s.astype(np.int8), "pd.Series[int]"), pd.Series, np.int8)
14601484
check(assert_type(s.astype(np.int16), "pd.Series[int]"), pd.Series, np.int16)
14611485
check(assert_type(s.astype(np.int32), "pd.Series[int]"), pd.Series, np.int32)
14621486
check(assert_type(s.astype(np.int64), "pd.Series[int]"), pd.Series, np.int64)
1487+
check(assert_type(s.astype(np.intp), "pd.Series[int]"), pd.Series, np.intp)
1488+
check(assert_type(s.astype("byte"), "pd.Series[int]"), pd.Series, np.byte)
1489+
check(assert_type(s.astype("int8"), "pd.Series[int]"), pd.Series, np.int8)
1490+
check(assert_type(s.astype("int16"), "pd.Series[int]"), pd.Series, np.int16)
1491+
check(assert_type(s.astype("int32"), "pd.Series[int]"), pd.Series, np.int32)
1492+
check(assert_type(s.astype("int64"), "pd.Series[int]"), pd.Series, np.int64)
1493+
check(assert_type(s.astype("intp"), "pd.Series[int]"), pd.Series, np.intp)
1494+
# Numpy unsigned integer types
1495+
check(assert_type(s.astype(np.ubyte), "pd.Series[int]"), pd.Series, np.ubyte)
14631496
check(assert_type(s.astype(np.uint8), "pd.Series[int]"), pd.Series, np.uint8)
14641497
check(assert_type(s.astype(np.uint16), "pd.Series[int]"), pd.Series, np.uint16)
14651498
check(assert_type(s.astype(np.uint32), "pd.Series[int]"), pd.Series, np.uint32)
14661499
check(assert_type(s.astype(np.uint64), "pd.Series[int]"), pd.Series, np.uint64)
1467-
check(assert_type(s.astype(np.intp), "pd.Series[int]"), pd.Series, np.intp)
14681500
check(assert_type(s.astype(np.uintp), "pd.Series[int]"), pd.Series, np.uintp)
1469-
check(assert_type(s.astype(np.byte), "pd.Series[int]"), pd.Series, np.byte)
1470-
check(assert_type(s.astype(np.ubyte), "pd.Series[int]"), pd.Series, np.ubyte)
1501+
check(assert_type(s.astype("ubyte"), "pd.Series[int]"), pd.Series, np.ubyte)
1502+
check(assert_type(s.astype("uint8"), "pd.Series[int]"), pd.Series, np.uint8)
1503+
check(assert_type(s.astype("uint16"), "pd.Series[int]"), pd.Series, np.uint16)
1504+
check(assert_type(s.astype("uint32"), "pd.Series[int]"), pd.Series, np.uint32)
1505+
check(assert_type(s.astype("uint64"), "pd.Series[int]"), pd.Series, np.uint64)
1506+
check(assert_type(s.astype("uintp"), "pd.Series[int]"), pd.Series, np.uintp)
1507+
1508+
# String types
14711509

1510+
# Builtin str types
14721511
check(assert_type(s.astype(str), "pd.Series[str]"), pd.Series, str)
1473-
check(assert_type(s.astype(pd.StringDtype()), "pd.Series[str]"), pd.Series, str)
14741512
check(assert_type(s.astype("str"), "pd.Series[str]"), pd.Series, str)
1513+
# Pandas nullable string types
1514+
check(assert_type(s.astype(pd.StringDtype()), "pd.Series[str]"), pd.Series, str)
1515+
check(assert_type(s.astype("string"), "pd.Series[str]"), pd.Series, str)
1516+
1517+
# Bytes types
14751518

14761519
check(assert_type(s.astype(bytes), "pd.Series[bytes]"), pd.Series, bytes)
14771520

1521+
# Float types
1522+
1523+
# Builtin float types
1524+
check(assert_type(s.astype(float), "pd.Series[float]"), pd.Series, float)
1525+
check(assert_type(s.astype("float"), "pd.Series[float]"), pd.Series, float)
1526+
# Pandas nullable float types
14781527
check(
14791528
assert_type(s.astype(pd.Float32Dtype()), "pd.Series[float]"),
14801529
pd.Series,
@@ -1485,12 +1534,22 @@ def test_updated_astype() -> None:
14851534
pd.Series,
14861535
np.float64,
14871536
)
1537+
check(assert_type(s.astype("Float32"), "pd.Series[float]"), pd.Series, np.float32)
1538+
check(assert_type(s.astype("Float64"), "pd.Series[float]"), pd.Series, np.float64)
1539+
# Numpy float types
14881540
check(assert_type(s.astype(np.float16), "pd.Series[float]"), pd.Series, np.float16)
14891541
check(assert_type(s.astype(np.float32), "pd.Series[float]"), pd.Series, np.float32)
14901542
check(assert_type(s.astype(np.float64), "pd.Series[float]"), pd.Series, np.float64)
1491-
check(assert_type(s.astype(float), "pd.Series[float]"), pd.Series, float)
1492-
check(assert_type(s.astype("float"), "pd.Series[float]"), pd.Series, float)
1543+
check(assert_type(s.astype("float16"), "pd.Series[float]"), pd.Series, np.float16)
1544+
check(assert_type(s.astype("float32"), "pd.Series[float]"), pd.Series, np.float32)
1545+
check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.float64)
14931546

1547+
# Complex types
1548+
1549+
# Builtin complex types
1550+
check(assert_type(s.astype(complex), "pd.Series[complex]"), pd.Series, complex)
1551+
check(assert_type(s.astype("complex"), "pd.Series[complex]"), pd.Series, complex)
1552+
# Numpy complex types
14941553
check(
14951554
assert_type(s.astype(np.complex64), "pd.Series[complex]"),
14961555
pd.Series,
@@ -1501,17 +1560,16 @@ def test_updated_astype() -> None:
15011560
pd.Series,
15021561
np.complex128,
15031562
)
1504-
check(assert_type(s.astype(complex), "pd.Series[complex]"), pd.Series, complex)
1505-
check(assert_type(s.astype("complex"), "pd.Series[complex]"), pd.Series, complex)
1506-
15071563
check(
1508-
assert_type(s1.astype(pd.BooleanDtype()), "pd.Series[bool]"),
1564+
assert_type(s.astype("complex64"), "pd.Series[complex]"),
15091565
pd.Series,
1510-
np.bool_,
1566+
np.complex64,
1567+
)
1568+
check(
1569+
assert_type(s.astype("complex128"), "pd.Series[complex]"),
1570+
pd.Series,
1571+
np.complex128,
15111572
)
1512-
check(assert_type(s.astype("bool"), "pd.Series[bool]"), pd.Series, np.bool_)
1513-
check(assert_type(s.astype(bool), "pd.Series[bool]"), pd.Series, np.bool_)
1514-
check(assert_type(s.astype(np.bool_), "pd.Series[bool]"), pd.Series, np.bool_)
15151573

15161574
check(
15171575
assert_type(s.astype("timedelta64[ns]"), TimedeltaSeries),

0 commit comments

Comments
 (0)