Skip to content

ENH: Add support for dtype string aliases to Series#astype #556

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 43 additions & 11 deletions pandas-stubs/_typing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -78,41 +78,73 @@ NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | ob
Dtype: TypeAlias = ExtensionDtype | NpDtype
DtypeArg: TypeAlias = Dtype | dict[Any, Dtype]
BooleanDtypeArg: TypeAlias = (
type[bool] | type[np.bool_] | pd.BooleanDtype | Literal["bool"]
# Builtin bool type and its string alias
type[bool] # noqa: Y030
| Literal["bool"]
# Pandas nullable boolean type and its string alias
| pd.BooleanDtype
| Literal["boolean"]
# Numpy bool type
| type[np.bool_]
)
IntDtypeArg: TypeAlias = (
Literal["int", "int32"]
| type[int]
# Builtin integer type and its string alias
type[int] # noqa: Y030
| Literal["int"]
# Pandas nullable integer types and their string aliases
| pd.Int8Dtype
| pd.Int16Dtype
| pd.Int32Dtype
| pd.Int64Dtype
| Literal["Int8", "Int16", "Int32", "Int64"]
# Numpy signed integer types and their string aliases
| type[np.byte]
| type[np.int8]
| type[np.int16]
| type[np.int32]
| type[np.int64]
| type[np.intp]
| Literal["byte", "int8", "int16", "int32", "int64", "intp"]
# Numpy unsigned integer types and their string aliases
| type[np.ubyte]
| type[np.uint8]
| type[np.uint16]
| type[np.uint32]
| type[np.uint64]
| type[np.intp]
| type[np.uintp]
| type[np.byte]
| type[np.ubyte]
| Literal["ubyte", "uint8", "uint16", "uint32", "uint64", "uintp"]
)
StrDtypeArg: TypeAlias = (
# Builtin str type and its string alias
type[str] # noqa: Y030
| Literal["str"]
# Pandas nullable string type and its string alias
| pd.StringDtype
| Literal["string"]
)
StrDtypeArg: TypeAlias = type[str] | pd.StringDtype | Literal["str"]
BytesDtypeArg: TypeAlias = type[bytes]
FloatDtypeArg: TypeAlias = (
pd.Float32Dtype
# Builtin float type and its string alias
type[float] # noqa: Y030
| Literal["float"]
# Pandas nullable float types and their string aliases
| pd.Float32Dtype
| pd.Float64Dtype
| Literal["Float32", "Float64"]
# Numpy float types and their string aliases
| type[np.float16]
| type[np.float32]
| type[np.float64]
| type[float]
| Literal["float"]
| Literal["float16", "float32", "float64"]
)
ComplexDtypeArg: TypeAlias = (
type[np.complex64] | type[np.complex128] | type[complex] | Literal["complex"]
# Builtin complex type and its string alias
type[complex] # noqa: Y030
| Literal["complex"]
# Numpy complex types and their string aliases
| type[np.complex64]
| type[np.complex128]
| Literal["complex64", "complex128"]
)
TimedeltaDtypeArg: TypeAlias = Literal["timedelta64[ns]"]
TimestampDtypeArg: TypeAlias = Literal["datetime64[ns]"]
Expand Down
88 changes: 73 additions & 15 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1449,32 +1449,81 @@ def test_updated_astype() -> None:
s = pd.Series([3, 4, 5])
s1 = pd.Series(True)

# Boolean types

# Builtin bool types
check(assert_type(s.astype(bool), "pd.Series[bool]"), pd.Series, np.bool_)
check(assert_type(s.astype("bool"), "pd.Series[bool]"), pd.Series, np.bool_)
# Pandas nullable boolean types
check(
assert_type(s1.astype(pd.BooleanDtype()), "pd.Series[bool]"),
pd.Series,
np.bool_,
)
check(assert_type(s1.astype("boolean"), "pd.Series[bool]"), pd.Series, np.bool_)
# Numpy bool type
check(assert_type(s.astype(np.bool_), "pd.Series[bool]"), pd.Series, np.bool_)

# Integer types

# Builtin integer types
check(assert_type(s.astype(int), "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.astype("int"), "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.astype("int32"), "pd.Series[int]"), pd.Series, np.int32)
# Pandas nullable integer types
check(assert_type(s.astype(pd.Int8Dtype()), "pd.Series[int]"), pd.Series, np.int8)
check(assert_type(s.astype(pd.Int16Dtype()), "pd.Series[int]"), pd.Series, np.int16)
check(assert_type(s.astype(pd.Int32Dtype()), "pd.Series[int]"), pd.Series, np.int32)
check(assert_type(s.astype(pd.Int64Dtype()), "pd.Series[int]"), pd.Series, np.int64)
check(assert_type(s.astype("Int8"), "pd.Series[int]"), pd.Series, np.int8)
check(assert_type(s.astype("Int16"), "pd.Series[int]"), pd.Series, np.int16)
check(assert_type(s.astype("Int32"), "pd.Series[int]"), pd.Series, np.int32)
check(assert_type(s.astype("Int64"), "pd.Series[int]"), pd.Series, np.int64)
# Numpy signed integer types
check(assert_type(s.astype(np.byte), "pd.Series[int]"), pd.Series, np.byte)
check(assert_type(s.astype(np.int8), "pd.Series[int]"), pd.Series, np.int8)
check(assert_type(s.astype(np.int16), "pd.Series[int]"), pd.Series, np.int16)
check(assert_type(s.astype(np.int32), "pd.Series[int]"), pd.Series, np.int32)
check(assert_type(s.astype(np.int64), "pd.Series[int]"), pd.Series, np.int64)
check(assert_type(s.astype(np.intp), "pd.Series[int]"), pd.Series, np.intp)
check(assert_type(s.astype("byte"), "pd.Series[int]"), pd.Series, np.byte)
check(assert_type(s.astype("int8"), "pd.Series[int]"), pd.Series, np.int8)
check(assert_type(s.astype("int16"), "pd.Series[int]"), pd.Series, np.int16)
check(assert_type(s.astype("int32"), "pd.Series[int]"), pd.Series, np.int32)
check(assert_type(s.astype("int64"), "pd.Series[int]"), pd.Series, np.int64)
check(assert_type(s.astype("intp"), "pd.Series[int]"), pd.Series, np.intp)
# Numpy unsigned integer types
check(assert_type(s.astype(np.ubyte), "pd.Series[int]"), pd.Series, np.ubyte)
check(assert_type(s.astype(np.uint8), "pd.Series[int]"), pd.Series, np.uint8)
check(assert_type(s.astype(np.uint16), "pd.Series[int]"), pd.Series, np.uint16)
check(assert_type(s.astype(np.uint32), "pd.Series[int]"), pd.Series, np.uint32)
check(assert_type(s.astype(np.uint64), "pd.Series[int]"), pd.Series, np.uint64)
check(assert_type(s.astype(np.intp), "pd.Series[int]"), pd.Series, np.intp)
check(assert_type(s.astype(np.uintp), "pd.Series[int]"), pd.Series, np.uintp)
check(assert_type(s.astype(np.byte), "pd.Series[int]"), pd.Series, np.byte)
check(assert_type(s.astype(np.ubyte), "pd.Series[int]"), pd.Series, np.ubyte)
check(assert_type(s.astype("ubyte"), "pd.Series[int]"), pd.Series, np.ubyte)
check(assert_type(s.astype("uint8"), "pd.Series[int]"), pd.Series, np.uint8)
check(assert_type(s.astype("uint16"), "pd.Series[int]"), pd.Series, np.uint16)
check(assert_type(s.astype("uint32"), "pd.Series[int]"), pd.Series, np.uint32)
check(assert_type(s.astype("uint64"), "pd.Series[int]"), pd.Series, np.uint64)
check(assert_type(s.astype("uintp"), "pd.Series[int]"), pd.Series, np.uintp)

# String types

# Builtin str types
check(assert_type(s.astype(str), "pd.Series[str]"), pd.Series, str)
check(assert_type(s.astype(pd.StringDtype()), "pd.Series[str]"), pd.Series, str)
check(assert_type(s.astype("str"), "pd.Series[str]"), pd.Series, str)
# Pandas nullable string types
check(assert_type(s.astype(pd.StringDtype()), "pd.Series[str]"), pd.Series, str)
check(assert_type(s.astype("string"), "pd.Series[str]"), pd.Series, str)

# Bytes types

check(assert_type(s.astype(bytes), "pd.Series[bytes]"), pd.Series, bytes)

# Float types

# Builtin float types
check(assert_type(s.astype(float), "pd.Series[float]"), pd.Series, float)
check(assert_type(s.astype("float"), "pd.Series[float]"), pd.Series, float)
# Pandas nullable float types
check(
assert_type(s.astype(pd.Float32Dtype()), "pd.Series[float]"),
pd.Series,
Expand All @@ -1485,12 +1534,22 @@ def test_updated_astype() -> None:
pd.Series,
np.float64,
)
check(assert_type(s.astype("Float32"), "pd.Series[float]"), pd.Series, np.float32)
check(assert_type(s.astype("Float64"), "pd.Series[float]"), pd.Series, np.float64)
# Numpy float types
check(assert_type(s.astype(np.float16), "pd.Series[float]"), pd.Series, np.float16)
check(assert_type(s.astype(np.float32), "pd.Series[float]"), pd.Series, np.float32)
check(assert_type(s.astype(np.float64), "pd.Series[float]"), pd.Series, np.float64)
check(assert_type(s.astype(float), "pd.Series[float]"), pd.Series, float)
check(assert_type(s.astype("float"), "pd.Series[float]"), pd.Series, float)
check(assert_type(s.astype("float16"), "pd.Series[float]"), pd.Series, np.float16)
check(assert_type(s.astype("float32"), "pd.Series[float]"), pd.Series, np.float32)
check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.float64)

# Complex types

# Builtin complex types
check(assert_type(s.astype(complex), "pd.Series[complex]"), pd.Series, complex)
check(assert_type(s.astype("complex"), "pd.Series[complex]"), pd.Series, complex)
# Numpy complex types
check(
assert_type(s.astype(np.complex64), "pd.Series[complex]"),
pd.Series,
Expand All @@ -1501,17 +1560,16 @@ def test_updated_astype() -> None:
pd.Series,
np.complex128,
)
check(assert_type(s.astype(complex), "pd.Series[complex]"), pd.Series, complex)
check(assert_type(s.astype("complex"), "pd.Series[complex]"), pd.Series, complex)

check(
assert_type(s1.astype(pd.BooleanDtype()), "pd.Series[bool]"),
assert_type(s.astype("complex64"), "pd.Series[complex]"),
pd.Series,
np.bool_,
np.complex64,
)
check(
assert_type(s.astype("complex128"), "pd.Series[complex]"),
pd.Series,
np.complex128,
)
check(assert_type(s.astype("bool"), "pd.Series[bool]"), pd.Series, np.bool_)
check(assert_type(s.astype(bool), "pd.Series[bool]"), pd.Series, np.bool_)
check(assert_type(s.astype(np.bool_), "pd.Series[bool]"), pd.Series, np.bool_)

check(
assert_type(s.astype("timedelta64[ns]"), TimedeltaSeries),
Expand Down