diff --git a/pandas-stubs/core/arrays/arrow/dtype.pyi b/pandas-stubs/core/arrays/arrow/dtype.pyi index 041012a7a..9b2f88e86 100644 --- a/pandas-stubs/core/arrays/arrow/dtype.pyi +++ b/pandas-stubs/core/arrays/arrow/dtype.pyi @@ -1,7 +1,11 @@ import pyarrow as pa +from pandas._libs.missing import NAType + from pandas.core.dtypes.base import StorageExtensionDtype class ArrowDtype(StorageExtensionDtype): pyarrow_dtype: pa.DataType def __init__(self, pyarrow_dtype: pa.DataType) -> None: ... + @property + def na_value(self) -> NAType: ... diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index 935948cc6..5a9e7ba60 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -1,9 +1,7 @@ import numpy as np -from pandas._typing import ( - Scalar, - type_t, -) +from pandas._libs.missing import NAType +from pandas._typing import type_t from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype @@ -12,14 +10,9 @@ from .masked import BaseMaskedArray as BaseMaskedArray class BooleanDtype(ExtensionDtype): name: str = ... @property - def na_value(self) -> Scalar: ... - @property - def type(self) -> type_t: ... - @property - def kind(self) -> str: ... + def na_value(self) -> NAType: ... @classmethod def construct_array_type(cls) -> type_t[BooleanArray]: ... - def __from_arrow__(self, array): ... def coerce_to_array(values, mask=..., copy: bool = ...): ... diff --git a/pandas-stubs/core/arrays/integer.pyi b/pandas-stubs/core/arrays/integer.pyi index 30046a98a..803bf7ade 100644 --- a/pandas-stubs/core/arrays/integer.pyi +++ b/pandas-stubs/core/arrays/integer.pyi @@ -1,25 +1,17 @@ +from pandas._libs.missing import NAType + from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype from .masked import BaseMaskedArray -_type = type - class _IntegerDtype(ExtensionDtype): - name: str - base = ... - type: _type - na_value = ... - def is_signed_integer(self): ... 
- def is_unsigned_integer(self): ... - def numpy_dtype(self): ... - def kind(self): ... - def itemsize(self): ... + base: None + @property + def na_value(self) -> NAType: ... + @property + def itemsize(self) -> int: ... @classmethod - def construct_array_type(cls): ... - def __from_arrow__(self, array): ... - -def safe_cast(values, dtype, copy): ... -def coerce_to_array(values, dtype, mask=..., copy: bool = ...): ... + def construct_array_type(cls) -> type[IntegerArray]: ... class IntegerArray(BaseMaskedArray): def dtype(self): ... diff --git a/pandas-stubs/core/arrays/numpy_.pyi b/pandas-stubs/core/arrays/numpy_.pyi index 535b1905d..af65d3fac 100644 --- a/pandas-stubs/core/arrays/numpy_.pyi +++ b/pandas-stubs/core/arrays/numpy_.pyi @@ -1,3 +1,4 @@ +import numpy as np from numpy.lib.mixins import NDArrayOperatorsMixin from pandas.core.arrays.base import ( ExtensionArray, @@ -7,21 +8,10 @@ from pandas.core.arrays.base import ( from pandas.core.dtypes.dtypes import ExtensionDtype class PandasDtype(ExtensionDtype): - def __init__(self, dtype) -> None: ... @property - def numpy_dtype(self): ... + def numpy_dtype(self) -> np.dtype: ... @property - def name(self): ... - @property - def type(self): ... - @classmethod - def construct_from_string(cls, string): ... - @classmethod - def construct_array_type(cls): ... - @property - def kind(self): ... - @property - def itemsize(self): ... + def itemsize(self) -> int: ... class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ... 
diff --git a/pandas-stubs/core/arrays/sparse/dtype.pyi b/pandas-stubs/core/arrays/sparse/dtype.pyi index 630b5be03..23b736e3b 100644 --- a/pandas-stubs/core/arrays/sparse/dtype.pyi +++ b/pandas-stubs/core/arrays/sparse/dtype.pyi @@ -1,6 +1,7 @@ from pandas._typing import ( Dtype, Scalar, + npt, ) from pandas.core.dtypes.base import ExtensionDtype @@ -8,26 +9,9 @@ from pandas.core.dtypes.dtypes import ( register_extension_dtype as register_extension_dtype, ) -# merged types from pylance - class SparseDtype(ExtensionDtype): - def __init__(self, dtype: Dtype = ..., fill_value: Scalar | None = ...) -> None: ... - def __hash__(self): ... - def __eq__(self, other) -> bool: ... - @property - def fill_value(self): ... - @property - def kind(self): ... - @property - def type(self): ... - @property - def subtype(self): ... + def __init__( + self, dtype: Dtype | npt.DTypeLike = ..., fill_value: Scalar | None = ... + ) -> None: ... @property - def name(self): ... - @classmethod - def construct_array_type(cls): ... - @classmethod - def construct_from_string(cls, string): ... - @classmethod - def is_dtype(cls, dtype): ... - def update_dtype(self, dtype): ... + def fill_value(self) -> Scalar | None: ... diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index 700510120..4ae56b77c 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -1,17 +1,15 @@ +from typing import Literal + from pandas.core.arrays import PandasArray -from pandas._typing import type_t +from pandas._libs.missing import NAType from pandas.core.dtypes.base import ExtensionDtype class StringDtype(ExtensionDtype): - name: str = ... - na_value = ... + def __init__(self, storage: Literal["python", "pyarrow"] | None = ...) -> None: ... @property - def type(self) -> type_t: ... - @classmethod - def construct_array_type(cls) -> type_t[StringArray]: ... - def __from_arrow__(self, array): ... + def na_value(self) -> NAType: ... 
class StringArray(PandasArray): def __init__(self, values, copy: bool = ...) -> None: ... diff --git a/pandas-stubs/core/dtypes/base.pyi b/pandas-stubs/core/dtypes/base.pyi index dc7ea85ce..8d3ae4556 100644 --- a/pandas-stubs/core/dtypes/base.pyi +++ b/pandas-stubs/core/dtypes/base.pyi @@ -1,26 +1,30 @@ +from typing import Literal + from pandas.core.arrays import ExtensionArray +from pandas._libs import NaTType +from pandas._libs.missing import NAType from pandas._typing import type_t class ExtensionDtype: - def __eq__(self, other) -> bool: ... - def __hash__(self) -> int: ... - def __ne__(self, other) -> bool: ... @property - def na_value(self): ... + def na_value(self) -> NAType | NaTType: ... @property def type(self) -> type_t: ... @property - def kind(self) -> str: ... + def kind( + self, + ) -> Literal["b", "i", "u", "f", "c", "m", "M", "O", "S", "U", "V"]: ... @property def name(self) -> str: ... @property def names(self) -> list[str] | None: ... + def empty(self, size: int | tuple[int, ...]) -> ExtensionArray: ... @classmethod def construct_array_type(cls) -> type_t[ExtensionArray]: ... @classmethod - def construct_from_string(cls, string: str): ... + def construct_from_string(cls, string: str) -> ExtensionDtype: ... @classmethod - def is_dtype(cls, dtype) -> bool: ... + def is_dtype(cls, dtype: object) -> bool: ... class StorageExtensionDtype(ExtensionDtype): ... 
diff --git a/pandas-stubs/core/dtypes/dtypes.pyi b/pandas-stubs/core/dtypes/dtypes.pyi index 15651cd91..1ee3bfc7d 100644 --- a/pandas-stubs/core/dtypes/dtypes.pyi +++ b/pandas-stubs/core/dtypes/dtypes.pyi @@ -1,125 +1,57 @@ +import datetime as dt from typing import ( Any, - Sequence, + Literal, ) +import numpy as np from pandas.core.indexes.base import Index +from pandas.core.series import Series -from pandas._libs.tslibs import ( # , timezones as timezones - Period as Period, - Timestamp, +from pandas._libs import NaTType +from pandas._libs.tslibs import BaseOffset +from pandas._typing import ( + Ordered, + npt, ) -from pandas._typing import Ordered from .base import ExtensionDtype as ExtensionDtype -_str = str - def register_extension_dtype(cls: type[ExtensionDtype]) -> type[ExtensionDtype]: ... class BaseMaskedDtype(ExtensionDtype): ... - -class PandasExtensionDtype(ExtensionDtype): - subdtype = ... - str: _str | None = ... - num: int = ... - shape: tuple[int, ...] = ... - itemsize: int = ... - base = ... - isbuiltin: int = ... - isnative: int = ... - def __hash__(self) -> int: ... - @classmethod - def reset_cache(cls) -> None: ... - -class CategoricalDtypeType(type): ... +class PandasExtensionDtype(ExtensionDtype): ... class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): - name: _str = ... - type: type[CategoricalDtypeType] = ... - kind: _str = ... - str: _str = ... - base = ... def __init__( - self, categories: Sequence[Any] | None = ..., ordered: Ordered = ... + self, + categories: Series | Index | list[Any] | None = ..., + ordered: Ordered = ..., ) -> None: ... - @classmethod - def construct_from_string(cls, string: _str) -> CategoricalDtype: ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... - @classmethod - def construct_array_type(cls): ... - @staticmethod - def validate_ordered(ordered: Ordered) -> None: ... - @staticmethod - def validate_categories(categories, fastpath: bool = ...): ... 
- def update_dtype(self, dtype: _str | CategoricalDtype) -> CategoricalDtype: ... @property def categories(self) -> Index: ... @property def ordered(self) -> Ordered: ... class DatetimeTZDtype(PandasExtensionDtype): - type: type[Timestamp] = ... - kind: _str = ... - str: _str = ... - num: int = ... - base = ... - na_value = ... - def __init__(self, unit: _str = ..., tz=...) -> None: ... + def __init__( + self, unit: Literal["ns"] = ..., tz: str | int | dt.tzinfo | None = ... + ) -> None: ... @property - def unit(self): ... + def unit(self) -> Literal["ns"]: ... @property - def tz(self): ... - @classmethod - def construct_array_type(cls): ... - @classmethod - def construct_from_string(cls, string: _str): ... + def tz(self) -> dt.tzinfo: ... @property - def name(self) -> _str: ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... + def na_value(self) -> NaTType: ... class PeriodDtype(PandasExtensionDtype): - type: type[Period] = ... - kind: _str = ... - str: _str = ... - base = ... - num: int = ... - def __new__(cls, freq=...): ... - @property - def freq(self): ... - @classmethod - def construct_from_string(cls, string: _str): ... + def __init__(self, freq: str | BaseOffset = ...) -> None: ... @property - def name(self) -> _str: ... + def freq(self) -> BaseOffset: ... @property - def na_value(self): ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... - @classmethod - def is_dtype(cls, dtype) -> bool: ... - @classmethod - def construct_array_type(cls): ... - def __from_arrow__(self, array): ... + def na_value(self) -> NaTType: ... class IntervalDtype(PandasExtensionDtype): - name: _str = ... - kind: _str = ... - str: _str = ... - base = ... - num: int = ... - def __new__(cls, subtype=...): ... - @property - def subtype(self): ... - @classmethod - def construct_array_type(cls): ... - @classmethod - def construct_from_string(cls, string: _str): ... + def __init__(self, subtype: str | npt.DTypeLike | None = ...) -> None: ... 
@property - def type(self): ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... - @classmethod - def is_dtype(cls, dtype) -> bool: ... - def __from_arrow__(self, array): ... + def subtype(self) -> np.dtype | None: ... diff --git a/tests/test_dtypes.py b/tests/test_dtypes.py new file mode 100644 index 000000000..9a3257496 --- /dev/null +++ b/tests/test_dtypes.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import datetime as dt +from datetime import ( + timedelta, + timezone, +) +from typing import ( + Literal, + Optional, + Union, +) + +import numpy as np +import pandas as pd +from pandas.core.arrays import BooleanArray # noqa: F401 +from pandas.core.arrays import IntegerArray # noqa: F401 +import pyarrow as pa +from typing_extensions import assert_type + +from pandas._libs import NaTType +from pandas._libs.missing import NAType +from pandas._typing import Scalar + +from tests import check + +from pandas.tseries.offsets import ( + BusinessDay, + CustomBusinessDay, + Day, +) + + +def test_datetimetz_dtype() -> None: + dttz_dt = pd.DatetimeTZDtype(unit="ns", tz="UTC") + check(assert_type(dttz_dt, pd.DatetimeTZDtype), pd.DatetimeTZDtype) + check( + assert_type( + pd.DatetimeTZDtype(unit="ns", tz=timezone(timedelta(hours=1))), + pd.DatetimeTZDtype, + ), + pd.DatetimeTZDtype, + ) + check(assert_type(dttz_dt.unit, Literal["ns"]), str) + check(assert_type(dttz_dt.tz, dt.tzinfo), dt.tzinfo) + check(assert_type(dttz_dt.name, str), str) + check(assert_type(dttz_dt.na_value, NaTType), NaTType) + + +def test_period_dtype() -> None: + p_dt = pd.PeriodDtype(freq="D") + check(assert_type(p_dt, pd.PeriodDtype), pd.PeriodDtype) + check(assert_type(pd.PeriodDtype(freq=Day()), pd.PeriodDtype), pd.PeriodDtype) + check( + assert_type(pd.PeriodDtype(freq=BusinessDay()), pd.PeriodDtype), pd.PeriodDtype + ) + check( + assert_type(pd.PeriodDtype(freq=CustomBusinessDay()), pd.PeriodDtype), + pd.PeriodDtype, + ) + check( + assert_type(p_dt.freq, 
pd.tseries.offsets.BaseOffset), + pd.tseries.offsets.DateOffset, + ) + check(assert_type(p_dt.na_value, NaTType), NaTType) + check(assert_type(p_dt.name, str), str) + + +def test_interval_dtype() -> None: + i_dt = pd.IntervalDtype("int64") + check(assert_type(i_dt, pd.IntervalDtype), pd.IntervalDtype) + check(assert_type(pd.IntervalDtype(np.int64), pd.IntervalDtype), pd.IntervalDtype) + check(assert_type(pd.IntervalDtype(float), pd.IntervalDtype), pd.IntervalDtype) + check(assert_type(pd.IntervalDtype(complex), pd.IntervalDtype), pd.IntervalDtype) + check( + assert_type(pd.IntervalDtype(np.timedelta64), pd.IntervalDtype), + pd.IntervalDtype, + ) + check( + assert_type(pd.IntervalDtype(np.datetime64), pd.IntervalDtype), pd.IntervalDtype + ) + + +def test_int64_dtype() -> None: + check(assert_type(pd.Int8Dtype(), pd.Int8Dtype), pd.Int8Dtype) + check(assert_type(pd.Int16Dtype(), pd.Int16Dtype), pd.Int16Dtype) + check(assert_type(pd.Int32Dtype(), pd.Int32Dtype), pd.Int32Dtype) + check(assert_type(pd.Int64Dtype(), pd.Int64Dtype), pd.Int64Dtype) + check(assert_type(pd.UInt8Dtype(), pd.UInt8Dtype), pd.UInt8Dtype) + check(assert_type(pd.UInt16Dtype(), pd.UInt16Dtype), pd.UInt16Dtype) + check(assert_type(pd.UInt32Dtype(), pd.UInt32Dtype), pd.UInt32Dtype) + check(assert_type(pd.UInt64Dtype(), pd.UInt64Dtype), pd.UInt64Dtype) + + i64dt = pd.Int64Dtype() + check(assert_type(i64dt.itemsize, int), int) + check(assert_type(i64dt.na_value, NAType), NAType) + check(assert_type(i64dt.construct_array_type(), "type[IntegerArray]"), type) + + +def test_categorical_dtype() -> None: + cdt = pd.CategoricalDtype(categories=["a", "b", "c"], ordered=True) + check(assert_type(cdt, pd.CategoricalDtype), pd.CategoricalDtype) + check( + assert_type(pd.CategoricalDtype(categories=[1, 2, 3]), pd.CategoricalDtype), + pd.CategoricalDtype, + ) + check(assert_type(cdt.categories, pd.Index), pd.Index) + check(assert_type(cdt.ordered, Optional[bool]), bool) + + +def test_sparse_dtype() -> None: + 
s_dt = pd.SparseDtype("i4") + check(assert_type(s_dt, pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(str), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(complex), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(bool), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(np.int64), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(int), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(float), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(np.datetime64), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(np.timedelta64), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype("datetime64"), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(), pd.SparseDtype), pd.SparseDtype) + # pyright ignore because mypy does not like non-minimal unions, while pyright + # can't minimize to check + check( + assert_type( + s_dt.fill_value, # pyright: ignore[reportGeneralTypeIssues] + Union[Scalar, None], + ), + int, + ) + + +def test_string_dtype() -> None: + s_dt = pd.StringDtype("pyarrow") + check(assert_type(pd.StringDtype("pyarrow"), pd.StringDtype), pd.StringDtype) + check(assert_type(pd.StringDtype("python"), pd.StringDtype), pd.StringDtype) + check(assert_type(s_dt.na_value, NAType), NAType) + + +def test_boolean_dtype() -> None: + b_dt = pd.BooleanDtype() + check(assert_type(b_dt, pd.BooleanDtype), pd.BooleanDtype) + check(assert_type(b_dt.na_value, NAType), NAType) + check(assert_type(b_dt.construct_array_type(), "type[BooleanArray]"), type) + + +def test_arrow_dtype() -> None: + a_dt = pd.ArrowDtype(pa.int64()) + check(assert_type(a_dt, pd.ArrowDtype), pd.ArrowDtype) + check(assert_type(a_dt.pyarrow_dtype, pa.DataType), pa.DataType)