From 322a4cbabd6f08e25ce7c5c596b4c07704f205c4 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 16 Oct 2022 06:22:53 +0100 Subject: [PATCH 1/6] ENH: Improve dtypes --- pandas-stubs/core/arrays/string_.pyi | 9 +- pandas-stubs/core/dtypes/dtypes.pyi | 84 +++++-------------- tests/test_dtypes.py | 118 +++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 68 deletions(-) create mode 100644 tests/test_dtypes.py diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index 700510120..75229fc88 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -1,3 +1,7 @@ +from typing import Literal + +import numpy as np +import pandas as pd from pandas.core.arrays import PandasArray from pandas._typing import type_t @@ -5,12 +9,9 @@ from pandas._typing import type_t from pandas.core.dtypes.base import ExtensionDtype class StringDtype(ExtensionDtype): - name: str = ... - na_value = ... + def __init__(self, storage: Literal["python", "pyarrow"] | None) -> None: ... @property def type(self) -> type_t: ... - @classmethod - def construct_array_type(cls) -> type_t[StringArray]: ... def __from_arrow__(self, array): ... class StringArray(PandasArray): diff --git a/pandas-stubs/core/dtypes/dtypes.pyi b/pandas-stubs/core/dtypes/dtypes.pyi index 15651cd91..317c40dec 100644 --- a/pandas-stubs/core/dtypes/dtypes.pyi +++ b/pandas-stubs/core/dtypes/dtypes.pyi @@ -1,15 +1,15 @@ -from typing import ( - Any, - Sequence, -) +import datetime as dt +from typing import Any +import numpy as np from pandas.core.indexes.base import Index +from pandas.core.series import Series -from pandas._libs.tslibs import ( # , timezones as timezones - Period as Period, - Timestamp, +from pandas._libs.tslibs import BaseOffset +from pandas._typing import ( + Ordered, + npt, ) -from pandas._typing import Ordered from .base import ExtensionDtype as ExtensionDtype @@ -32,94 +32,50 @@ class PandasExtensionDtype(ExtensionDtype): @classmethod def reset_cache(cls) -> None: ... -class CategoricalDtypeType(type): ... - class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): - name: _str = ... - type: type[CategoricalDtypeType] = ... - kind: _str = ... - str: _str = ... - base = ... def __init__( - self, categories: Sequence[Any] | None = ..., ordered: Ordered = ... + self, + categories: Series | Index | list[Any] | None = ..., + ordered: Ordered = ..., ) -> None: ... - @classmethod - def construct_from_string(cls, string: _str) -> CategoricalDtype: ... def __hash__(self) -> int: ... def __eq__(self, other) -> bool: ... - @classmethod - def construct_array_type(cls): ... - @staticmethod - def validate_ordered(ordered: Ordered) -> None: ... - @staticmethod - def validate_categories(categories, fastpath: bool = ...): ... - def update_dtype(self, dtype: _str | CategoricalDtype) -> CategoricalDtype: ... @property def categories(self) -> Index: ... @property def ordered(self) -> Ordered: ... class DatetimeTZDtype(PandasExtensionDtype): - type: type[Timestamp] = ... - kind: _str = ... - str: _str = ... - num: int = ... - base = ... - na_value = ... - def __init__(self, unit: _str = ..., tz=...) -> None: ... + def __init__( + self, unit: _str = ..., tz: str | int | dt.tzinfo | None = ... + ) -> None: ... @property def unit(self): ... @property def tz(self): ... - @classmethod - def construct_array_type(cls): ... - @classmethod - def construct_from_string(cls, string: _str): ... @property def name(self) -> _str: ... def __hash__(self) -> int: ... def __eq__(self, other) -> bool: ... class PeriodDtype(PandasExtensionDtype): - type: type[Period] = ... - kind: _str = ... - str: _str = ... - base = ... - num: int = ... - def __new__(cls, freq=...): ... + def __new__(cls, freq: str | BaseOffset = ...): ... + def __hash__(self) -> int: ... + def __eq__(self, other) -> bool: ... @property def freq(self): ... - @classmethod - def construct_from_string(cls, string: _str): ... @property def name(self) -> _str: ... @property def na_value(self): ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... - @classmethod - def is_dtype(cls, dtype) -> bool: ... - @classmethod - def construct_array_type(cls): ... def __from_arrow__(self, array): ... class IntervalDtype(PandasExtensionDtype): - name: _str = ... - kind: _str = ... - str: _str = ... - base = ... - num: int = ... - def __new__(cls, subtype=...): ... + def __new__(cls, subtype: str | npt.DTypeLike | None = ...): ... + def __hash__(self) -> int: ... + def __eq__(self, other) -> bool: ... @property def subtype(self): ... - @classmethod - def construct_array_type(cls): ... - @classmethod - def construct_from_string(cls, string: _str): ... @property def type(self): ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... - @classmethod - def is_dtype(cls, dtype) -> bool: ... def __from_arrow__(self, array): ... diff --git a/tests/test_dtypes.py b/tests/test_dtypes.py new file mode 100644 index 000000000..8db3e3ff1 --- /dev/null +++ b/tests/test_dtypes.py @@ -0,0 +1,118 @@ +from datetime import ( + timedelta, + timezone, +) + +import numpy as np +import pandas as pd +import pyarrow as pa +from typing_extensions import assert_type + +from tests import check + +from pandas.tseries.offsets import ( + BusinessDay, + CustomBusinessDay, + Day, +) + + +def test_datetimetz_dtype() -> None: + check( + assert_type(pd.DatetimeTZDtype(unit="ns", tz="UTC"), pd.DatetimeTZDtype), + pd.DatetimeTZDtype, + ) + check( + assert_type( + pd.DatetimeTZDtype(unit="ns", tz=timezone(timedelta(hours=1))), + pd.DatetimeTZDtype, + ), + pd.DatetimeTZDtype, + ) + + +def test_period_dtype() -> None: + check(assert_type(pd.PeriodDtype(freq="D"), pd.PeriodDtype), pd.PeriodDtype) + check(assert_type(pd.PeriodDtype(freq=Day()), pd.PeriodDtype), pd.PeriodDtype) + check( + assert_type(pd.PeriodDtype(freq=BusinessDay()), pd.PeriodDtype), pd.PeriodDtype + ) + check( + assert_type(pd.PeriodDtype(freq=CustomBusinessDay()), pd.PeriodDtype), + pd.PeriodDtype, + ) + + +def test_interval_dtype() -> None: + check( + assert_type( + pd.Interval(pd.Timestamp("2017-01-01"), pd.Timestamp("2017-01-02")), + "pd.Interval[pd.Timestamp]", + ), + pd.Interval, + ) + check( + assert_type(pd.Interval(1, 2, closed="left"), "pd.Interval[int]"), pd.Interval + ) + check( + assert_type(pd.Interval(1.0, 2.5, closed="right"), "pd.Interval[float]"), + pd.Interval, + ) + check( + assert_type(pd.Interval(1.0, 2.5, closed="both"), "pd.Interval[float]"), + pd.Interval, + ) + check( + assert_type( + pd.Interval( + pd.Timedelta("1 day"), pd.Timedelta("2 days"), closed="neither" + ), + "pd.Interval[pd.Timedelta]", + ), + pd.Interval, + ) + + +def test_int64_dtype() -> None: + check(assert_type(pd.Int64Dtype(), pd.Int64Dtype), pd.Int64Dtype) + + +def test_categorical_dtype() -> None: + check( + assert_type( + pd.CategoricalDtype(categories=["a", "b", "c"], ordered=True), + pd.CategoricalDtype, + ), + pd.CategoricalDtype, + ) + check( + assert_type(pd.CategoricalDtype(categories=[1, 2, 3]), pd.CategoricalDtype), + pd.CategoricalDtype, + ) + + +def test_sparse_dtype() -> None: + check(assert_type(pd.SparseDtype(str), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(complex), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(bool), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(int), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(np.int64), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(str), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(float), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(np.datetime64), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(np.timedelta64), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype("datetime64"), pd.SparseDtype), pd.SparseDtype) + check(assert_type(pd.SparseDtype(), pd.SparseDtype), pd.SparseDtype) + + +def test_string_dtype() -> None: + check(assert_type(pd.StringDtype("pyarrow"), pd.StringDtype), pd.StringDtype) + check(assert_type(pd.StringDtype("python"), pd.StringDtype), pd.StringDtype) + + +def test_boolean_dtype() -> None: + check(assert_type(pd.BooleanDtype(), pd.BooleanDtype), pd.BooleanDtype) + + +def test_arrow_dtype() -> None: + check(assert_type(pd.ArrowDtype(pa.int64()), pd.ArrowDtype), pd.ArrowDtype) From df9762e1f05e70865a036f46f9d843b186d5e73e Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 16 Oct 2022 22:39:53 +0100 Subject: [PATCH 2/6] CLN: Remove unnecesssary parts of classes (eq, hash) --- pandas-stubs/core/dtypes/dtypes.pyi | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/pandas-stubs/core/dtypes/dtypes.pyi b/pandas-stubs/core/dtypes/dtypes.pyi index 317c40dec..7a9f70650 100644 --- a/pandas-stubs/core/dtypes/dtypes.pyi +++ b/pandas-stubs/core/dtypes/dtypes.pyi @@ -13,22 +13,20 @@ from pandas._typing import ( from .base import ExtensionDtype as ExtensionDtype -_str = str - def register_extension_dtype(cls: type[ExtensionDtype]) -> type[ExtensionDtype]: ... class BaseMaskedDtype(ExtensionDtype): ... class PandasExtensionDtype(ExtensionDtype): subdtype = ... - str: _str | None = ... + str: str | None = ... num: int = ... shape: tuple[int, ...] = ... itemsize: int = ... base = ... isbuiltin: int = ... isnative: int = ... - def __hash__(self) -> int: ... + @classmethod def reset_cache(cls) -> None: ... @@ -38,8 +36,6 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): categories: Series | Index | list[Any] | None = ..., ordered: Ordered = ..., ) -> None: ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... @property def categories(self) -> Index: ... @property @@ -47,33 +43,27 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): class DatetimeTZDtype(PandasExtensionDtype): def __init__( - self, unit: _str = ..., tz: str | int | dt.tzinfo | None = ... + self, unit: str = ..., tz: str | int | dt.tzinfo | None = ... ) -> None: ... @property def unit(self): ... @property def tz(self): ... @property - def name(self) -> _str: ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... + def name(self) -> str: ... class PeriodDtype(PandasExtensionDtype): def __new__(cls, freq: str | BaseOffset = ...): ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... @property def freq(self): ... @property - def name(self) -> _str: ... + def name(self) -> str: ... @property def na_value(self): ... def __from_arrow__(self, array): ... class IntervalDtype(PandasExtensionDtype): def __new__(cls, subtype: str | npt.DTypeLike | None = ...): ... - def __hash__(self) -> int: ... - def __eq__(self, other) -> bool: ... @property def subtype(self): ... @property From a687e1f08ce6d33d9d22b5a22afbab359db62c43 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 18 Oct 2022 08:49:10 +0100 Subject: [PATCH 3/6] CLN/ENH: Restructure Dtypes and add typing info --- pandas-stubs/core/arrays/boolean.pyi | 12 +++------ pandas-stubs/core/arrays/integer.pyi | 31 ++++++++++++----------- pandas-stubs/core/arrays/numpy_.pyi | 18 ++++--------- pandas-stubs/core/arrays/sparse/dtype.pyi | 29 +++++++-------------- pandas-stubs/core/arrays/string_.pyi | 4 --- pandas-stubs/core/dtypes/base.pyi | 6 ++--- 6 files changed, 35 insertions(+), 65 deletions(-) diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index 935948cc6..704745628 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -1,9 +1,7 @@ import numpy as np -from pandas._typing import ( - Scalar, - type_t, -) +from pandas._libs.missing import NAType +from pandas._typing import type_t from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype @@ -12,11 +10,7 @@ from .masked import BaseMaskedArray as BaseMaskedArray class BooleanDtype(ExtensionDtype): name: str = ... @property - def na_value(self) -> Scalar: ... - @property - def type(self) -> type_t: ... - @property - def kind(self) -> str: ... + def na_value(self) -> NAType: ... @classmethod def construct_array_type(cls) -> type_t[BooleanArray]: ... def __from_arrow__(self, array): ... diff --git a/pandas-stubs/core/arrays/integer.pyi b/pandas-stubs/core/arrays/integer.pyi index 30046a98a..1aba7a87d 100644 --- a/pandas-stubs/core/arrays/integer.pyi +++ b/pandas-stubs/core/arrays/integer.pyi @@ -1,26 +1,27 @@ +import numpy as np + +from pandas._libs.missing import NAType + from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype from .masked import BaseMaskedArray -_type = type - class _IntegerDtype(ExtensionDtype): - name: str - base = ... - type: _type - na_value = ... - def is_signed_integer(self): ... - def is_unsigned_integer(self): ... - def numpy_dtype(self): ... - def kind(self): ... - def itemsize(self): ... + base: None + @property + def na_value(self) -> NAType: ... + @property + def is_signed_integer(self) -> bool: ... + @property + def is_unsigned_integer(self) -> bool: ... + @property + def numpy_dtype(self) -> np.dtype: ... + @property + def itemsize(self) -> int: ... @classmethod - def construct_array_type(cls): ... + def construct_array_type(cls) -> type[IntegerArray]: ... def __from_arrow__(self, array): ... -def safe_cast(values, dtype, copy): ... -def coerce_to_array(values, dtype, mask=..., copy: bool = ...): ... - class IntegerArray(BaseMaskedArray): def dtype(self): ... def __init__(self, values, mask, copy: bool = ...) -> None: ... diff --git a/pandas-stubs/core/arrays/numpy_.pyi b/pandas-stubs/core/arrays/numpy_.pyi index f44e5a6af..793a43500 100644 --- a/pandas-stubs/core/arrays/numpy_.pyi +++ b/pandas-stubs/core/arrays/numpy_.pyi @@ -5,24 +5,16 @@ from pandas.core.arrays.base import ( ExtensionOpsMixin, ) +from pandas._typing import npt + from pandas.core.dtypes.dtypes import ExtensionDtype class PandasDtype(ExtensionDtype): - def __init__(self, dtype) -> None: ... - @property - def numpy_dtype(self): ... - @property - def name(self): ... - @property - def type(self): ... - @classmethod - def construct_from_string(cls, string): ... - @classmethod - def construct_array_type(cls): ... + def __init__(self, dtype: npt.DTypeLike) -> None: ... @property - def kind(self): ... + def numpy_dtype(self) -> np.dtype: ... @property - def itemsize(self): ... + def itemsize(self) -> int: ... class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ... diff --git a/pandas-stubs/core/arrays/sparse/dtype.pyi b/pandas-stubs/core/arrays/sparse/dtype.pyi index 630b5be03..706cc62cc 100644 --- a/pandas-stubs/core/arrays/sparse/dtype.pyi +++ b/pandas-stubs/core/arrays/sparse/dtype.pyi @@ -1,6 +1,9 @@ +import numpy as np + from pandas._typing import ( Dtype, Scalar, + npt, ) from pandas.core.dtypes.base import ExtensionDtype @@ -8,26 +11,12 @@ from pandas.core.dtypes.dtypes import ( register_extension_dtype as register_extension_dtype, ) -# merged types from pylance - class SparseDtype(ExtensionDtype): - def __init__(self, dtype: Dtype = ..., fill_value: Scalar | None = ...) -> None: ... - def __hash__(self): ... - def __eq__(self, other) -> bool: ... - @property - def fill_value(self): ... - @property - def kind(self): ... - @property - def type(self): ... + def __init__( + self, dtype: Dtype | npt.DTypeLike = ..., fill_value: Scalar | None = ... + ) -> None: ... @property - def subtype(self): ... + def fill_value(self) -> Scalar | None: ... @property - def name(self): ... - @classmethod - def construct_array_type(cls): ... - @classmethod - def construct_from_string(cls, string): ... - @classmethod - def is_dtype(cls, dtype): ... - def update_dtype(self, dtype): ... + def subtype(self) -> Dtype: ... + def update_dtype(self, dtype: SparseDtype | npt.DTypeLike): ... diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index 75229fc88..7b87c79e2 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -4,14 +4,10 @@ import numpy as np import pandas as pd from pandas.core.arrays import PandasArray -from pandas._typing import type_t - from pandas.core.dtypes.base import ExtensionDtype class StringDtype(ExtensionDtype): def __init__(self, storage: Literal["python", "pyarrow"] | None) -> None: ... - @property - def type(self) -> type_t: ... def __from_arrow__(self, array): ... class StringArray(PandasArray): diff --git a/pandas-stubs/core/dtypes/base.pyi b/pandas-stubs/core/dtypes/base.pyi index dc7ea85ce..8947f363a 100644 --- a/pandas-stubs/core/dtypes/base.pyi +++ b/pandas-stubs/core/dtypes/base.pyi @@ -3,9 +3,6 @@ from pandas.core.arrays import ExtensionArray from pandas._typing import type_t class ExtensionDtype: - def __eq__(self, other) -> bool: ... - def __hash__(self) -> int: ... - def __ne__(self, other) -> bool: ... @property def na_value(self): ... @property @@ -16,10 +13,11 @@ class ExtensionDtype: def name(self) -> str: ... @property def names(self) -> list[str] | None: ... + def empty(self, size: int | tuple[int, ...]) -> type_t[ExtensionArray]: ... @classmethod def construct_array_type(cls) -> type_t[ExtensionArray]: ... @classmethod - def construct_from_string(cls, string: str): ... + def construct_from_string(cls, string: str) -> ExtensionDtype: ... @classmethod def is_dtype(cls, dtype) -> bool: ... From 3f87ef0ca362cfc507f91100589b487721192f53 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 18 Oct 2022 19:04:06 +0100 Subject: [PATCH 4/6] ENH: Improvements to dtypes --- pandas-stubs/core/arrays/arrow/dtype.pyi | 4 + pandas-stubs/core/arrays/sparse/dtype.pyi | 2 +- pandas-stubs/core/arrays/string_.pyi | 4 + pandas-stubs/core/dtypes/dtypes.pyi | 28 +++--- tests/test_dtypes.py | 117 +++++++++++++++------- 5 files changed, 102 insertions(+), 53 deletions(-) diff --git a/pandas-stubs/core/arrays/arrow/dtype.pyi b/pandas-stubs/core/arrays/arrow/dtype.pyi index dc27fff7c..24fe8ea12 100644 --- a/pandas-stubs/core/arrays/arrow/dtype.pyi +++ b/pandas-stubs/core/arrays/arrow/dtype.pyi @@ -1,8 +1,12 @@ import numpy as np import pyarrow as pa +from pandas._libs.missing import NAType + from pandas.core.dtypes.base import StorageExtensionDtype class ArrowDtype(StorageExtensionDtype): pyarrow_dtype: pa.DataType def __init__(self, pyarrow_dtype: pa.DataType) -> None: ... + @property + def na_value(self) -> NAType: ... diff --git a/pandas-stubs/core/arrays/sparse/dtype.pyi b/pandas-stubs/core/arrays/sparse/dtype.pyi index 706cc62cc..8711f0c92 100644 --- a/pandas-stubs/core/arrays/sparse/dtype.pyi +++ b/pandas-stubs/core/arrays/sparse/dtype.pyi @@ -19,4 +19,4 @@ class SparseDtype(ExtensionDtype): def fill_value(self) -> Scalar | None: ... @property def subtype(self) -> Dtype: ... - def update_dtype(self, dtype: SparseDtype | npt.DTypeLike): ... + def update_dtype(self, dtype: SparseDtype | npt.DTypeLike) -> SparseDtype: ... diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index 7b87c79e2..bc635555c 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -4,11 +4,15 @@ import numpy as np import pandas as pd from pandas.core.arrays import PandasArray +from pandas._libs.missing import NAType + from pandas.core.dtypes.base import ExtensionDtype class StringDtype(ExtensionDtype): def __init__(self, storage: Literal["python", "pyarrow"] | None) -> None: ... def __from_arrow__(self, array): ... + @property + def na_value(self) -> NAType: ... class StringArray(PandasArray): def __init__(self, values, copy: bool = ...) -> None: ... diff --git a/pandas-stubs/core/dtypes/dtypes.pyi b/pandas-stubs/core/dtypes/dtypes.pyi index 7a9f70650..a85186456 100644 --- a/pandas-stubs/core/dtypes/dtypes.pyi +++ b/pandas-stubs/core/dtypes/dtypes.pyi @@ -1,10 +1,14 @@ import datetime as dt -from typing import Any +from typing import ( + Any, + Literal, +) import numpy as np from pandas.core.indexes.base import Index from pandas.core.series import Series +from pandas._libs import NaTType from pandas._libs.tslibs import BaseOffset from pandas._typing import ( Ordered, @@ -43,29 +47,25 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): class DatetimeTZDtype(PandasExtensionDtype): def __init__( - self, unit: str = ..., tz: str | int | dt.tzinfo | None = ... + self, unit: Literal["ns"] = ..., tz: str | int | dt.tzinfo | None = ... ) -> None: ... @property - def unit(self): ... + def unit(self) -> Literal["ns"]: ... @property - def tz(self): ... + def tz(self) -> dt.tzinfo: ... @property - def name(self) -> str: ... + def na_value(self) -> NaTType: ... class PeriodDtype(PandasExtensionDtype): - def __new__(cls, freq: str | BaseOffset = ...): ... - @property - def freq(self): ... + def __init__(self, freq: str | BaseOffset = ...): ... @property - def name(self) -> str: ... + def freq(self) -> BaseOffset: ... @property - def na_value(self): ... + def na_value(self) -> NaTType: ... def __from_arrow__(self, array): ... class IntervalDtype(PandasExtensionDtype): - def __new__(cls, subtype: str | npt.DTypeLike | None = ...): ... - @property - def subtype(self): ... + def __init__(self, subtype: str | npt.DTypeLike | None = ...): ... @property - def type(self): ... + def subtype(self) -> np.dtype | None: ... def __from_arrow__(self, array): ... diff --git a/tests/test_dtypes.py b/tests/test_dtypes.py index 8db3e3ff1..5bec9360d 100644 --- a/tests/test_dtypes.py +++ b/tests/test_dtypes.py @@ -1,13 +1,29 @@ +from __future__ import annotations + +import datetime as dt from datetime import ( timedelta, timezone, ) +from typing import ( + Literal, + Optional, + Union, +) import numpy as np import pandas as pd +from pandas.core.arrays import ( + BooleanArray, + IntegerArray, +) import pyarrow as pa from typing_extensions import assert_type +from pandas._libs import NaTType +from pandas._libs.missing import NAType +from pandas._typing import Dtype + from tests import check from pandas.tseries.offsets import ( @@ -18,10 +34,8 @@ def test_datetimetz_dtype() -> None: - check( - assert_type(pd.DatetimeTZDtype(unit="ns", tz="UTC"), pd.DatetimeTZDtype), - pd.DatetimeTZDtype, - ) + dttz_dt = pd.DatetimeTZDtype(unit="ns", tz="UTC") + check(assert_type(dttz_dt, pd.DatetimeTZDtype), pd.DatetimeTZDtype) check( assert_type( pd.DatetimeTZDtype(unit="ns", tz=timezone(timedelta(hours=1))), @@ -29,10 +43,15 @@ def test_datetimetz_dtype() -> None: ), pd.DatetimeTZDtype, ) + check(assert_type(dttz_dt.unit, Literal["ns"]), str) + check(assert_type(dttz_dt.tz, dt.tzinfo), dt.tzinfo) + check(assert_type(dttz_dt.name, str), str) + check(assert_type(dttz_dt.na_value, NaTType), NaTType) def test_period_dtype() -> None: - check(assert_type(pd.PeriodDtype(freq="D"), pd.PeriodDtype), pd.PeriodDtype) + p_dt = pd.PeriodDtype(freq="D") + check(assert_type(p_dt, pd.PeriodDtype), pd.PeriodDtype) check(assert_type(pd.PeriodDtype(freq=Day()), pd.PeriodDtype), pd.PeriodDtype) check( assert_type(pd.PeriodDtype(freq=BusinessDay()), pd.PeriodDtype), pd.PeriodDtype @@ -41,61 +60,65 @@ def test_period_dtype() -> None: assert_type(pd.PeriodDtype(freq=CustomBusinessDay()), pd.PeriodDtype), pd.PeriodDtype, ) + check( + assert_type(p_dt.freq, pd.tseries.offsets.BaseOffset), + pd.tseries.offsets.DateOffset, + ) + check(assert_type(p_dt.na_value, NaTType), NaTType) + check(assert_type(p_dt.name, str), str) def test_interval_dtype() -> None: + i_dt = pd.IntervalDtype("int64") + check(assert_type(i_dt, pd.IntervalDtype), pd.IntervalDtype) + check(assert_type(pd.IntervalDtype(np.int64), pd.IntervalDtype), pd.IntervalDtype) + check(assert_type(pd.IntervalDtype(float), pd.IntervalDtype), pd.IntervalDtype) + check(assert_type(pd.IntervalDtype(complex), pd.IntervalDtype), pd.IntervalDtype) check( - assert_type( - pd.Interval(pd.Timestamp("2017-01-01"), pd.Timestamp("2017-01-02")), - "pd.Interval[pd.Timestamp]", - ), - pd.Interval, - ) - check( - assert_type(pd.Interval(1, 2, closed="left"), "pd.Interval[int]"), pd.Interval - ) - check( - assert_type(pd.Interval(1.0, 2.5, closed="right"), "pd.Interval[float]"), - pd.Interval, + assert_type(pd.IntervalDtype(np.timedelta64), pd.IntervalDtype), + pd.IntervalDtype, ) check( - assert_type(pd.Interval(1.0, 2.5, closed="both"), "pd.Interval[float]"), - pd.Interval, - ) - check( - assert_type( - pd.Interval( - pd.Timedelta("1 day"), pd.Timedelta("2 days"), closed="neither" - ), - "pd.Interval[pd.Timedelta]", - ), - pd.Interval, + assert_type(pd.IntervalDtype(np.datetime64), pd.IntervalDtype), pd.IntervalDtype ) def test_int64_dtype() -> None: + check(assert_type(pd.Int8Dtype(), pd.Int8Dtype), pd.Int8Dtype) + check(assert_type(pd.Int16Dtype(), pd.Int16Dtype), pd.Int16Dtype) + check(assert_type(pd.Int32Dtype(), pd.Int32Dtype), pd.Int32Dtype) check(assert_type(pd.Int64Dtype(), pd.Int64Dtype), pd.Int64Dtype) + check(assert_type(pd.UInt8Dtype(), pd.UInt8Dtype), pd.UInt8Dtype) + check(assert_type(pd.UInt16Dtype(), pd.UInt16Dtype), pd.UInt16Dtype) + check(assert_type(pd.UInt32Dtype(), pd.UInt32Dtype), pd.UInt32Dtype) + check(assert_type(pd.UInt64Dtype(), pd.UInt64Dtype), pd.UInt64Dtype) + + i64dt = pd.Int64Dtype() + check(assert_type(i64dt.itemsize, int), int) + check(assert_type(i64dt.na_value, NAType), NAType) + check(assert_type(i64dt.is_signed_integer, bool), bool) + check(assert_type(i64dt.is_unsigned_integer, bool), bool) + check(assert_type(i64dt.numpy_dtype, np.dtype), np.dtype) + check(assert_type(i64dt.construct_array_type(), type[IntegerArray]), type) def test_categorical_dtype() -> None: - check( - assert_type( - pd.CategoricalDtype(categories=["a", "b", "c"], ordered=True), - pd.CategoricalDtype, - ), - pd.CategoricalDtype, - ) + cdt = pd.CategoricalDtype(categories=["a", "b", "c"], ordered=True) + check(assert_type(cdt, pd.CategoricalDtype), pd.CategoricalDtype) check( assert_type(pd.CategoricalDtype(categories=[1, 2, 3]), pd.CategoricalDtype), pd.CategoricalDtype, ) + check(assert_type(cdt.categories, pd.Index), pd.Index) + assert check(assert_type(cdt.ordered, Optional[bool]), bool) def test_sparse_dtype() -> None: + s_dt = pd.SparseDtype("i4") + check(assert_type(s_dt, pd.SparseDtype), pd.SparseDtype) check(assert_type(pd.SparseDtype(str), pd.SparseDtype), pd.SparseDtype) check(assert_type(pd.SparseDtype(complex), pd.SparseDtype), pd.SparseDtype) check(assert_type(pd.SparseDtype(bool), pd.SparseDtype), pd.SparseDtype) - check(assert_type(pd.SparseDtype(int), pd.SparseDtype), pd.SparseDtype) check(assert_type(pd.SparseDtype(np.int64), pd.SparseDtype), pd.SparseDtype) check(assert_type(pd.SparseDtype(str), pd.SparseDtype), pd.SparseDtype) check(assert_type(pd.SparseDtype(float), pd.SparseDtype), pd.SparseDtype) @@ -103,16 +126,34 @@ def test_sparse_dtype() -> None: check(assert_type(pd.SparseDtype(np.timedelta64), pd.SparseDtype), pd.SparseDtype) check(assert_type(pd.SparseDtype("datetime64"), pd.SparseDtype), pd.SparseDtype) check(assert_type(pd.SparseDtype(), pd.SparseDtype), pd.SparseDtype) + # pyright ignore because mypy does not like non-minimal unions, while pyright + # can't minimize to check + check( + assert_type( + s_dt.fill_value, # pyright: ignore[reportGeneralTypeIssues] + Union[str, bytes, dt.date, timedelta, complex, None], + ), + int, + ) + check(assert_type(s_dt.subtype, Dtype), np.dtype) + check(assert_type(s_dt.update_dtype(np.int64), pd.SparseDtype), pd.SparseDtype) def test_string_dtype() -> None: + s_dt = pd.StringDtype("pyarrow") check(assert_type(pd.StringDtype("pyarrow"), pd.StringDtype), pd.StringDtype) check(assert_type(pd.StringDtype("python"), pd.StringDtype), pd.StringDtype) + check(assert_type(s_dt.na_value, NAType), NAType) def test_boolean_dtype() -> None: - check(assert_type(pd.BooleanDtype(), pd.BooleanDtype), pd.BooleanDtype) + b_dt = pd.BooleanDtype() + check(assert_type(b_dt, pd.BooleanDtype), pd.BooleanDtype) + check(assert_type(b_dt.na_value, NAType), NAType) + check(assert_type(b_dt.construct_array_type(), type[BooleanArray]), type) def test_arrow_dtype() -> None: - check(assert_type(pd.ArrowDtype(pa.int64()), pd.ArrowDtype), pd.ArrowDtype) + a_dt = pd.ArrowDtype(pa.int64()) + check(assert_type(a_dt, pd.ArrowDtype), pd.ArrowDtype) + check(assert_type(a_dt.pyarrow_dtype, pa.DataType), pa.DataType) From f67279b5bd3b22e836b103883e28098ead112a77 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 19 Oct 2022 00:30:58 +0100 Subject: [PATCH 5/6] TST: Fix test for python 3.8 --- tests/test_dtypes.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_dtypes.py b/tests/test_dtypes.py index 5bec9360d..98b258847 100644 --- a/tests/test_dtypes.py +++ b/tests/test_dtypes.py @@ -13,10 +13,8 @@ import numpy as np import pandas as pd -from pandas.core.arrays import ( - BooleanArray, - IntegerArray, -) +from pandas.core.arrays import BooleanArray # noqa: F401 +from pandas.core.arrays import IntegerArray # noqa: F401 import pyarrow as pa from typing_extensions import assert_type @@ -99,7 +97,7 @@ def test_int64_dtype() -> None: check(assert_type(i64dt.is_signed_integer, bool), bool) check(assert_type(i64dt.is_unsigned_integer, bool), bool) check(assert_type(i64dt.numpy_dtype, np.dtype), np.dtype) - check(assert_type(i64dt.construct_array_type(), type[IntegerArray]), type) + check(assert_type(i64dt.construct_array_type(), "type[IntegerArray]"), type) def test_categorical_dtype() -> None: @@ -150,7 +148,7 @@ def test_boolean_dtype() -> None: b_dt = pd.BooleanDtype() check(assert_type(b_dt, pd.BooleanDtype), pd.BooleanDtype) check(assert_type(b_dt.na_value, NAType), NAType) - check(assert_type(b_dt.construct_array_type(), type[BooleanArray]), type) + check(assert_type(b_dt.construct_array_type(), "type[BooleanArray]"), type) def test_arrow_dtype() -> None: From 9bae8b511707adc803b638645c2fb25afa3d7956 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 25 Nov 2022 14:38:51 +0000 Subject: [PATCH 6/6] CLN: Fix issues identified --- pandas-stubs/core/arrays/boolean.pyi | 1 - pandas-stubs/core/arrays/integer.pyi | 9 --------- pandas-stubs/core/arrays/numpy_.pyi | 4 +--- pandas-stubs/core/arrays/sparse/dtype.pyi | 5 ----- pandas-stubs/core/arrays/string_.pyi | 3 --- pandas-stubs/core/dtypes/base.pyi | 12 +++++++++--- pandas-stubs/core/dtypes/dtypes.pyi | 16 +--------------- tests/test_dtypes.py | 9 ++------- 8 files changed, 13 insertions(+), 46 deletions(-) diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index 704745628..5a9e7ba60 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -13,7 +13,6 @@ class BooleanDtype(ExtensionDtype): def na_value(self) -> NAType: ... @classmethod def construct_array_type(cls) -> type_t[BooleanArray]: ... - def __from_arrow__(self, array): ... def coerce_to_array(values, mask=..., copy: bool = ...): ... diff --git a/pandas-stubs/core/arrays/integer.pyi b/pandas-stubs/core/arrays/integer.pyi index 1aba7a87d..803bf7ade 100644 --- a/pandas-stubs/core/arrays/integer.pyi +++ b/pandas-stubs/core/arrays/integer.pyi @@ -1,5 +1,3 @@ -import numpy as np - from pandas._libs.missing import NAType from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype @@ -11,16 +9,9 @@ class _IntegerDtype(ExtensionDtype): @property def na_value(self) -> NAType: ... @property - def is_signed_integer(self) -> bool: ... - @property - def is_unsigned_integer(self) -> bool: ... - @property - def numpy_dtype(self) -> np.dtype: ... - @property def itemsize(self) -> int: ... @classmethod def construct_array_type(cls) -> type[IntegerArray]: ... - def __from_arrow__(self, array): ... class IntegerArray(BaseMaskedArray): def dtype(self): ... diff --git a/pandas-stubs/core/arrays/numpy_.pyi b/pandas-stubs/core/arrays/numpy_.pyi index d1b57422d..af65d3fac 100644 --- a/pandas-stubs/core/arrays/numpy_.pyi +++ b/pandas-stubs/core/arrays/numpy_.pyi @@ -1,15 +1,13 @@ +import numpy as np from numpy.lib.mixins import NDArrayOperatorsMixin from pandas.core.arrays.base import ( ExtensionArray, ExtensionOpsMixin, ) -from pandas._typing import npt - from pandas.core.dtypes.dtypes import ExtensionDtype class PandasDtype(ExtensionDtype): - def __init__(self, dtype: npt.DTypeLike) -> None: ... @property def numpy_dtype(self) -> np.dtype: ... @property diff --git a/pandas-stubs/core/arrays/sparse/dtype.pyi b/pandas-stubs/core/arrays/sparse/dtype.pyi index 8711f0c92..23b736e3b 100644 --- a/pandas-stubs/core/arrays/sparse/dtype.pyi +++ b/pandas-stubs/core/arrays/sparse/dtype.pyi @@ -1,5 +1,3 @@ -import numpy as np - from pandas._typing import ( Dtype, Scalar, @@ -17,6 +15,3 @@ class SparseDtype(ExtensionDtype): ) -> None: ... @property def fill_value(self) -> Scalar | None: ... - @property - def subtype(self) -> Dtype: ... - def update_dtype(self, dtype: SparseDtype | npt.DTypeLike) -> SparseDtype: ... diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index bc635555c..4ae56b77c 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -1,7 +1,5 @@ from typing import Literal -import numpy as np -import pandas as pd from pandas.core.arrays import PandasArray from pandas._libs.missing import NAType @@ -10,7 +8,6 @@ from pandas.core.dtypes.base import ExtensionDtype class StringDtype(ExtensionDtype): def __init__(self, storage: Literal["python", "pyarrow"] | None) -> None: ... - def __from_arrow__(self, array): ... @property def na_value(self) -> NAType: ... diff --git a/pandas-stubs/core/dtypes/base.pyi b/pandas-stubs/core/dtypes/base.pyi index 8947f363a..8d3ae4556 100644 --- a/pandas-stubs/core/dtypes/base.pyi +++ b/pandas-stubs/core/dtypes/base.pyi @@ -1,14 +1,20 @@ +from typing import Literal + from pandas.core.arrays import ExtensionArray +from pandas._libs import NaTType +from pandas._libs.missing import NAType from pandas._typing import type_t class ExtensionDtype: @property - def na_value(self): ... + def na_value(self) -> NAType | NaTType: ... @property def type(self) -> type_t: ... @property - def kind(self) -> str: ... + def kind( + self, + ) -> Literal["b", "i", "u", "f", "c", "m", "M", "O", "S", "U", "V"]: ... @property def name(self) -> str: ... @property @@ -19,6 +25,6 @@ class ExtensionDtype: @classmethod def construct_from_string(cls, string: str) -> ExtensionDtype: ... @classmethod - def is_dtype(cls, dtype) -> bool: ... + def is_dtype(cls, dtype: object) -> bool: ... class StorageExtensionDtype(ExtensionDtype): ... diff --git a/pandas-stubs/core/dtypes/dtypes.pyi b/pandas-stubs/core/dtypes/dtypes.pyi index a85186456..1ee3bfc7d 100644 --- a/pandas-stubs/core/dtypes/dtypes.pyi +++ b/pandas-stubs/core/dtypes/dtypes.pyi @@ -20,19 +20,7 @@ from .base import ExtensionDtype as ExtensionDtype def register_extension_dtype(cls: type[ExtensionDtype]) -> type[ExtensionDtype]: ... class BaseMaskedDtype(ExtensionDtype): ... - -class PandasExtensionDtype(ExtensionDtype): - subdtype = ... - str: str | None = ... - num: int = ... - shape: tuple[int, ...] = ... - itemsize: int = ... - base = ... - isbuiltin: int = ... - isnative: int = ... - - @classmethod - def reset_cache(cls) -> None: ... +class PandasExtensionDtype(ExtensionDtype): ... class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): def __init__( @@ -62,10 +50,8 @@ class PeriodDtype(PandasExtensionDtype): def freq(self) -> BaseOffset: ... @property def na_value(self) -> NaTType: ... - def __from_arrow__(self, array): ... class IntervalDtype(PandasExtensionDtype): def __init__(self, subtype: str | npt.DTypeLike | None = ...): ... @property def subtype(self) -> np.dtype | None: ... - def __from_arrow__(self, array): ... diff --git a/tests/test_dtypes.py b/tests/test_dtypes.py index 98b258847..9a3257496 100644 --- a/tests/test_dtypes.py +++ b/tests/test_dtypes.py @@ -20,7 +20,7 @@ from pandas._libs import NaTType from pandas._libs.missing import NAType -from pandas._typing import Dtype +from pandas._typing import Scalar from tests import check @@ -94,9 +94,6 @@ def test_int64_dtype() -> None: i64dt = pd.Int64Dtype() check(assert_type(i64dt.itemsize, int), int) check(assert_type(i64dt.na_value, NAType), NAType) - check(assert_type(i64dt.is_signed_integer, bool), bool) - check(assert_type(i64dt.is_unsigned_integer, bool), bool) - check(assert_type(i64dt.numpy_dtype, np.dtype), np.dtype) check(assert_type(i64dt.construct_array_type(), "type[IntegerArray]"), type) @@ -129,12 +126,10 @@ def test_sparse_dtype() -> None: check( assert_type( s_dt.fill_value, # pyright: ignore[reportGeneralTypeIssues] - Union[str, bytes, dt.date, timedelta, complex, None], + Union[Scalar, None], ), int, ) - check(assert_type(s_dt.subtype, Dtype), np.dtype) - check(assert_type(s_dt.update_dtype(np.int64), pd.SparseDtype), pd.SparseDtype) def test_string_dtype() -> None: