From 4bae5e7723b65f73053743d5c16b2cb59bbf02d8 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 25 Feb 2023 16:06:28 -0500 Subject: [PATCH 1/4] Support an ExtensionDtype and ExtensionArray --- pandas-stubs/_libs/lib.pyi | 28 ++- pandas-stubs/_typing.pyi | 5 +- pandas-stubs/api/extensions/__init__.pyi | 3 + pandas-stubs/core/algorithms.pyi | 9 + pandas-stubs/core/arraylike.pyi | 8 + pandas-stubs/core/arrays/base.pyi | 39 +-- pandas-stubs/core/arrays/categorical.pyi | 5 +- pandas-stubs/core/arrays/datetimelike.pyi | 6 +- pandas-stubs/core/arrays/interval.pyi | 24 +- pandas-stubs/core/arrays/masked.pyi | 5 +- pandas-stubs/core/arrays/sparse/array.pyi | 8 +- pandas-stubs/core/dtypes/base.pyi | 17 +- pandas-stubs/core/dtypes/dtypes.pyi | 5 +- pyproject.toml | 2 +- tests/extension/__init__.py | 0 tests/extension/decimal/__init__.py | 0 tests/extension/decimal/array.py | 288 ++++++++++++++++++++++ tests/test_extension.py | 16 ++ 18 files changed, 423 insertions(+), 45 deletions(-) create mode 100644 tests/extension/__init__.py create mode 100644 tests/extension/decimal/__init__.py create mode 100644 tests/extension/decimal/array.py create mode 100644 tests/test_extension.py diff --git a/pandas-stubs/_libs/lib.pyi b/pandas-stubs/_libs/lib.pyi index bb392d24e..920717e51 100644 --- a/pandas-stubs/_libs/lib.pyi +++ b/pandas-stubs/_libs/lib.pyi @@ -1,9 +1,27 @@ +from enum import Enum +from typing import ( + Final, + Literal, +) + +from pandas import Interval +from typing_extensions import ( + TypeAlias, + TypeGuard, +) + +class _NoDefault(Enum): + no_default = ... + +no_default: Final = _NoDefault.no_default +NoDefault: TypeAlias = Literal[_NoDefault.no_default] + def infer_dtype(value: object, skipna: bool = ...) -> str: ... def is_iterator(obj: object) -> bool: ... def is_scalar(val: object) -> bool: ... def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ... -def is_interval(val: object) -> bool: ... -def is_complex(val: object) -> bool: ... -def is_bool(val: object) -> bool: ... -def is_integer(val: object) -> bool: ... -def is_float(val: object) -> bool: ... +def is_interval(val: object) -> TypeGuard[Interval]: ... +def is_complex(val: object) -> TypeGuard[complex]: ... +def is_bool(val: object) -> TypeGuard[bool]: ... +def is_integer(val: object) -> TypeGuard[int]: ... +def is_float(val: object) -> TypeGuard[float]: ... diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index d4f7960da..d7c6ba4f2 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -113,7 +113,8 @@ class WriteExcelBuffer(WriteBuffer[bytes], Protocol): FilePath: TypeAlias = str | PathLike[str] -Axis: TypeAlias = str | int +AxisInt: TypeAlias = Literal[0, 1] +Axis: TypeAlias = AxisInt | Literal["index", "columns", "rows"] IndexLabel: TypeAlias = Hashable | Sequence[Hashable] Label: TypeAlias = Hashable | None Level: TypeAlias = Hashable | int @@ -240,6 +241,8 @@ IntervalT = TypeVar( ) IntervalClosedType: TypeAlias = Literal["left", "right", "both", "neither"] +TakeIndexer: TypeAlias = Sequence[int] | Sequence[np.integer] | npt.NDArray[np.integer] + IgnoreRaiseCoerce: TypeAlias = Literal["ignore", "raise", "coerce"] # Shared by functions such as drop and astype diff --git a/pandas-stubs/api/extensions/__init__.pyi b/pandas-stubs/api/extensions/__init__.pyi index dedb5c98a..c0971d518 100644 --- a/pandas-stubs/api/extensions/__init__.pyi +++ b/pandas-stubs/api/extensions/__init__.pyi @@ -3,11 +3,14 @@ from pandas.core.accessor import ( register_index_accessor as register_index_accessor, register_series_accessor as register_series_accessor, ) +from pandas.core.algorithms import take as take from pandas.core.arrays import ( ExtensionArray as ExtensionArray, ExtensionScalarOpsMixin as ExtensionScalarOpsMixin, ) +from pandas._libs.lib import no_default as no_default + from pandas.core.dtypes.dtypes import ( ExtensionDtype as ExtensionDtype, register_extension_dtype as register_extension_dtype, diff --git a/pandas-stubs/core/algorithms.pyi b/pandas-stubs/core/algorithms.pyi index c81b8438a..ec17cc925 100644 --- a/pandas-stubs/core/algorithms.pyi +++ b/pandas-stubs/core/algorithms.pyi @@ -14,7 +14,9 @@ from pandas.api.extensions import ExtensionArray from pandas._typing import ( AnyArrayLike, + AxisInt, IntervalT, + TakeIndexer, ) # These are type: ignored because the Index types overlap due to inheritance but indices @@ -69,3 +71,10 @@ def value_counts( bins: int | None = ..., dropna: bool = ..., ) -> Series: ... +def take( + arr, + indices: TakeIndexer, + axis: AxisInt = 0, + allow_fill: bool = False, + fill_value=None, +): ... diff --git a/pandas-stubs/core/arraylike.pyi b/pandas-stubs/core/arraylike.pyi index df456a48c..b2dafe45a 100644 --- a/pandas-stubs/core/arraylike.pyi +++ b/pandas-stubs/core/arraylike.pyi @@ -1,7 +1,12 @@ from typing import Any +import numpy as np from typing_extensions import Self +from pandas._libs.ops_dispatch import ( + maybe_dispatch_ufunc_to_dunder_op as maybe_dispatch_ufunc_to_dunder_op, +) + class OpsMixin: def __eq__(self, other: object) -> Self: ... # type: ignore[override] def __ne__(self, other: object) -> Self: ... # type: ignore[override] @@ -35,3 +40,6 @@ class OpsMixin: def __rdivmod__(self, other: Any) -> tuple[Self, Self]: ... def __pow__(self, other: Any) -> Self: ... def __rpow__(self, other: Any) -> Self: ... + +def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): ... +def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): ... diff --git a/pandas-stubs/core/arrays/base.pyi b/pandas-stubs/core/arrays/base.pyi index e4ce87ca6..21193ffa8 100644 --- a/pandas-stubs/core/arrays/base.pyi +++ b/pandas-stubs/core/arrays/base.pyi @@ -1,21 +1,23 @@ -from collections.abc import Sequence +from typing import Any import numpy as np +from typing_extensions import Self from pandas._typing import ( ArrayLike, Scalar, + TakeIndexer, npt, ) from pandas.core.dtypes.dtypes import ExtensionDtype as ExtensionDtype -from pandas.core.dtypes.generic import ABCExtensionArray class ExtensionArray: - def __getitem__(self, item) -> None: ... + def __getitem__(self, item) -> Any: ... def __setitem__(self, key: int | slice | np.ndarray, value) -> None: ... def __len__(self) -> int: ... def __iter__(self): ... + def __contains__(self, item: object) -> bool | np.bool_: ... def to_numpy( self, dtype: npt.DTypeLike | None = ..., @@ -37,22 +39,31 @@ class ExtensionArray: ) -> np.ndarray: ... def fillna(self, value=..., method=..., limit=...): ... def dropna(self): ... - def shift( - self, periods: int = ..., fill_value: object = ... - ) -> ABCExtensionArray: ... + def shift(self: Self, periods: int = ..., fill_value: object = ...) -> Self: ... def unique(self): ... def searchsorted(self, value, side: str = ..., sorter=...): ... # TODO: remove keyword-only when pandas removed na_sentinel def factorize( - self, *, use_na_sentinel: bool = ... - ) -> tuple[np.ndarray, ABCExtensionArray]: ... + self: Self, *, use_na_sentinel: bool = ... + ) -> tuple[np.ndarray, Self]: ... def repeat(self, repeats, axis=...): ... def take( - self, indices: Sequence[int], *, allow_fill: bool = ..., fill_value=... - ) -> ABCExtensionArray: ... - def copy(self) -> ABCExtensionArray: ... - def view(self, dtype=...) -> ABCExtensionArray | np.ndarray: ... - def ravel(self, order=...) -> ABCExtensionArray: ... + self: Self, + indexer: TakeIndexer, + *, + allow_fill: bool = ..., + fill_value=..., + ) -> Self: ... + def copy(self: Self) -> Self: ... + def view(self: Self, dtype=...) -> Self | np.ndarray: ... + def ravel(self: Self, order=...) -> Self: ... + +class ExtensionOpsMixin: + @classmethod + def _add_arithmetic_ops(cls) -> None: ... + @classmethod + def _add_comparison_ops(cls) -> None: ... + @classmethod + def _add_logical_ops(cls) -> None: ... -class ExtensionOpsMixin: ... class ExtensionScalarOpsMixin(ExtensionOpsMixin): ... diff --git a/pandas-stubs/core/arrays/categorical.pyi b/pandas-stubs/core/arrays/categorical.pyi index 9fc791521..472d92dce 100644 --- a/pandas-stubs/core/arrays/categorical.pyi +++ b/pandas-stubs/core/arrays/categorical.pyi @@ -24,6 +24,7 @@ from pandas._typing import ( ListLike, Ordered, Scalar, + TakeIndexer, np_ndarray_bool, np_ndarray_int, ) @@ -165,7 +166,9 @@ class Categorical(ExtensionArray, PandasObject): def view(self, dtype=...): ... def to_dense(self): ... def fillna(self, value=..., method=..., limit=...): ... - def take(self, indexer, *, allow_fill: bool = ..., fill_value=...): ... + def take( + self, indexer: TakeIndexer, *, allow_fill: bool = ..., fill_value=... + ) -> Categorical: ... def take_nd(self, indexer, allow_fill: bool = ..., fill_value=...): ... def __len__(self) -> int: ... def __iter__(self): ... diff --git a/pandas-stubs/core/arrays/datetimelike.pyi b/pandas-stubs/core/arrays/datetimelike.pyi index 6619c4768..889193aae 100644 --- a/pandas-stubs/core/arrays/datetimelike.pyi +++ b/pandas-stubs/core/arrays/datetimelike.pyi @@ -5,11 +5,13 @@ from pandas.core.arrays.base import ( ExtensionArray, ExtensionOpsMixin, ) +from typing_extensions import Self from pandas._libs import ( NaT as NaT, NaTType as NaTType, ) +from pandas._typing import TakeIndexer class DatelikeOps: def strftime(self, date_format): ... @@ -40,7 +42,9 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin, ExtensionArray): def astype(self, dtype, copy: bool = ...): ... def view(self, dtype=...): ... def unique(self): ... - def take(self, indices, *, allow_fill: bool = ..., fill_value=...): ... + def take( + self: Self, indices: TakeIndexer, *, allow_fill: bool = ..., fill_value=... + ) -> Self: ... def copy(self): ... def shift(self, periods: int = ..., fill_value=..., axis: int = ...): ... def searchsorted(self, value, side: str = ..., sorter=...): ... diff --git a/pandas-stubs/core/arrays/interval.pyi b/pandas-stubs/core/arrays/interval.pyi index 1b63ed724..55052733d 100644 --- a/pandas-stubs/core/arrays/interval.pyi +++ b/pandas-stubs/core/arrays/interval.pyi @@ -1,14 +1,16 @@ import numpy as np from pandas import Index from pandas.core.arrays.base import ExtensionArray as ExtensionArray +from typing_extensions import Self from pandas._libs.interval import ( Interval as Interval, IntervalMixin as IntervalMixin, ) -from pandas._typing import Axis - -from pandas.core.dtypes.generic import ABCExtensionArray +from pandas._typing import ( + Axis, + TakeIndexer, +) class IntervalArray(IntervalMixin, ExtensionArray): ndim: int = ... @@ -40,12 +42,16 @@ class IntervalArray(IntervalMixin, ExtensionArray): def nbytes(self) -> int: ... @property def size(self) -> int: ... - def shift( - self, periods: int = ..., fill_value: object = ... - ) -> ABCExtensionArray: ... - def take( - self, indices, *, allow_fill: bool = ..., fill_value=..., axis=..., **kwargs - ): ... + def shift(self, periods: int = ..., fill_value: object = ...) -> IntervalArray: ... + def take( # type: ignore[override] + self: Self, + indices: TakeIndexer, + *, + allow_fill: bool = ..., + fill_value=..., + axis=..., + **kwargs, + ) -> Self: ... def value_counts(self, dropna: bool = ...): ... @property def left(self) -> Index: ... diff --git a/pandas-stubs/core/arrays/masked.pyi b/pandas-stubs/core/arrays/masked.pyi index dccf64ddc..6276fff60 100644 --- a/pandas-stubs/core/arrays/masked.pyi +++ b/pandas-stubs/core/arrays/masked.pyi @@ -6,6 +6,7 @@ from pandas.core.arrays import ( from pandas._typing import ( Scalar, + TakeIndexer, npt, ) @@ -26,6 +27,8 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin): def isna(self): ... @property def nbytes(self) -> int: ... - def take(self, indexer, *, allow_fill: bool = ..., fill_value=...): ... + def take( + self, indexer: TakeIndexer, allow_fill: bool = ..., fill_value=... + ) -> BaseMaskedArray: ... def copy(self): ... def value_counts(self, dropna: bool = ...): ... diff --git a/pandas-stubs/core/arrays/sparse/array.pyi b/pandas-stubs/core/arrays/sparse/array.pyi index 0e57b63b2..b11cfaa4e 100644 --- a/pandas-stubs/core/arrays/sparse/array.pyi +++ b/pandas-stubs/core/arrays/sparse/array.pyi @@ -5,6 +5,8 @@ from pandas.core.arrays import ( ) from pandas.core.base import PandasObject +from pandas._typing import TakeIndexer + class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): def __init__( self, @@ -42,12 +44,14 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): def fillna(self, value=..., method=..., limit=...): ... def shift(self, periods: int = ..., fill_value=...): ... def unique(self): ... - def factorize( # type: ignore[override] + def factorize( self, na_sentinel: int = ..., use_na_sentinel: bool = ... ) -> tuple[np.ndarray, SparseArray]: ... def value_counts(self, dropna: bool = ...): ... def __getitem__(self, key): ... - def take(self, indices, *, allow_fill: bool = ..., fill_value=...): ... + def take( + self, indices: TakeIndexer, *, allow_fill: bool = ..., fill_value=... + ) -> SparseArray: ... def searchsorted(self, v, side: str = ..., sorter=...): ... def copy(self): ... def astype(self, dtype=..., copy: bool = ...): ... diff --git a/pandas-stubs/core/dtypes/base.pyi b/pandas-stubs/core/dtypes/base.pyi index 8d3ae4556..8b4e6be10 100644 --- a/pandas-stubs/core/dtypes/base.pyi +++ b/pandas-stubs/core/dtypes/base.pyi @@ -1,23 +1,22 @@ -from typing import Literal +from typing import ( + Any, + Literal, +) from pandas.core.arrays import ExtensionArray -from pandas._libs import NaTType -from pandas._libs.missing import NAType from pandas._typing import type_t class ExtensionDtype: - @property - def na_value(self) -> NAType | NaTType: ... - @property - def type(self) -> type_t: ... + type: type_t + na_value: Any + name: str + @property def kind( self, ) -> Literal["b", "i", "u", "f", "c", "m", "M", "O", "S", "U", "V"]: ... @property - def name(self) -> str: ... - @property def names(self) -> list[str] | None: ... def empty(self, size: int | tuple[int, ...]) -> type_t[ExtensionArray]: ... @classmethod diff --git a/pandas-stubs/core/dtypes/dtypes.pyi b/pandas-stubs/core/dtypes/dtypes.pyi index 1ee3bfc7d..48e42f496 100644 --- a/pandas-stubs/core/dtypes/dtypes.pyi +++ b/pandas-stubs/core/dtypes/dtypes.pyi @@ -2,6 +2,7 @@ import datetime as dt from typing import ( Any, Literal, + TypeVar, ) import numpy as np @@ -17,7 +18,9 @@ from pandas._typing import ( from .base import ExtensionDtype as ExtensionDtype -def register_extension_dtype(cls: type[ExtensionDtype]) -> type[ExtensionDtype]: ... +_ExtensionDtypeT = TypeVar("_ExtensionDtypeT", bound=ExtensionDtype) + +def register_extension_dtype(cls: type[_ExtensionDtypeT]) -> type[_ExtensionDtypeT]: ... class BaseMaskedDtype(ExtensionDtype): ... class PandasExtensionDtype(ExtensionDtype): ... diff --git a/pyproject.toml b/pyproject.toml index a24a80a41..e911ee559 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ types-pytz = ">= 2022.1.1" mypy = "1.0" pyarrow = ">=10.0.1" pytest = ">=7.1.2" -pyright = ">=1.1.286" +pyright = ">=1.1.295" poethepoet = ">=0.16.5" loguru = ">=0.6.0" pandas = "1.5.3" diff --git a/tests/extension/__init__.py b/tests/extension/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/extension/decimal/__init__.py b/tests/extension/decimal/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/extension/decimal/array.py b/tests/extension/decimal/array.py new file mode 100644 index 000000000..0fd00676e --- /dev/null +++ b/tests/extension/decimal/array.py @@ -0,0 +1,288 @@ +from __future__ import annotations + +import decimal +import numbers +import sys + +import numpy as np +import pandas as pd +from pandas.api.extensions import ( + no_default, + register_extension_dtype, +) +from pandas.api.types import ( + is_list_like, + is_scalar, +) +from pandas.core import arraylike +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays import ( + ExtensionArray, + ExtensionScalarOpsMixin, +) +from pandas.core.indexers import check_array_indexer + +from pandas._typing import ( + TakeIndexer, + type_t, +) + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_float, + pandas_dtype, +) + + +@register_extension_dtype +class DecimalDtype(ExtensionDtype): + type = decimal.Decimal + name = "decimal" + na_value = decimal.Decimal("NaN") + _metadata = ("context",) + + def __init__(self, context: decimal.Context | None = None) -> None: + self.context = context or decimal.getcontext() + + def __repr__(self) -> str: + return f"DecimalDtype(context={self.context})" + + @classmethod + def construct_array_type(cls) -> type_t[DecimalArray]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return DecimalArray + + @property + def _is_numeric(self) -> bool: + return True + + +class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray): + __array_priority__ = 1000 + + def __init__( + self, + values: list[decimal.Decimal | float] | np.ndarray, + dtype=None, + copy=False, + context=None, + ) -> None: + for i, val in enumerate(values): + if is_float(val): + if np.isnan(val): + values[i] = DecimalDtype.na_value + else: + values[i] = DecimalDtype.type(val) + elif not isinstance(val, decimal.Decimal): + raise TypeError("All values must be of type " + str(decimal.Decimal)) + values_np = np.asarray(values, dtype=object) + + self._data = values_np + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self.data = self._data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data + self._dtype = DecimalDtype(context) + + @property + def dtype(self) -> DecimalDtype: + return self._dtype + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + return cls._from_sequence([decimal.Decimal(x) for x in strings], dtype, copy) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values) + + _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray) + + def to_numpy( + self, + dtype=None, + copy: bool = False, + na_value: object = no_default, + decimals=None, + ) -> np.ndarray: + result = np.asarray(self, dtype=dtype) + if decimals is not None: + result = np.asarray([round(x, decimals) for x in result]) + return result + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # + if not all( + isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs + ): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + # e.g. test_array_ufunc_series_scalar_other + return result + + if "out" in kwargs: + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + + inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) + + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + def reconstruct(x): + if isinstance(x, (decimal.Decimal, numbers.Number)): + return x + else: + return DecimalArray._from_sequence(x) + + if ufunc.nout > 1: + return tuple(reconstruct(x) for x in result) + else: + return reconstruct(result) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self._data[item] + else: + # array, slice. + item = pd.api.indexers.check_array_indexer(self, item) + return type(self)(self._data[item]) + + def take( + self, indexer: TakeIndexer, *, allow_fill: bool = False, fill_value=None + ) -> DecimalArray: + from pandas.api.extensions import take + + data = self._data + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result) + + def copy(self) -> DecimalArray: + return type(self)(self._data.copy(), dtype=self.dtype) + + def astype(self, dtype, copy=True): + if is_dtype_equal(dtype, self._dtype): + if not copy: + return self + dtype = pandas_dtype(dtype) + if isinstance(dtype, type(self.dtype)): + return type(self)(self._data, copy=copy, context=dtype.context) + + return super().astype(dtype, copy=copy) + + def __setitem__(self, key, value): + if is_list_like(value): + if is_scalar(key): + raise ValueError("setting an array element with a sequence.") + value = [decimal.Decimal(v) for v in value] + else: + value = decimal.Decimal(value) + + key = check_array_indexer(self, key) + self._data[key] = value + + def __len__(self) -> int: + return len(self._data) + + def __contains__(self, item) -> bool | np.bool_: + if not isinstance(item, decimal.Decimal): + return False + elif item.is_nan(): + return self.isna().any() + else: + return super().__contains__(item) + + @property + def nbytes(self) -> int: + n = len(self) + if n: + return n * sys.getsizeof(self[0]) + return 0 + + def isna(self): + return np.array([x.is_nan() for x in self._data], dtype=bool) + + @property + def _na_value(self): + return decimal.Decimal("NaN") + + def _formatter(self, boxed=False): + if boxed: + return "Decimal: {}".format + return repr + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate([x._data for x in to_concat])) + + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + if skipna: + # If we don't have any NAs, we can ignore skipna + if self.isna().any(): + other = self[~self.isna()] + return other._reduce(name, **kwargs) + + if name == "sum" and len(self) == 0: + # GH#29630 avoid returning int 0 or np.bool_(False) on old numpy + return decimal.Decimal(0) + + try: + op = getattr(self.data, name) + except AttributeError as err: + raise NotImplementedError( + f"decimal does not support the {name} operation" + ) from err + return op(axis=0) + + def _cmp_method(self, other, op): + # For use with OpsMixin + def convert_values(param): + if isinstance(param, ExtensionArray) or is_list_like(param): + ovalues = param + else: + # Assume it's an object + ovalues = [param] * len(self) + return ovalues + + lvalues = self + rvalues = convert_values(other) + + # If the operator is not defined for the underlying objects, + # a TypeError should be raised + res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] + + return np.asarray(res, dtype=bool) + + def value_counts(self, dropna: bool = True): + from pandas.core.algorithms import value_counts + + return value_counts(self.to_numpy(), dropna=dropna) + + +DecimalArray._add_arithmetic_ops() diff --git a/tests/test_extension.py b/tests/test_extension.py new file mode 100644 index 000000000..8714adfbe --- /dev/null +++ b/tests/test_extension.py @@ -0,0 +1,16 @@ +import decimal + +from typing_extensions import assert_type + +from tests import check +from tests.extension.decimal.array import ( + DecimalArray, + DecimalDtype, +) + + +def test_constructor() -> None: + arr = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + + check(assert_type(arr, DecimalArray), DecimalArray, decimal.Decimal) + check(assert_type(arr.dtype, DecimalDtype), DecimalDtype) From 2edb805f3adbcebfd263b246b40fe53fc4b9f9f5 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 26 Feb 2023 11:12:10 -0500 Subject: [PATCH 2/4] update TypeGuard usage. Test astype for Decimal --- pandas-stubs/_libs/lib.pyi | 5 +++-- tests/test_series.py | 22 ++++++++++------------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas-stubs/_libs/lib.pyi b/pandas-stubs/_libs/lib.pyi index 920717e51..c53a4c1bb 100644 --- a/pandas-stubs/_libs/lib.pyi +++ b/pandas-stubs/_libs/lib.pyi @@ -4,6 +4,7 @@ from typing import ( Literal, ) +import numpy as np from pandas import Interval from typing_extensions import ( TypeAlias, @@ -22,6 +23,6 @@ def is_scalar(val: object) -> bool: ... def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ... def is_interval(val: object) -> TypeGuard[Interval]: ... def is_complex(val: object) -> TypeGuard[complex]: ... -def is_bool(val: object) -> TypeGuard[bool]: ... -def is_integer(val: object) -> TypeGuard[int]: ... +def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ... +def is_integer(val: object) -> TypeGuard[int | np.integer]: ... def is_float(val: object) -> TypeGuard[float]: ... diff --git a/tests/test_series.py b/tests/test_series.py index e633083ce..ed43d8759 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1,8 +1,7 @@ from __future__ import annotations import datetime - -# from decimal import Decimal +from decimal import Decimal from pathlib import Path import re from typing import ( @@ -27,8 +26,6 @@ ExtensionDtype, ) from pandas.core.window import ExponentialMovingWindow - -# from pandas.tests.extension.decimal import DecimalDtype import pytest from typing_extensions import ( TypeAlias, @@ -49,6 +46,7 @@ check, pytest_warns_bounded, ) +from tests.extension.decimal.array import DecimalDtype if TYPE_CHECKING: from pandas.core.series import ( @@ -1527,11 +1525,11 @@ def test_updated_astype() -> None: Timestamp, ) - # orseries = pd.Series([Decimal(x) for x in [1, 2, 3]]) - # newtype: ExtensionDtype = DecimalDtype() - # decseries = orseries.astype(newtype) - # check( - # assert_type(decseries, pd.Series), - # pd.Series, - # Decimal, - # ) + orseries = pd.Series([Decimal(x) for x in [1, 2, 3]]) + newtype = DecimalDtype() + decseries = orseries.astype(newtype) + check( + assert_type(decseries, pd.Series), + pd.Series, + Decimal, + ) From fb1c15b791aeeef9620babb90ff4874c3062894b Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 26 Feb 2023 22:41:00 -0500 Subject: [PATCH 3/4] use ClassVar. Change is_float to accept numpy. Remove self: Self --- pandas-stubs/_libs/lib.pyi | 2 +- pandas-stubs/core/arrays/base.pyi | 14 ++++++-------- pandas-stubs/core/arrays/boolean.pyi | 6 +++--- pandas-stubs/core/dtypes/base.pyi | 7 ++++--- tests/extension/decimal/array.py | 3 ++- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pandas-stubs/_libs/lib.pyi b/pandas-stubs/_libs/lib.pyi index c53a4c1bb..06f31f0db 100644 --- a/pandas-stubs/_libs/lib.pyi +++ b/pandas-stubs/_libs/lib.pyi @@ -25,4 +25,4 @@ def is_interval(val: object) -> TypeGuard[Interval]: ... def is_complex(val: object) -> TypeGuard[complex]: ... def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ... def is_integer(val: object) -> TypeGuard[int | np.integer]: ... -def is_float(val: object) -> TypeGuard[float]: ... +def is_float(val: object) -> TypeGuard[float | np.floating]: ... diff --git a/pandas-stubs/core/arrays/base.pyi b/pandas-stubs/core/arrays/base.pyi index 21193ffa8..3694c0b92 100644 --- a/pandas-stubs/core/arrays/base.pyi +++ b/pandas-stubs/core/arrays/base.pyi @@ -39,24 +39,22 @@ class ExtensionArray: ) -> np.ndarray: ... def fillna(self, value=..., method=..., limit=...): ... def dropna(self): ... - def shift(self: Self, periods: int = ..., fill_value: object = ...) -> Self: ... + def shift(self, periods: int = ..., fill_value: object = ...) -> Self: ... def unique(self): ... def searchsorted(self, value, side: str = ..., sorter=...): ... # TODO: remove keyword-only when pandas removed na_sentinel - def factorize( - self: Self, *, use_na_sentinel: bool = ... - ) -> tuple[np.ndarray, Self]: ... + def factorize(self, *, use_na_sentinel: bool = ...) -> tuple[np.ndarray, Self]: ... def repeat(self, repeats, axis=...): ... def take( - self: Self, + self, indexer: TakeIndexer, *, allow_fill: bool = ..., fill_value=..., ) -> Self: ... - def copy(self: Self) -> Self: ... - def view(self: Self, dtype=...) -> Self | np.ndarray: ... - def ravel(self: Self, order=...) -> Self: ... + def copy(self) -> Self: ... + def view(self, dtype=...) -> Self | np.ndarray: ... + def ravel(self, order=...) -> Self: ... class ExtensionOpsMixin: @classmethod diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index b126a6b1e..0bde58e6a 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -1,3 +1,5 @@ +from typing import ClassVar + import numpy as np from pandas._libs.missing import NAType @@ -8,9 +10,7 @@ from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype from .masked import BaseMaskedArray as BaseMaskedArray class BooleanDtype(ExtensionDtype): - name: str = ... - @property - def na_value(self) -> NAType: ... + na_value: ClassVar[NAType] @classmethod def construct_array_type(cls) -> type_t[BooleanArray]: ... diff --git a/pandas-stubs/core/dtypes/base.pyi b/pandas-stubs/core/dtypes/base.pyi index 8b4e6be10..e63dcd4e2 100644 --- a/pandas-stubs/core/dtypes/base.pyi +++ b/pandas-stubs/core/dtypes/base.pyi @@ -1,5 +1,6 @@ from typing import ( Any, + ClassVar, Literal, ) @@ -8,9 +9,9 @@ from pandas.core.arrays import ExtensionArray from pandas._typing import type_t class ExtensionDtype: - type: type_t - na_value: Any - name: str + type: ClassVar[type_t] + na_value: ClassVar[Any] + name: ClassVar[str] @property def kind( diff --git a/tests/extension/decimal/array.py b/tests/extension/decimal/array.py index 0fd00676e..c970cffa1 100644 --- a/tests/extension/decimal/array.py +++ b/tests/extension/decimal/array.py @@ -79,7 +79,8 @@ def __init__( if np.isnan(val): values[i] = DecimalDtype.na_value else: - values[i] = DecimalDtype.type(val) + fval = float(val) # Handle numpy case + values[i] = DecimalDtype.type(fval) elif not isinstance(val, decimal.Decimal): raise TypeError("All values must be of type " + str(decimal.Decimal)) values_np = np.asarray(values, dtype=object) From fb9a7ae1e6e3e3338b727eae6ddc8a6052a60c7d Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 27 Feb 2023 12:00:23 -0500 Subject: [PATCH 4/4] remove declarations of private funcs. make na_value a property --- pandas-stubs/core/arraylike.pyi | 4 ---- pandas-stubs/core/dtypes/base.pyi | 4 ++-- tests/extension/decimal/array.py | 11 +++++++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas-stubs/core/arraylike.pyi b/pandas-stubs/core/arraylike.pyi index b2dafe45a..c1148a1b4 100644 --- a/pandas-stubs/core/arraylike.pyi +++ b/pandas-stubs/core/arraylike.pyi @@ -1,6 +1,5 @@ from typing import Any -import numpy as np from typing_extensions import Self from pandas._libs.ops_dispatch import ( @@ -40,6 +39,3 @@ class OpsMixin: def __rdivmod__(self, other: Any) -> tuple[Self, Self]: ... def __pow__(self, other: Any) -> Self: ... def __rpow__(self, other: Any) -> Self: ... - -def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): ... -def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): ... diff --git a/pandas-stubs/core/dtypes/base.pyi b/pandas-stubs/core/dtypes/base.pyi index e63dcd4e2..4ce9ac70d 100644 --- a/pandas-stubs/core/dtypes/base.pyi +++ b/pandas-stubs/core/dtypes/base.pyi @@ -1,5 +1,4 @@ from typing import ( - Any, ClassVar, Literal, ) @@ -10,9 +9,10 @@ from pandas._typing import type_t class ExtensionDtype: type: ClassVar[type_t] - na_value: ClassVar[Any] name: ClassVar[str] + @property + def na_value(self) -> object: ... @property def kind( self, diff --git a/tests/extension/decimal/array.py b/tests/extension/decimal/array.py index c970cffa1..8fac43ac5 100644 --- a/tests/extension/decimal/array.py +++ b/tests/extension/decimal/array.py @@ -39,9 +39,12 @@ class DecimalDtype(ExtensionDtype): type = decimal.Decimal name = "decimal" - na_value = decimal.Decimal("NaN") _metadata = ("context",) + @property + def na_value(self) -> decimal.Decimal: + return decimal.Decimal("NaN") + def __init__(self, context: decimal.Context | None = None) -> None: self.context = context or decimal.getcontext() @@ -77,7 +80,7 @@ def __init__( for i, val in enumerate(values): if is_float(val): if np.isnan(val): - values[i] = DecimalDtype.na_value + values[i] = DecimalDtype().na_value else: fval = float(val) # Handle numpy case values[i] = DecimalDtype.type(fval) @@ -139,7 +142,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): return result if "out" in kwargs: - return arraylike.dispatch_ufunc_with_out( + return arraylike.dispatch_ufunc_with_out( # type: ignore[attr-defined] # pyright: ignore[reportGeneralTypeIssues] self, ufunc, method, *inputs, **kwargs ) @@ -147,7 +150,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): result = getattr(ufunc, method)(*inputs, **kwargs) if method == "reduce": - result = arraylike.dispatch_reduction_ufunc( + result = arraylike.dispatch_reduction_ufunc( # type: ignore[attr-defined] # pyright: ignore[reportGeneralTypeIssues] self, ufunc, method, *inputs, **kwargs ) if result is not NotImplemented: