Skip to content

Support an ExtensionDtype and ExtensionArray #554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions pandas-stubs/_libs/lib.pyi
Original file line number Diff line number Diff line change
@@ -1,9 +1,28 @@
from enum import Enum
from typing import (
Final,
Literal,
)

import numpy as np
from pandas import Interval
from typing_extensions import (
TypeAlias,
TypeGuard,
)

# Single-member enum; its only member is the value behind the `no_default` sentinel below.
class _NoDefault(Enum):
no_default = ...

# Module-level sentinel: bound to the enum member and declared Final.
no_default: Final = _NoDefault.no_default
# Type alias: a Literal type matching exactly that one enum member.
NoDefault: TypeAlias = Literal[_NoDefault.no_default]

def infer_dtype(value: object, skipna: bool = ...) -> str: ...
def is_iterator(obj: object) -> bool: ...
def is_scalar(val: object) -> bool: ...
def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
def is_interval(val: object) -> bool: ...
def is_complex(val: object) -> bool: ...
def is_bool(val: object) -> bool: ...
def is_integer(val: object) -> bool: ...
def is_float(val: object) -> bool: ...
def is_interval(val: object) -> TypeGuard[Interval]: ...
def is_complex(val: object) -> TypeGuard[complex]: ...
def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ...
def is_integer(val: object) -> TypeGuard[int | np.integer]: ...
def is_float(val: object) -> TypeGuard[float | np.floating]: ...
5 changes: 4 additions & 1 deletion pandas-stubs/_typing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ class WriteExcelBuffer(WriteBuffer[bytes], Protocol):

FilePath: TypeAlias = str | PathLike[str]

Axis: TypeAlias = str | int
AxisInt: TypeAlias = Literal[0, 1]
Axis: TypeAlias = AxisInt | Literal["index", "columns", "rows"]
Comment on lines -169 to +170
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@twoertwein @Dr-Irv While I understand the intention, this one causes a problem with the following types from Pandas:

pandas.core.frame.DataFrame.reorder_levels(self, order: Sequence[Axis], axis: Axis = 0)

The order parameter expects a sequence of ints or strings - with this change this is forbidden.

From my code:

reportquery.py:612: error: Argument 1 to
"reorder_levels" of "DataFrame" has incompatible type "Sequence[str]"; expected
"Sequence[Union[Literal[0, 1], Literal['index', 'columns', 'rows']]]"
[arg-type]
            df = df.reorder_levels(column_names, axis=1)

PS: Sorry for not creating an issue (yet). I am short on time.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PS: Sorry for not creating an issue (yet). I am short on time.

When you do have the time, please create an issue with a full code sample.

This has picked up that reorder_levels() has an incorrect type for order: it should be Sequence[Hashable].

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we go: #560

IndexLabel: TypeAlias = Hashable | Sequence[Hashable]
Label: TypeAlias = Hashable | None
Level: TypeAlias = Hashable | int
Expand Down Expand Up @@ -293,6 +294,8 @@ IntervalT = TypeVar(
)
IntervalClosedType: TypeAlias = Literal["left", "right", "both", "neither"]

TakeIndexer: TypeAlias = Sequence[int] | Sequence[np.integer] | npt.NDArray[np.integer]

IgnoreRaiseCoerce: TypeAlias = Literal["ignore", "raise", "coerce"]

# Shared by functions such as drop and astype
Expand Down
3 changes: 3 additions & 0 deletions pandas-stubs/api/extensions/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@ from pandas.core.accessor import (
register_index_accessor as register_index_accessor,
register_series_accessor as register_series_accessor,
)
from pandas.core.algorithms import take as take
from pandas.core.arrays import (
ExtensionArray as ExtensionArray,
ExtensionScalarOpsMixin as ExtensionScalarOpsMixin,
)

from pandas._libs.lib import no_default as no_default

from pandas.core.dtypes.dtypes import (
ExtensionDtype as ExtensionDtype,
register_extension_dtype as register_extension_dtype,
Expand Down
9 changes: 9 additions & 0 deletions pandas-stubs/core/algorithms.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ from pandas.api.extensions import ExtensionArray

from pandas._typing import (
AnyArrayLike,
AxisInt,
IntervalT,
TakeIndexer,
)

# These are type: ignored because the Index types overlap due to inheritance but indices
Expand Down Expand Up @@ -69,3 +71,10 @@ def value_counts(
bins: int | None = ...,
dropna: bool = ...,
) -> Series: ...
# Stub for the module-level `take`; `pandas.api.extensions` re-exports it under the same name.
# `indices` uses the shared TakeIndexer alias and `axis` is limited to AxisInt.
# `arr`, `fill_value` and the return value are left unannotated in this stub.
def take(
arr,
indices: TakeIndexer,
axis: AxisInt = 0,
allow_fill: bool = False,
fill_value=None,
): ...
4 changes: 4 additions & 0 deletions pandas-stubs/core/arraylike.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ from typing import Any

from typing_extensions import Self

from pandas._libs.ops_dispatch import (
maybe_dispatch_ufunc_to_dunder_op as maybe_dispatch_ufunc_to_dunder_op,
)

class OpsMixin:
def __eq__(self, other: object) -> Self: ... # type: ignore[override]
def __ne__(self, other: object) -> Self: ... # type: ignore[override]
Expand Down
39 changes: 24 additions & 15 deletions pandas-stubs/core/arrays/base.pyi
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
from collections.abc import Sequence
from typing import Any

import numpy as np
from typing_extensions import Self

from pandas._typing import (
ArrayLike,
Scalar,
TakeIndexer,
npt,
)

from pandas.core.dtypes.dtypes import ExtensionDtype as ExtensionDtype
from pandas.core.dtypes.generic import ABCExtensionArray

class ExtensionArray:
def __getitem__(self, item) -> None: ...
def __getitem__(self, item) -> Any: ...
def __setitem__(self, key: int | slice | np.ndarray, value) -> None: ...
def __len__(self) -> int: ...
def __iter__(self): ...
def __contains__(self, item: object) -> bool | np.bool_: ...
def to_numpy(
self,
dtype: npt.DTypeLike | None = ...,
Expand All @@ -37,22 +39,29 @@ class ExtensionArray:
) -> np.ndarray: ...
def fillna(self, value=..., method=..., limit=...): ...
def dropna(self): ...
def shift(
self, periods: int = ..., fill_value: object = ...
) -> ABCExtensionArray: ...
def shift(self, periods: int = ..., fill_value: object = ...) -> Self: ...
def unique(self): ...
def searchsorted(self, value, side: str = ..., sorter=...): ...
# TODO: remove keyword-only when pandas removed na_sentinel
def factorize(
self, *, use_na_sentinel: bool = ...
) -> tuple[np.ndarray, ABCExtensionArray]: ...
def factorize(self, *, use_na_sentinel: bool = ...) -> tuple[np.ndarray, Self]: ...
def repeat(self, repeats, axis=...): ...
def take(
self, indices: Sequence[int], *, allow_fill: bool = ..., fill_value=...
) -> ABCExtensionArray: ...
def copy(self) -> ABCExtensionArray: ...
def view(self, dtype=...) -> ABCExtensionArray | np.ndarray: ...
def ravel(self, order=...) -> ABCExtensionArray: ...
self,
indexer: TakeIndexer,
*,
allow_fill: bool = ...,
fill_value=...,
) -> Self: ...
def copy(self) -> Self: ...
def view(self, dtype=...) -> Self | np.ndarray: ...
def ravel(self, order=...) -> Self: ...

class ExtensionOpsMixin:
@classmethod
def _add_arithmetic_ops(cls) -> None: ...
@classmethod
def _add_comparison_ops(cls) -> None: ...
@classmethod
def _add_logical_ops(cls) -> None: ...

class ExtensionOpsMixin: ...
class ExtensionScalarOpsMixin(ExtensionOpsMixin): ...
6 changes: 3 additions & 3 deletions pandas-stubs/core/arrays/boolean.pyi
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import ClassVar

import numpy as np

from pandas._libs.missing import NAType
Expand All @@ -8,9 +10,7 @@ from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype
from .masked import BaseMaskedArray as BaseMaskedArray

class BooleanDtype(ExtensionDtype):
name: str = ...
@property
def na_value(self) -> NAType: ...
na_value: ClassVar[NAType]
@classmethod
def construct_array_type(cls) -> type_t[BooleanArray]: ...

Expand Down
5 changes: 4 additions & 1 deletion pandas-stubs/core/arrays/categorical.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ from pandas._typing import (
ListLike,
Ordered,
Scalar,
TakeIndexer,
np_ndarray_bool,
np_ndarray_int,
)
Expand Down Expand Up @@ -165,7 +166,9 @@ class Categorical(ExtensionArray, PandasObject):
def view(self, dtype=...): ...
def to_dense(self): ...
def fillna(self, value=..., method=..., limit=...): ...
def take(self, indexer, *, allow_fill: bool = ..., fill_value=...): ...
def take(
self, indexer: TakeIndexer, *, allow_fill: bool = ..., fill_value=...
) -> Categorical: ...
def take_nd(self, indexer, allow_fill: bool = ..., fill_value=...): ...
def __len__(self) -> int: ...
def __iter__(self): ...
Expand Down
6 changes: 5 additions & 1 deletion pandas-stubs/core/arrays/datetimelike.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ from pandas.core.arrays.base import (
ExtensionArray,
ExtensionOpsMixin,
)
from typing_extensions import Self

from pandas._libs import (
NaT as NaT,
NaTType as NaTType,
)
from pandas._typing import TakeIndexer

class DatelikeOps:
def strftime(self, date_format): ...
Expand Down Expand Up @@ -40,7 +42,9 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin, ExtensionArray):
def astype(self, dtype, copy: bool = ...): ...
def view(self, dtype=...): ...
def unique(self): ...
def take(self, indices, *, allow_fill: bool = ..., fill_value=...): ...
def take(
self: Self, indices: TakeIndexer, *, allow_fill: bool = ..., fill_value=...
) -> Self: ...
def copy(self): ...
def shift(self, periods: int = ..., fill_value=..., axis: int = ...): ...
def searchsorted(self, value, side: str = ..., sorter=...): ...
Expand Down
24 changes: 15 additions & 9 deletions pandas-stubs/core/arrays/interval.pyi
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import numpy as np
from pandas import Index
from pandas.core.arrays.base import ExtensionArray as ExtensionArray
from typing_extensions import Self

from pandas._libs.interval import (
Interval as Interval,
IntervalMixin as IntervalMixin,
)
from pandas._typing import Axis

from pandas.core.dtypes.generic import ABCExtensionArray
from pandas._typing import (
Axis,
TakeIndexer,
)

class IntervalArray(IntervalMixin, ExtensionArray):
ndim: int = ...
Expand Down Expand Up @@ -40,12 +42,16 @@ class IntervalArray(IntervalMixin, ExtensionArray):
def nbytes(self) -> int: ...
@property
def size(self) -> int: ...
def shift(
self, periods: int = ..., fill_value: object = ...
) -> ABCExtensionArray: ...
def take(
self, indices, *, allow_fill: bool = ..., fill_value=..., axis=..., **kwargs
): ...
def shift(self, periods: int = ..., fill_value: object = ...) -> IntervalArray: ...
def take( # type: ignore[override]
self: Self,
indices: TakeIndexer,
*,
allow_fill: bool = ...,
fill_value=...,
axis=...,
**kwargs,
) -> Self: ...
def value_counts(self, dropna: bool = ...): ...
@property
def left(self) -> Index: ...
Expand Down
5 changes: 4 additions & 1 deletion pandas-stubs/core/arrays/masked.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ from pandas.core.arrays import (

from pandas._typing import (
Scalar,
TakeIndexer,
npt,
)

Expand All @@ -26,6 +27,8 @@ class BaseMaskedArray(ExtensionArray, ExtensionOpsMixin):
def isna(self): ...
@property
def nbytes(self) -> int: ...
def take(self, indexer, *, allow_fill: bool = ..., fill_value=...): ...
def take(
self, indexer: TakeIndexer, allow_fill: bool = ..., fill_value=...
) -> BaseMaskedArray: ...
def copy(self): ...
def value_counts(self, dropna: bool = ...): ...
8 changes: 6 additions & 2 deletions pandas-stubs/core/arrays/sparse/array.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ from pandas.core.arrays import (
)
from pandas.core.base import PandasObject

from pandas._typing import TakeIndexer

class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin):
def __init__(
self,
Expand Down Expand Up @@ -42,12 +44,14 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin):
def fillna(self, value=..., method=..., limit=...): ...
def shift(self, periods: int = ..., fill_value=...): ...
def unique(self): ...
def factorize( # type: ignore[override]
def factorize(
self, na_sentinel: int = ..., use_na_sentinel: bool = ...
) -> tuple[np.ndarray, SparseArray]: ...
def value_counts(self, dropna: bool = ...): ...
def __getitem__(self, key): ...
def take(self, indices, *, allow_fill: bool = ..., fill_value=...): ...
def take(
self, indices: TakeIndexer, *, allow_fill: bool = ..., fill_value=...
) -> SparseArray: ...
def searchsorted(self, v, side: str = ..., sorter=...): ...
def copy(self): ...
def astype(self, dtype=..., copy: bool = ...): ...
Expand Down
16 changes: 8 additions & 8 deletions pandas-stubs/core/dtypes/base.pyi
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
from typing import Literal
from typing import (
ClassVar,
Literal,
)

from pandas.core.arrays import ExtensionArray

from pandas._libs import NaTType
from pandas._libs.missing import NAType
from pandas._typing import type_t

class ExtensionDtype:
type: ClassVar[type_t]
name: ClassVar[str]

@property
def na_value(self) -> NAType | NaTType: ...
@property
def type(self) -> type_t: ...
def na_value(self) -> object: ...
@property
def kind(
self,
) -> Literal["b", "i", "u", "f", "c", "m", "M", "O", "S", "U", "V"]: ...
@property
def name(self) -> str: ...
@property
def names(self) -> list[str] | None: ...
def empty(self, size: int | tuple[int, ...]) -> type_t[ExtensionArray]: ...
@classmethod
Expand Down
5 changes: 4 additions & 1 deletion pandas-stubs/core/dtypes/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import datetime as dt
from typing import (
Any,
Literal,
TypeVar,
)

import numpy as np
Expand All @@ -17,7 +18,9 @@ from pandas._typing import (

from .base import ExtensionDtype as ExtensionDtype

def register_extension_dtype(cls: type[ExtensionDtype]) -> type[ExtensionDtype]: ...
_ExtensionDtypeT = TypeVar("_ExtensionDtypeT", bound=ExtensionDtype)

def register_extension_dtype(cls: type[_ExtensionDtypeT]) -> type[_ExtensionDtypeT]: ...

class BaseMaskedDtype(ExtensionDtype): ...
class PandasExtensionDtype(ExtensionDtype): ...
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ types-pytz = ">= 2022.1.1"
mypy = "1.0"
pyarrow = ">=10.0.1"
pytest = ">=7.1.2"
pyright = ">=1.1.286"
pyright = ">=1.1.295"
poethepoet = ">=0.16.5"
loguru = ">=0.6.0"
pandas = "1.5.3"
Expand Down
Empty file added tests/extension/__init__.py
Empty file.
Empty file.
Loading