Skip to content

Commit 3d1eee4

Browse files
bashtageKevin Sheppard
and
Kevin Sheppard
authored
ENH: Improve dtypes (#386)
* ENH: Improve dtypes
* CLN: Remove unnecessary parts of classes (eq, hash)
* CLN/ENH: Restructure Dtypes and add typing info
* ENH: Improvements to dtypes
* TST: Fix test for Python 3.8
* CLN: Fix issues identified

Co-authored-by: Kevin Sheppard <[email protected]>
1 parent dc9a094 commit 3d1eee4

File tree

9 files changed

+215
-166
lines changed

9 files changed

+215
-166
lines changed
+4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import pyarrow as pa
22

3+
from pandas._libs.missing import NAType
4+
35
from pandas.core.dtypes.base import StorageExtensionDtype
46

57
class ArrowDtype(StorageExtensionDtype):
68
pyarrow_dtype: pa.DataType
79
def __init__(self, pyarrow_dtype: pa.DataType) -> None: ...
10+
@property
11+
def na_value(self) -> NAType: ...

pandas-stubs/core/arrays/boolean.pyi

+3-10
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import numpy as np
22

3-
from pandas._typing import (
4-
Scalar,
5-
type_t,
6-
)
3+
from pandas._libs.missing import NAType
4+
from pandas._typing import type_t
75

86
from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype
97

@@ -12,14 +10,9 @@ from .masked import BaseMaskedArray as BaseMaskedArray
1210
class BooleanDtype(ExtensionDtype):
1311
name: str = ...
1412
@property
15-
def na_value(self) -> Scalar: ...
16-
@property
17-
def type(self) -> type_t: ...
18-
@property
19-
def kind(self) -> str: ...
13+
def na_value(self) -> NAType: ...
2014
@classmethod
2115
def construct_array_type(cls) -> type_t[BooleanArray]: ...
22-
def __from_arrow__(self, array): ...
2316

2417
def coerce_to_array(values, mask=..., copy: bool = ...): ...
2518

pandas-stubs/core/arrays/integer.pyi

+8-16
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,17 @@
1+
from pandas._libs.missing import NAType
2+
13
from pandas.core.dtypes.base import ExtensionDtype as ExtensionDtype
24

35
from .masked import BaseMaskedArray
46

5-
_type = type
6-
77
class _IntegerDtype(ExtensionDtype):
8-
name: str
9-
base = ...
10-
type: _type
11-
na_value = ...
12-
def is_signed_integer(self): ...
13-
def is_unsigned_integer(self): ...
14-
def numpy_dtype(self): ...
15-
def kind(self): ...
16-
def itemsize(self): ...
8+
base: None
9+
@property
10+
def na_value(self) -> NAType: ...
11+
@property
12+
def itemsize(self) -> int: ...
1713
@classmethod
18-
def construct_array_type(cls): ...
19-
def __from_arrow__(self, array): ...
20-
21-
def safe_cast(values, dtype, copy): ...
22-
def coerce_to_array(values, dtype, mask=..., copy: bool = ...): ...
14+
def construct_array_type(cls) -> type[IntegerArray]: ...
2315

2416
class IntegerArray(BaseMaskedArray):
2517
def dtype(self): ...

pandas-stubs/core/arrays/numpy_.pyi

+3-13
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
from numpy.lib.mixins import NDArrayOperatorsMixin
23
from pandas.core.arrays.base import (
34
ExtensionArray,
@@ -7,21 +8,10 @@ from pandas.core.arrays.base import (
78
from pandas.core.dtypes.dtypes import ExtensionDtype
89

910
class PandasDtype(ExtensionDtype):
10-
def __init__(self, dtype) -> None: ...
1111
@property
12-
def numpy_dtype(self): ...
12+
def numpy_dtype(self) -> np.dtype: ...
1313
@property
14-
def name(self): ...
15-
@property
16-
def type(self): ...
17-
@classmethod
18-
def construct_from_string(cls, string): ...
19-
@classmethod
20-
def construct_array_type(cls): ...
21-
@property
22-
def kind(self): ...
23-
@property
24-
def itemsize(self): ...
14+
def itemsize(self) -> int: ...
2515

2616
class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
2717
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ...
+5-21
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,17 @@
11
from pandas._typing import (
22
Dtype,
33
Scalar,
4+
npt,
45
)
56

67
from pandas.core.dtypes.base import ExtensionDtype
78
from pandas.core.dtypes.dtypes import (
89
register_extension_dtype as register_extension_dtype,
910
)
1011

11-
# merged types from pylance
12-
1312
class SparseDtype(ExtensionDtype):
14-
def __init__(self, dtype: Dtype = ..., fill_value: Scalar | None = ...) -> None: ...
15-
def __hash__(self): ...
16-
def __eq__(self, other) -> bool: ...
17-
@property
18-
def fill_value(self): ...
19-
@property
20-
def kind(self): ...
21-
@property
22-
def type(self): ...
23-
@property
24-
def subtype(self): ...
13+
def __init__(
14+
self, dtype: Dtype | npt.DTypeLike = ..., fill_value: Scalar | None = ...
15+
) -> None: ...
2516
@property
26-
def name(self): ...
27-
@classmethod
28-
def construct_array_type(cls): ...
29-
@classmethod
30-
def construct_from_string(cls, string): ...
31-
@classmethod
32-
def is_dtype(cls, dtype): ...
33-
def update_dtype(self, dtype): ...
17+
def fill_value(self) -> Scalar | None: ...

pandas-stubs/core/arrays/string_.pyi

+5-7
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
1+
from typing import Literal
2+
13
from pandas.core.arrays import PandasArray
24

3-
from pandas._typing import type_t
5+
from pandas._libs.missing import NAType
46

57
from pandas.core.dtypes.base import ExtensionDtype
68

79
class StringDtype(ExtensionDtype):
8-
name: str = ...
9-
na_value = ...
10+
def __init__(self, storage: Literal["python", "pyarrow"] | None) -> None: ...
1011
@property
11-
def type(self) -> type_t: ...
12-
@classmethod
13-
def construct_array_type(cls) -> type_t[StringArray]: ...
14-
def __from_arrow__(self, array): ...
12+
def na_value(self) -> NAType: ...
1513

1614
class StringArray(PandasArray):
1715
def __init__(self, values, copy: bool = ...) -> None: ...

pandas-stubs/core/dtypes/base.pyi

+11-7
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,30 @@
1+
from typing import Literal
2+
13
from pandas.core.arrays import ExtensionArray
24

5+
from pandas._libs import NaTType
6+
from pandas._libs.missing import NAType
37
from pandas._typing import type_t
48

59
class ExtensionDtype:
6-
def __eq__(self, other) -> bool: ...
7-
def __hash__(self) -> int: ...
8-
def __ne__(self, other) -> bool: ...
910
@property
10-
def na_value(self): ...
11+
def na_value(self) -> NAType | NaTType: ...
1112
@property
1213
def type(self) -> type_t: ...
1314
@property
14-
def kind(self) -> str: ...
15+
def kind(
16+
self,
17+
) -> Literal["b", "i", "u", "f", "c", "m", "M", "O", "S", "U", "V"]: ...
1518
@property
1619
def name(self) -> str: ...
1720
@property
1821
def names(self) -> list[str] | None: ...
22+
def empty(self, size: int | tuple[int, ...]) -> type_t[ExtensionArray]: ...
1923
@classmethod
2024
def construct_array_type(cls) -> type_t[ExtensionArray]: ...
2125
@classmethod
22-
def construct_from_string(cls, string: str): ...
26+
def construct_from_string(cls, string: str) -> ExtensionDtype: ...
2327
@classmethod
24-
def is_dtype(cls, dtype) -> bool: ...
28+
def is_dtype(cls, dtype: object) -> bool: ...
2529

2630
class StorageExtensionDtype(ExtensionDtype): ...

pandas-stubs/core/dtypes/dtypes.pyi

+24-92
Original file line numberDiff line numberDiff line change
@@ -1,125 +1,57 @@
1+
import datetime as dt
12
from typing import (
23
Any,
3-
Sequence,
4+
Literal,
45
)
56

7+
import numpy as np
68
from pandas.core.indexes.base import Index
9+
from pandas.core.series import Series
710

8-
from pandas._libs.tslibs import ( # , timezones as timezones
9-
Period as Period,
10-
Timestamp,
11+
from pandas._libs import NaTType
12+
from pandas._libs.tslibs import BaseOffset
13+
from pandas._typing import (
14+
Ordered,
15+
npt,
1116
)
12-
from pandas._typing import Ordered
1317

1418
from .base import ExtensionDtype as ExtensionDtype
1519

16-
_str = str
17-
1820
def register_extension_dtype(cls: type[ExtensionDtype]) -> type[ExtensionDtype]: ...
1921

2022
class BaseMaskedDtype(ExtensionDtype): ...
21-
22-
class PandasExtensionDtype(ExtensionDtype):
23-
subdtype = ...
24-
str: _str | None = ...
25-
num: int = ...
26-
shape: tuple[int, ...] = ...
27-
itemsize: int = ...
28-
base = ...
29-
isbuiltin: int = ...
30-
isnative: int = ...
31-
def __hash__(self) -> int: ...
32-
@classmethod
33-
def reset_cache(cls) -> None: ...
34-
35-
class CategoricalDtypeType(type): ...
23+
class PandasExtensionDtype(ExtensionDtype): ...
3624

3725
class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
38-
name: _str = ...
39-
type: type[CategoricalDtypeType] = ...
40-
kind: _str = ...
41-
str: _str = ...
42-
base = ...
4326
def __init__(
44-
self, categories: Sequence[Any] | None = ..., ordered: Ordered = ...
27+
self,
28+
categories: Series | Index | list[Any] | None = ...,
29+
ordered: Ordered = ...,
4530
) -> None: ...
46-
@classmethod
47-
def construct_from_string(cls, string: _str) -> CategoricalDtype: ...
48-
def __hash__(self) -> int: ...
49-
def __eq__(self, other) -> bool: ...
50-
@classmethod
51-
def construct_array_type(cls): ...
52-
@staticmethod
53-
def validate_ordered(ordered: Ordered) -> None: ...
54-
@staticmethod
55-
def validate_categories(categories, fastpath: bool = ...): ...
56-
def update_dtype(self, dtype: _str | CategoricalDtype) -> CategoricalDtype: ...
5731
@property
5832
def categories(self) -> Index: ...
5933
@property
6034
def ordered(self) -> Ordered: ...
6135

6236
class DatetimeTZDtype(PandasExtensionDtype):
63-
type: type[Timestamp] = ...
64-
kind: _str = ...
65-
str: _str = ...
66-
num: int = ...
67-
base = ...
68-
na_value = ...
69-
def __init__(self, unit: _str = ..., tz=...) -> None: ...
37+
def __init__(
38+
self, unit: Literal["ns"] = ..., tz: str | int | dt.tzinfo | None = ...
39+
) -> None: ...
7040
@property
71-
def unit(self): ...
41+
def unit(self) -> Literal["ns"]: ...
7242
@property
73-
def tz(self): ...
74-
@classmethod
75-
def construct_array_type(cls): ...
76-
@classmethod
77-
def construct_from_string(cls, string: _str): ...
43+
def tz(self) -> dt.tzinfo: ...
7844
@property
79-
def name(self) -> _str: ...
80-
def __hash__(self) -> int: ...
81-
def __eq__(self, other) -> bool: ...
45+
def na_value(self) -> NaTType: ...
8246

8347
class PeriodDtype(PandasExtensionDtype):
84-
type: type[Period] = ...
85-
kind: _str = ...
86-
str: _str = ...
87-
base = ...
88-
num: int = ...
89-
def __new__(cls, freq=...): ...
90-
@property
91-
def freq(self): ...
92-
@classmethod
93-
def construct_from_string(cls, string: _str): ...
48+
def __init__(self, freq: str | BaseOffset = ...): ...
9449
@property
95-
def name(self) -> _str: ...
50+
def freq(self) -> BaseOffset: ...
9651
@property
97-
def na_value(self): ...
98-
def __hash__(self) -> int: ...
99-
def __eq__(self, other) -> bool: ...
100-
@classmethod
101-
def is_dtype(cls, dtype) -> bool: ...
102-
@classmethod
103-
def construct_array_type(cls): ...
104-
def __from_arrow__(self, array): ...
52+
def na_value(self) -> NaTType: ...
10553

10654
class IntervalDtype(PandasExtensionDtype):
107-
name: _str = ...
108-
kind: _str = ...
109-
str: _str = ...
110-
base = ...
111-
num: int = ...
112-
def __new__(cls, subtype=...): ...
113-
@property
114-
def subtype(self): ...
115-
@classmethod
116-
def construct_array_type(cls): ...
117-
@classmethod
118-
def construct_from_string(cls, string: _str): ...
55+
def __init__(self, subtype: str | npt.DTypeLike | None = ...): ...
11956
@property
120-
def type(self): ...
121-
def __hash__(self) -> int: ...
122-
def __eq__(self, other) -> bool: ...
123-
@classmethod
124-
def is_dtype(cls, dtype) -> bool: ...
125-
def __from_arrow__(self, array): ...
57+
def subtype(self) -> np.dtype | None: ...

0 commit comments

Comments
 (0)