2
2
A verbatim copy (vendored) of the spec from https://github.com/data-apis/dataframe-api
3
3
"""
4
4
5
+ from __future__ import annotations
6
+
5
7
from abc import (
6
8
ABC ,
7
9
abstractmethod ,
8
10
)
9
11
import enum
10
12
from typing import (
11
13
Any ,
12
- Dict ,
13
14
Iterable ,
14
- Optional ,
15
15
Sequence ,
16
- Tuple ,
17
16
TypedDict ,
18
17
)
19
18
@@ -90,18 +89,18 @@ class ColumnNullType(enum.IntEnum):
90
89
class ColumnBuffers (TypedDict ):
91
90
# first element is a buffer containing the column data;
92
91
# second element is the data buffer's associated dtype
93
- data : Tuple [ " Buffer" , Any ]
92
+ data : tuple [ Buffer , Any ]
94
93
95
94
# first element is a buffer containing mask values indicating missing data;
96
95
# second element is the mask value buffer's associated dtype.
97
96
# None if the null representation is not a bit or byte mask
98
- validity : Optional [ Tuple [ " Buffer" , Any ]]
97
+ validity : tuple [ Buffer , Any ] | None
99
98
100
99
# first element is a buffer containing the offset values for
101
100
# variable-size binary data (e.g., variable-length strings);
102
101
# second element is the offsets buffer's associated dtype.
103
102
# None if the data buffer does not have an associated offsets buffer
104
- offsets : Optional [ Tuple [ " Buffer" , Any ]]
103
+ offsets : tuple [ Buffer , Any ] | None
105
104
106
105
107
106
class CategoricalDescription (TypedDict ):
@@ -111,7 +110,7 @@ class CategoricalDescription(TypedDict):
111
110
is_dictionary : bool
112
111
# Python-level only (e.g. ``{int: str}``).
113
112
# None if not a dictionary-style categorical.
114
- mapping : Optional [ dict ]
113
+ mapping : dict | None
115
114
116
115
117
116
class Buffer (ABC ):
@@ -161,7 +160,7 @@ def __dlpack__(self):
161
160
raise NotImplementedError ("__dlpack__" )
162
161
163
162
@abstractmethod
164
- def __dlpack_device__ (self ) -> Tuple [DlpackDeviceType , Optional [ int ] ]:
163
+ def __dlpack_device__ (self ) -> tuple [DlpackDeviceType , int | None ]:
165
164
"""
166
165
Device type and device ID for where the data in the buffer resides.
167
166
Uses device type codes matching DLPack.
@@ -239,7 +238,7 @@ def offset(self) -> int:
239
238
240
239
@property
241
240
@abstractmethod
242
- def dtype (self ) -> Tuple [DtypeKind , int , str , str ]:
241
+ def dtype (self ) -> tuple [DtypeKind , int , str , str ]:
243
242
"""
244
243
Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.
245
244
@@ -293,7 +292,7 @@ def describe_categorical(self) -> CategoricalDescription:
293
292
294
293
@property
295
294
@abstractmethod
296
- def describe_null (self ) -> Tuple [ColumnNullType , Any ]:
295
+ def describe_null (self ) -> tuple [ColumnNullType , Any ]:
297
296
"""
298
297
Return the missing value (or "null") representation the column dtype
299
298
uses, as a tuple ``(kind, value)``.
@@ -306,7 +305,7 @@ def describe_null(self) -> Tuple[ColumnNullType, Any]:
306
305
307
306
@property
308
307
@abstractmethod
309
- def null_count (self ) -> Optional [ int ] :
308
+ def null_count (self ) -> int | None :
310
309
"""
311
310
Number of null elements, if known.
312
311
@@ -316,7 +315,7 @@ def null_count(self) -> Optional[int]:
316
315
317
316
@property
318
317
@abstractmethod
319
- def metadata (self ) -> Dict [str , Any ]:
318
+ def metadata (self ) -> dict [str , Any ]:
320
319
"""
321
320
The metadata for the column. See `DataFrame.metadata` for more details.
322
321
"""
@@ -330,7 +329,7 @@ def num_chunks(self) -> int:
330
329
pass
331
330
332
331
@abstractmethod
333
- def get_chunks (self , n_chunks : Optional [ int ] = None ) -> Iterable [" Column" ]:
332
+ def get_chunks (self , n_chunks : int | None = None ) -> Iterable [Column ]:
334
333
"""
335
334
Return an iterator yielding the chunks.
336
335
@@ -395,7 +394,7 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True):
395
394
396
395
@property
397
396
@abstractmethod
398
- def metadata (self ) -> Dict [str , Any ]:
397
+ def metadata (self ) -> dict [str , Any ]:
399
398
"""
400
399
The metadata for the data frame, as a dictionary with string keys. The
401
400
contents of `metadata` may be anything, they are meant for a library
@@ -415,7 +414,7 @@ def num_columns(self) -> int:
415
414
pass
416
415
417
416
@abstractmethod
418
- def num_rows (self ) -> Optional [ int ] :
417
+ def num_rows (self ) -> int | None :
419
418
# TODO: not happy with Optional, but need to flag it may be expensive
420
419
# why include it if it may be None - what do we expect consumers
421
420
# to do here?
@@ -460,21 +459,21 @@ def get_columns(self) -> Iterable[Column]:
460
459
pass
461
460
462
461
@abstractmethod
463
- def select_columns (self , indices : Sequence [int ]) -> " DataFrame" :
462
+ def select_columns (self , indices : Sequence [int ]) -> DataFrame :
464
463
"""
465
464
Create a new DataFrame by selecting a subset of columns by index.
466
465
"""
467
466
pass
468
467
469
468
@abstractmethod
470
- def select_columns_by_name (self , names : Sequence [str ]) -> " DataFrame" :
469
+ def select_columns_by_name (self , names : Sequence [str ]) -> DataFrame :
471
470
"""
472
471
Create a new DataFrame by selecting a subset of columns by name.
473
472
"""
474
473
pass
475
474
476
475
@abstractmethod
477
- def get_chunks (self , n_chunks : Optional [ int ] = None ) -> Iterable [" DataFrame" ]:
476
+ def get_chunks (self , n_chunks : int | None = None ) -> Iterable [DataFrame ]:
478
477
"""
479
478
Return an iterator yielding the chunks.
480
479
0 commit comments