From 3a52ce8e3ab72ce8bd50d4aa13ddfcfe5a55ae5d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 27 Sep 2023 13:29:10 +0100 Subject: [PATCH 1/8] remove unused typevars, make dtype type alias --- .../dataframe_api/__init__.py | 55 ++--------- .../API_specification/dataframe_api/_types.py | 43 ++++---- .../dataframe_api/column_object.py | 97 +++++++++---------- .../dataframe_api/dataframe_object.py | 31 +++--- .../API_specification/dataframe_api/dtypes.py | 33 +++++++ 5 files changed, 123 insertions(+), 136 deletions(-) create mode 100644 spec/API_specification/dataframe_api/dtypes.py diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 1e7d57b4..0c34c2f3 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -3,12 +3,15 @@ """ from __future__ import annotations -from typing import Mapping, Sequence, Any +from typing import Mapping, Sequence, Any, TYPE_CHECKING from .column_object import * from .dataframe_object import DataFrame from .groupby_object import * -from ._types import DType +from .dtypes import * + +if TYPE_CHECKING: + from ._types import DType __all__ = [ "__dataframe_api_version__", @@ -63,7 +66,7 @@ def concat(dataframes: Sequence[DataFrame]) -> DataFrame: """ ... -def column_from_sequence(sequence: Sequence[Any], *, dtype: Any, name: str = '', api_version: str | None = None) -> Column[Any]: +def column_from_sequence(sequence: Sequence[Any], *, dtype: DType, name: str = '', api_version: str | None = None) -> Column: """ Construct Column from sequence of elements. @@ -91,7 +94,7 @@ def column_from_sequence(sequence: Sequence[Any], *, dtype: Any, name: str = '', """ ... 
-def dataframe_from_dict(data: Mapping[str, Column[Any]], *, api_version: str | None = None) -> DataFrame: +def dataframe_from_dict(data: Mapping[str, Column], *, api_version: str | None = None) -> DataFrame: """ Construct DataFrame from map of column names to Columns. @@ -123,7 +126,7 @@ def dataframe_from_dict(data: Mapping[str, Column[Any]], *, api_version: str | N ... -def column_from_1d_array(array: Any, *, dtype: Any, name: str = '', api_version: str | None = None) -> Column[Any]: +def column_from_1d_array(array: Any, *, dtype: DType, name: str = '', api_version: str | None = None) -> Column: """ Construct Column from 1D array. @@ -232,51 +235,13 @@ def is_null(value: object, /) -> bool: """ -########## -# Dtypes # -########## - -class Int64: - """Integer type with 64 bits of precision.""" - -class Int32: - """Integer type with 32 bits of precision.""" - -class Int16: - """Integer type with 16 bits of precision.""" - -class Int8: - """Integer type with 8 bits of precision.""" - -class UInt64: - """Unsigned integer type with 64 bits of precision.""" - -class UInt32: - """Unsigned integer type with 32 bits of precision.""" - -class UInt16: - """Unsigned integer type with 16 bits of precision.""" - -class UInt8: - """Unsigned integer type with 8 bits of precision.""" - -class Float64: - """Floating point type with 64 bits of precision.""" - -class Float32: - """Floating point type with 32 bits of precision.""" - -class Bool: - """Boolean type with 8 bits of precision.""" - - -def is_dtype(dtype: Any, kind: str | tuple[str, ...]) -> bool: +def is_dtype(dtype: DType, kind: str | tuple[str, ...]) -> bool: """ Returns a boolean indicating whether a provided dtype is of a specified data type “kind”. Parameters ---------- - dtype: Any + dtype: DType The input dtype. kind: str data type kind. 
diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 2b6d7d08..2b43e115 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -1,8 +1,5 @@ """ Types for type annotations used in the dataframe API standard. - -The type variables should be replaced with the actual types for a given -library, e.g., for Pandas TypeVar('DataFrame') would be replaced with pd.DataFrame. """ from __future__ import annotations @@ -14,12 +11,28 @@ Optional, Sequence, Tuple, - TypeVar, Union, - Protocol, + TYPE_CHECKING, ) from enum import Enum +if TYPE_CHECKING: + from .dtypes import ( + Bool, + Float64, + Float32, + Int64, + Int32, + Int16, + Int8, + UInt64, + UInt32, + UInt16, + UInt8, + ) + + DType = Union[Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8] + # Type alias: Mypy needs Any, but for readability we need to make clear this # is a Python scalar (i.e., an instance of `bool`, `int`, `float`, `str`, etc.) Scalar = Any @@ -27,26 +40,6 @@ # It is not valid as a type. NullType = Any -array = TypeVar("array") -device = TypeVar("device") -DType = TypeVar("DType") -SupportsDLPack = TypeVar("SupportsDLPack") -SupportsBufferProtocol = TypeVar("SupportsBufferProtocol") -PyCapsule = TypeVar("PyCapsule") -# ellipsis cannot actually be imported from anywhere, so include a dummy here -# to keep pyflakes happy. https://github.com/python/typeshed/issues/3556 -ellipsis = TypeVar("ellipsis") - -_T_co = TypeVar("_T_co", covariant=True) - - -class NestedSequence(Protocol[_T_co]): - def __getitem__(self, key: int, /) -> Union[_T_co, NestedSequence[_T_co]]: - ... - - def __len__(self, /) -> int: - ... 
- __all__ = [ "Any", diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 960462ff..7b0f552a 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -2,17 +2,14 @@ from typing import Any,NoReturn, TYPE_CHECKING, Literal, Generic -from ._types import DType - if TYPE_CHECKING: - from . import Bool - from ._types import NullType, Scalar + from ._types import NullType, Scalar, DType __all__ = ['Column'] -class Column(Generic[DType]): +class Column: """ Column object @@ -73,21 +70,21 @@ def dtype(self) -> Any: Return data type of column. """ - def get_rows(self: Column[DType], indices: Column[Any]) -> Column[DType]: + def get_rows(self: Column, indices: Column) -> Column: """ Select a subset of rows, similar to `ndarray.take`. Parameters ---------- - indices : Column[int] + indices : Column Positions of rows to select. """ ... def slice_rows( - self: Column[DType], start: int | None, stop: int | None, step: int | None - ) -> Column[DType]: + self: Column, start: int | None, stop: int | None, step: int | None + ) -> Column: """ Select a subset of rows corresponding to a slice. @@ -104,13 +101,13 @@ def slice_rows( ... - def filter(self: Column[DType], mask: Column[Bool]) -> Column[DType]: + def filter(self: Column, mask: Column) -> Column: """ Select a subset of rows corresponding to a mask. Parameters ---------- - mask : Column[bool] + mask : Column Returns ------- @@ -146,7 +143,7 @@ def sort( *, ascending: bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[DType]: + ) -> Column: """ Sort column. @@ -175,7 +172,7 @@ def sorted_indices( *, ascending: bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[Any]: + ) -> Column: """ Return row numbers which would sort column. @@ -194,11 +191,11 @@ def sorted_indices( Returns ------- - Column[int] + Column """ ... 
- def __eq__(self, other: Column[Any] | Scalar) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Column | Scalar) -> Column: # type: ignore[override] """ Compare for equality. @@ -216,7 +213,7 @@ def __eq__(self, other: Column[Any] | Scalar) -> Column[Bool]: # type: ignore[o Column """ - def __ne__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore[override] + def __ne__(self: Column, other: Column | Scalar) -> Column: # type: ignore[override] """ Compare for non-equality. @@ -234,7 +231,7 @@ def __ne__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __ge__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __ge__(self: Column, other: Column | Scalar) -> Column: """ Compare for "greater than or equal to" `other`. @@ -250,7 +247,7 @@ def __ge__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __gt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __gt__(self: Column, other: Column | Scalar) -> Column: """ Compare for "greater than" `other`. @@ -266,7 +263,7 @@ def __gt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __le__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __le__(self: Column, other: Column | Scalar) -> Column: """ Compare for "less than or equal to" `other`. @@ -282,7 +279,7 @@ def __le__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __lt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __lt__(self: Column, other: Column | Scalar) -> Column: """ Compare for "less than" `other`. 
@@ -298,7 +295,7 @@ def __lt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: + def __and__(self: Column, other: Column | bool) -> Column: """ Apply logical 'and' to `other` Column (or scalar) and this Column. @@ -306,7 +303,7 @@ def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: Parameters ---------- - other : Column[bool] or bool + other : Column or bool If Column, must have same length. Returns @@ -319,7 +316,7 @@ def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: + def __or__(self: Column, other: Column | bool) -> Column: """ Apply logical 'or' to `other` Column (or scalar) and this column. @@ -327,12 +324,12 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: Parameters ---------- - other : Column[bool] or Scalar + other : Column or Scalar If Column, must have same length. Returns ------- - Column[bool] + Column Raises ------ @@ -340,7 +337,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: + def __add__(self: Column, other: Column | Scalar) -> Column: """ Add `other` column or scalar to this column. @@ -356,7 +353,7 @@ def __add__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __sub__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: + def __sub__(self: Column, other: Column | Scalar) -> Column: """ Subtract `other` column or scalar from this column. 
@@ -372,7 +369,7 @@ def __sub__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __mul__(self, other: Column | Scalar) -> Column: """ Multiply `other` column or scalar with this column. @@ -388,7 +385,7 @@ def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __truediv__(self, other: Column | Scalar) -> Column: """ Divide this column by `other` column or scalar. True division, returns floats. @@ -404,7 +401,7 @@ def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __floordiv__(self, other: Column | Scalar) -> Column: """ Floor-divide `other` column or scalar to this column. @@ -420,7 +417,7 @@ def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __pow__(self, other: Column | Scalar) -> Column: """ Raise this column to the power of `other`. @@ -440,7 +437,7 @@ def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __mod__(self, other: Column | Scalar) -> Column: """ Returns modulus of this column by `other` (`%` operator). @@ -456,7 +453,7 @@ def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[Any], Column[Any]]: + def __divmod__(self, other: Column | Scalar) -> tuple[Column, Column]: """ Return quotient and remainder of integer division. See `divmod` builtin function. 
@@ -472,7 +469,7 @@ def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[Any], Column[A Column """ - def __invert__(self: Column[Bool]) -> Column[Bool]: + def __invert__(self: Column) -> Column: """ Invert truthiness of (boolean) elements. @@ -482,7 +479,7 @@ def __invert__(self: Column[Bool]) -> Column[Bool]: If any of the Column's columns is not boolean. """ - def any(self: Column[Bool], *, skip_nulls: bool = True) -> bool | NullType: + def any(self: Column, *, skip_nulls: bool = True) -> bool | NullType: """ Reduction returns a bool. @@ -492,7 +489,7 @@ def any(self: Column[Bool], *, skip_nulls: bool = True) -> bool | NullType: If column is not boolean. """ - def all(self: Column[Bool], *, skip_nulls: bool = True) -> bool | NullType: + def all(self: Column, *, skip_nulls: bool = True) -> bool | NullType: """ Reduction returns a bool. @@ -586,33 +583,33 @@ def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar Whether to skip null values. """ - def cumulative_max(self: Column[DType]) -> Column[DType]: + def cumulative_max(self: Column) -> Column: """ Reduction returns a Column. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def cumulative_min(self: Column[DType]) -> Column[DType]: + def cumulative_min(self: Column) -> Column: """ Reduction returns a Column. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def cumulative_sum(self: Column[DType]) -> Column[DType]: + def cumulative_sum(self: Column) -> Column: """ Reduction returns a Column. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def cumulative_prod(self: Column[DType]) -> Column[DType]: + def cumulative_prod(self: Column) -> Column: """ Reduction returns a Column. Must be supported for numerical and datetime data types. 
The returned value has the same dtype as the column. """ - def is_null(self) -> Column[Bool]: + def is_null(self) -> Column: """ Check for 'missing' or 'null' entries. @@ -631,7 +628,7 @@ def is_null(self) -> Column[Bool]: but note that the Standard makes no guarantees about them. """ - def is_nan(self) -> Column[Bool]: + def is_nan(self) -> Column: """ Check for nan entries. @@ -650,7 +647,7 @@ def is_nan(self) -> Column[Bool]: In particular, does not check for `np.timedelta64('NaT')`. """ - def is_in(self: Column[DType], values: Column[DType]) -> Column[Bool]: + def is_in(self: Column, values: Column) -> Column: """ Indicate whether the value at each row matches any value in `values`. @@ -665,16 +662,16 @@ def is_in(self: Column[DType], values: Column[DType]) -> Column[Bool]: Returns ------- - Column[bool] + Column """ - def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: + def unique_indices(self, *, skip_nulls: bool = True) -> Column: """ Return indices corresponding to unique values in Column. Returns ------- - Column[int] + Column Indices corresponding to unique values. Notes @@ -689,7 +686,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: """ ... - def fill_nan(self: Column[DType], value: float | NullType, /) -> Column[DType]: + def fill_nan(self: Column, value: float | NullType, /) -> Column: """ Fill floating point ``nan`` values with the given fill value. @@ -703,7 +700,7 @@ def fill_nan(self: Column[DType], value: float | NullType, /) -> Column[DType]: """ ... - def fill_null(self: Column[DType], value: Scalar, /) -> Column[DType]: + def fill_null(self: Column, value: Scalar, /) -> Column: """ Fill null values with the given fill value. @@ -716,7 +713,7 @@ def fill_null(self: Column[DType], value: Scalar, /) -> Column[DType]: """ ... - def to_array_object(self, dtype: Any) -> Any: + def to_array_object(self, dtype: DType) -> Any: """ Convert to array-API-compliant object. 
@@ -751,7 +748,7 @@ def to_array_object(self, dtype: Any) -> Any: ``array-api-compat`` package to convert it to a Standard-compliant array. """ - def rename(self, name: str) -> Column[DType]: + def rename(self, name: str) -> Column: """ Rename column. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 9a727cf3..1994c674 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -6,8 +6,7 @@ if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy - from . import Bool - from ._types import NullType, Scalar + from ._types import NullType, Scalar, DType __all__ = ["DataFrame"] @@ -90,7 +89,7 @@ def group_by(self, keys: str | list[str], /) -> GroupBy: """ ... - def get_column_by_name(self, name: str, /) -> Column[Any]: + def get_column_by_name(self, name: str, /) -> Column: """ Select a column by name. @@ -128,13 +127,13 @@ def select(self, names: Sequence[str], /) -> DataFrame: """ ... - def get_rows(self, indices: Column[Any]) -> DataFrame: + def get_rows(self, indices: Column) -> DataFrame: """ Select a subset of rows, similar to `ndarray.take`. Parameters ---------- - indices : Column[int] + indices : Column Positions of rows to select. Returns @@ -161,13 +160,13 @@ def slice_rows( """ ... - def filter(self, mask: Column[Bool]) -> DataFrame: + def filter(self, mask: Column) -> DataFrame: """ Select a subset of rows corresponding to a mask. Parameters ---------- - mask : Column[bool] + mask : Column Returns ------- @@ -180,7 +179,7 @@ def filter(self, mask: Column[Bool]) -> DataFrame: """ ... - def insert_column(self, column: Column[Any]) -> DataFrame: + def insert_column(self, column: Column) -> DataFrame: """ Insert column into DataFrame at rightmost location. @@ -209,7 +208,7 @@ def insert_column(self, column: Column[Any]) -> DataFrame: """ ... 
- def update_columns(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataFrame: + def update_columns(self, columns: Column | Sequence[Column], /) -> DataFrame: """ Update values in existing column(s) from Dataframe. @@ -336,7 +335,7 @@ def sorted_indices( *, ascending: Sequence[bool] | bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[Any]: + ) -> Column: """ Return row numbers which would sort according to given columns. @@ -361,7 +360,7 @@ def sorted_indices( Returns ------- - Column[int] + Column Raises ------ @@ -687,7 +686,7 @@ def all(self, *, skip_nulls: bool = True) -> DataFrame: """ ... - def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + def any_rowwise(self, *, skip_nulls: bool = True) -> Column: """ Reduction returns a Column. @@ -701,7 +700,7 @@ def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: """ ... - def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + def all_rowwise(self, *, skip_nulls: bool = True) -> Column: """ Reduction returns a Column. @@ -821,7 +820,7 @@ def is_nan(self) -> DataFrame: """ ... - def unique_indices(self, keys: str | list[str] | None = None, *, skip_nulls: bool = True) -> Column[int]: + def unique_indices(self, keys: str | list[str] | None = None, *, skip_nulls: bool = True) -> Column: """ Return indices corresponding to unique values across selected columns. @@ -833,7 +832,7 @@ def unique_indices(self, keys: str | list[str] | None = None, *, skip_nulls: boo Returns ------- - Column[int] + Column Indices corresponding to unique values. Notes @@ -897,7 +896,7 @@ def fill_null( """ ... - def to_array_object(self, dtype: Any) -> Any: + def to_array_object(self, dtype: DType) -> Any: """ Convert to array-API-compliant object. 
diff --git a/spec/API_specification/dataframe_api/dtypes.py b/spec/API_specification/dataframe_api/dtypes.py new file mode 100644 index 00000000..c984542f --- /dev/null +++ b/spec/API_specification/dataframe_api/dtypes.py @@ -0,0 +1,33 @@ +class Int64: + """Integer type with 64 bits of precision.""" + +class Int32: + """Integer type with 32 bits of precision.""" + +class Int16: + """Integer type with 16 bits of precision.""" + +class Int8: + """Integer type with 8 bits of precision.""" + +class UInt64: + """Unsigned integer type with 64 bits of precision.""" + +class UInt32: + """Unsigned integer type with 32 bits of precision.""" + +class UInt16: + """Unsigned integer type with 16 bits of precision.""" + +class UInt8: + """Unsigned integer type with 8 bits of precision.""" + +class Float64: + """Floating point type with 64 bits of precision.""" + +class Float32: + """Floating point type with 32 bits of precision.""" + +class Bool: + """Boolean type with 8 bits of precision.""" + From a62fe35ff0345ea69a913fe6fd22be5597e3b17a Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Wed, 27 Sep 2023 14:36:48 +0100 Subject: [PATCH 2/8] Rename `get_column_names` to `column_names` and make property (#254) * get_column_names -> column_names property * get_column_names -> column_names --- spec/API_specification/dataframe_api/dataframe_object.py | 5 +++-- spec/purpose_and_scope.md | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 1994c674..c2a763c2 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -198,7 +198,7 @@ def insert_column(self, column: Column) -> DataFrame: .. 
code-block:: python new_column = df.get_column_by_name('a') + 1 - new_columns_names = ['a_plus_1'] + df.get_column_names() + new_column_names = ['a_plus_1'] + df.column_names df = df.insert_column(new_column.rename('a_plus_1')) df = df.select(new_column_names) @@ -267,7 +267,8 @@ def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame: """ ... - def get_column_names(self) -> list[str]: + @property + def column_names(self) -> list[str]: """ Get column names. diff --git a/spec/purpose_and_scope.md b/spec/purpose_and_scope.md index a48d7bdf..8f0d989d 100644 --- a/spec/purpose_and_scope.md +++ b/spec/purpose_and_scope.md @@ -291,7 +291,7 @@ def my_dataframe_agnostic_function(df): mask = df.get_column_by_name('species') != 'setosa' df = df.filter(mask) - for column_name in df.get_column_names(): + for column_name in df.column_names: if column_name == 'species': continue new_column = df.get_column_by_name(column_name) From 53f9c7639b3981486ffdbb640cde06b603fd678e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 27 Sep 2023 13:54:52 +0100 Subject: [PATCH 3/8] add namespace protocol --- .../API_specification/dataframe_api/_types.py | 103 +++++++++++++++++- .../dataframe_api/dataframe_object.py | 5 +- 2 files changed, 103 insertions(+), 5 deletions(-) diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 2b43e115..ae45d47a 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -3,18 +3,22 @@ """ from __future__ import annotations -from dataclasses import dataclass from typing import ( + TYPE_CHECKING, Any, List, Literal, + Mapping, Optional, + Protocol, Sequence, Tuple, Union, TYPE_CHECKING, ) -from enum import Enum + +if TYPE_CHECKING: + from .dataframe_object import DataFrame if TYPE_CHECKING: from .dtypes import ( @@ -41,6 +45,100 @@ NullType = Any +class Namespace(Protocol): +
__dataframe_api_version__: str + + class DataFrame: + ... + + class Column: + ... + + class Int64: + ... + + class Int32: + ... + + class Int16: + ... + + class Int8: + ... + + class UInt64: + ... + + class UInt32: + ... + + class UInt16: + ... + + class UInt8: + ... + + class Float64: + ... + + class Float32: + ... + + class Bool: + ... + + @staticmethod + def concat(dataframes: Sequence[DataFrame]) -> DataFrame: + ... + + @staticmethod + def column_from_sequence( + sequence: Sequence[Any], + *, + dtype: Any, + name: str = "", + api_version: str | None = None, + ) -> Column: + ... + + @staticmethod + def dataframe_from_dict( + data: Mapping[str, Column], *, api_version: str | None = None + ) -> DataFrame: + ... + + @staticmethod + def column_from_1d_array( + array: Any, *, dtype: Any, name: str = "", api_version: str | None = None + ) -> Column: + ... + + @staticmethod + def dataframe_from_2d_array( + array: Any, + *, + names: Sequence[str], + dtypes: Mapping[str, Any], + api_version: str | None = None, + ) -> DataFrame: + ... + + @staticmethod + def is_null(value: object, /) -> bool: + ... + + @staticmethod + def is_dtype(dtype: Any, kind: str | tuple[str, ...]) -> bool: + ... + + +class SupportsDataFrameAPI(Protocol): + def __dataframe_consortium_standard__( + self, *, api_version: str | None = None + ) -> DataFrame: + ... + + __all__ = [ "Any", "DataFrame", @@ -58,5 +156,4 @@ "device", "DType", "ellipsis", - "Enum", ] diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index c2a763c2..45cbc563 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -6,7 +6,8 @@ if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy - from ._types import NullType, Scalar, DType + from . 
import Bool + from ._types import NullType, Scalar, Namespace, DType __all__ = ["DataFrame"] @@ -36,7 +37,7 @@ class DataFrame: **Methods and Attributes** """ - def __dataframe_namespace__(self) -> Any: + def __dataframe_namespace__(self) -> Namespace: """ Returns an object that has all the top-level dataframe API functions on it. From 53b43539effc5023530aa35ac7d4cb25b3cfa887 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 27 Sep 2023 14:15:51 +0100 Subject: [PATCH 4/8] fixup --- .../API_specification/dataframe_api/_types.py | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index ae45d47a..317eaf33 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -14,11 +14,11 @@ Sequence, Tuple, Union, - TYPE_CHECKING, ) if TYPE_CHECKING: - from .dataframe_object import DataFrame + from .dataframe_object import DataFrame as DataFrameType + from .column_object import Column as ColumnType if TYPE_CHECKING: from .dtypes import ( @@ -48,47 +48,58 @@ class Namespace(Protocol): __dataframe_api_version__: str - class DataFrame: + @staticmethod + def DataFrame() -> DataFrameType: ... - class Column: + @staticmethod + def Column() -> ColumnType: ... - class Int64: - ... + @staticmethod + def Int64() -> Int64:... + @staticmethod + def Int16() -> Int16:... - class Int32: + @staticmethod + def Int32() -> Int32: ... - class Int16: - ... - class Int8: + @staticmethod + def Int8() -> Int8: ... - class UInt64: + @staticmethod + def UInt64() -> UInt64: ... - class UInt32: + @staticmethod + def UInt32() -> UInt32: ... - class UInt16: + @staticmethod + def UInt16() -> UInt16: ... - class UInt8: + @staticmethod + def UInt8() -> UInt8: ... - class Float64: + @staticmethod + def Float64() -> Float64: ... 
- class Float32: + @staticmethod + def Float32() -> Float32: ... - class Bool: + @staticmethod + def Bool() -> Bool: ... @staticmethod - def concat(dataframes: Sequence[DataFrame]) -> DataFrame: + def concat(dataframes: Sequence[DataFrameType]) -> DataFrameType: ... @staticmethod @@ -98,19 +109,19 @@ def column_from_sequence( dtype: Any, name: str = "", api_version: str | None = None, - ) -> Column: + ) -> ColumnType: ... @staticmethod def dataframe_from_dict( - data: Mapping[str, Column], *, api_version: str | None = None - ) -> DataFrame: + data: Mapping[str, ColumnType], *, api_version: str | None = None + ) -> DataFrameType: ... @staticmethod def column_from_1d_array( array: Any, *, dtype: Any, name: str = "", api_version: str | None = None - ) -> Column: + ) -> ColumnType: ... @staticmethod @@ -120,7 +131,7 @@ def dataframe_from_2d_array( names: Sequence[str], dtypes: Mapping[str, Any], api_version: str | None = None, - ) -> DataFrame: + ) -> DataFrameType: ... @staticmethod @@ -135,7 +146,7 @@ def is_dtype(dtype: Any, kind: str | tuple[str, ...]) -> bool: class SupportsDataFrameAPI(Protocol): def __dataframe_consortium_standard__( self, *, api_version: str | None = None - ) -> DataFrame: + ) -> DataFrameType: ... From f5845679d9ccf31ed15ee8f7560d9cb8d7c449d8 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 27 Sep 2023 14:31:20 +0100 Subject: [PATCH 5/8] fixup --- spec/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/conf.py b/spec/conf.py index c3aabb4d..94782518 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -85,6 +85,7 @@ ('py:class', 'Bool'), ('py:class', 'optional'), ('py:class', 'NullType'), + ('py:class', 'Namespace'), ] # NOTE: this alias handling isn't used yet - added in anticipation of future # need based on dataframe API aliases. 
From 280808cfdcab472427c96f97f7637a6da7d51051 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 28 Sep 2023 14:16:56 +0100 Subject: [PATCH 6/8] fixup --- spec/API_specification/dataframe_api/_types.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 2aca0b1e..64c9c492 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -37,23 +37,6 @@ DType = Union[Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8] -if TYPE_CHECKING: - from .dtypes import ( - Bool, - Float64, - Float32, - Int64, - Int32, - Int16, - Int8, - UInt64, - UInt32, - UInt16, - UInt8, - ) - - DType = Union[Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8] - # Type alias: Mypy needs Any, but for readability we need to make clear this # is a Python scalar (i.e., an instance of `bool`, `int`, `float`, `str`, etc.) 
Scalar = Any From f8e8617add76cb4e2971ad359e9ede41af8e35bc Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 28 Sep 2023 14:17:44 +0100 Subject: [PATCH 7/8] :art: --- .../API_specification/dataframe_api/_types.py | 43 ++++++------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 64c9c492..77379656 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -3,39 +3,20 @@ """ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Any, - List, - Literal, - Mapping, - Optional, - Protocol, - Sequence, - Tuple, - Union, -) +from typing import (TYPE_CHECKING, Any, List, Literal, Mapping, Optional, + Protocol, Sequence, Tuple, Union) if TYPE_CHECKING: - from .dataframe_object import DataFrame as DataFrameType from .column_object import Column as ColumnType + from .dataframe_object import DataFrame as DataFrameType if TYPE_CHECKING: - from .dtypes import ( - Bool, - Float64, - Float32, - Int64, - Int32, - Int16, - Int8, - UInt64, - UInt32, - UInt16, - UInt8, - ) - - DType = Union[Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8] + from .dtypes import (Bool, Float32, Float64, Int8, Int16, Int32, Int64, + UInt8, UInt16, UInt32, UInt64) + + DType = Union[ + Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8 + ] # Type alias: Mypy needs Any, but for readability we need to make clear this # is a Python scalar (i.e., an instance of `bool`, `int`, `float`, `str`, etc.) @@ -57,14 +38,16 @@ def Column() -> ColumnType: ... @staticmethod - def Int64() -> Int64:... + def Int64() -> Int64: + ... @staticmethod def Int32() -> Int32: ... @staticmethod - def Int16() -> Int16:... + def Int16() -> Int16: + ... 
@staticmethod def Int8() -> Int8: From f1f5add8000d34e87a8c7d29aa86823f502081e7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 28 Sep 2023 14:18:04 +0100 Subject: [PATCH 8/8] :art: --- .../API_specification/dataframe_api/_types.py | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 77379656..d7d15dd6 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -3,16 +3,37 @@ """ from __future__ import annotations -from typing import (TYPE_CHECKING, Any, List, Literal, Mapping, Optional, - Protocol, Sequence, Tuple, Union) +from typing import ( + TYPE_CHECKING, + Any, + List, + Literal, + Mapping, + Optional, + Protocol, + Sequence, + Tuple, + Union, +) if TYPE_CHECKING: from .column_object import Column as ColumnType from .dataframe_object import DataFrame as DataFrameType if TYPE_CHECKING: - from .dtypes import (Bool, Float32, Float64, Int8, Int16, Int32, Int64, - UInt8, UInt16, UInt32, UInt64) + from .dtypes import ( + Bool, + Float32, + Float64, + Int8, + Int16, + Int32, + Int64, + UInt8, + UInt16, + UInt32, + UInt64, + ) DType = Union[ Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8