diff --git a/.gitignore b/.gitignore index 0e28134f..a37fe28a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.swp _build +__pycache__ +spec/API_specification/generated/ diff --git a/spec/API_specification/column_object.rst b/spec/API_specification/column_object.rst new file mode 100644 index 00000000..06206d12 --- /dev/null +++ b/spec/API_specification/column_object.rst @@ -0,0 +1,23 @@ +.. _column-object: + +Column object +============= + +A conforming implementation of the dataframe API standard must provide and +support a column object having the following attributes and methods. + +------------------------------------------------- + +Methods +------- +TODO + +.. + NOTE: please keep the methods in alphabetical order + + .. currentmodule:: dataframe_api + + .. autosummary:: + :toctree: generated + :template: property.rst + diff --git a/spec/API_specification/column_selection.md b/spec/API_specification/column_selection.md deleted file mode 100644 index fd70e98e..00000000 --- a/spec/API_specification/column_selection.md +++ /dev/null @@ -1 +0,0 @@ -# Column selection diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 45eefa77..94462b17 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -2,6 +2,11 @@ Function stubs and API documentation for the DataFrame API standard. """ +from .column_object import * +from .dataframe_object import * +from .groupby_object import * + + __dataframe_api_version__: str = "YYYY.MM" """ String representing the version of the DataFrame API specification to which the diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py new file mode 100644 index 00000000..0987ecaa --- /dev/null +++ b/spec/API_specification/dataframe_api/_types.py @@ -0,0 +1,63 @@ +""" +Types for type annotations used in the dataframe API standard. 
+ +The type variables should be replaced with the actual types for a given +library, e.g., for Pandas TypeVar('DataFrame') would be replaced with pd.DataFrame. +""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import ( + Any, + List, + Literal, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + Protocol, +) +from enum import Enum + +array = TypeVar("array") +Scalar = TypeVar("Scalar") +device = TypeVar("device") +dtype = TypeVar("dtype") +SupportsDLPack = TypeVar("SupportsDLPack") +SupportsBufferProtocol = TypeVar("SupportsBufferProtocol") +PyCapsule = TypeVar("PyCapsule") +# ellipsis cannot actually be imported from anywhere, so include a dummy here +# to keep pyflakes happy. https://github.com/python/typeshed/issues/3556 +ellipsis = TypeVar("ellipsis") + +_T_co = TypeVar("_T_co", covariant=True) + + +class NestedSequence(Protocol[_T_co]): + def __getitem__(self, key: int, /) -> Union[_T_co, NestedSequence[_T_co]]: + ... + + def __len__(self, /) -> int: + ... + + +__all__ = [ + "Any", + "DataFrame", + "List", + "Literal", + "NestedSequence", + "Optional", + "PyCapsule", + "SupportsBufferProtocol", + "SupportsDLPack", + "Tuple", + "Union", + "Sequence", + "array", + "device", + "dtype", + "ellipsis", + "Enum", +] diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index b409cf49..9b3e0857 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -1,10 +1,13 @@ -__all__ = ["DataFrame"] - -from typing import Sequence, TYPE_CHECKING +from __future__ import annotations +from typing import Sequence, Union, TYPE_CHECKING if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy + from ._types import Scalar + + +__all__ = ["DataFrame"] class DataFrame: @@ -33,7 +36,7 @@ def get_column_by_name(self, name: str, /) -> Column: """ ... 
- def get_columns_by_name(self, names: Sequence[str], /) -> "DataFrame": + def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame: """ Select multiple columns by name. @@ -52,7 +55,7 @@ def get_columns_by_name(self, names: Sequence[str], /) -> "DataFrame": """ ... - def get_rows(self, indices: Sequence[int]) -> "DataFrame": + def get_rows(self, indices: Sequence[int]) -> DataFrame: """ Select a subset of rows, similar to `ndarray.take`. @@ -75,7 +78,7 @@ def get_rows(self, indices: Sequence[int]) -> "DataFrame": def slice_rows( self, start: int | None, stop: int | None, step: int | None - ) -> "DataFrame": + ) -> DataFrame: """ Select a subset of rows corresponding to a slice. @@ -91,7 +94,7 @@ def slice_rows( """ ... - def get_rows_by_mask(self, mask: Column[bool]) -> "DataFrame": + def get_rows_by_mask(self, mask: "Column[bool]") -> DataFrame: """ Select a subset of rows corresponding to a mask. @@ -110,7 +113,7 @@ def get_rows_by_mask(self, mask: Column[bool]) -> "DataFrame": """ ... - def insert(self, loc: int, label: str, value: Column) -> "DataFrame": + def insert(self, loc: int, label: str, value: Column) -> DataFrame: """ Insert column into DataFrame at specified location. @@ -124,7 +127,7 @@ def insert(self, loc: int, label: str, value: Column) -> "DataFrame": """ ... - def drop_column(self, label: str) -> "DataFrame": + def drop_column(self, label: str) -> DataFrame: """ Drop the specified column. @@ -143,7 +146,7 @@ def drop_column(self, label: str) -> "DataFrame": """ ... - def set_column(self, label: str, value: Column) -> "DataFrame": + def set_column(self, label: str, value: Column) -> DataFrame: """ Add or replace a column. @@ -158,8 +161,10 @@ def set_column(self, label: str, value: Column) -> "DataFrame": """ ... - def __eq__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __eq__(self, other: DataFrame | Scalar) -> DataFrame: """ + Compare for equality. 
+ Parameters ---------- other : DataFrame or Scalar @@ -173,8 +178,10 @@ def __eq__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __ne__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __ne__(self, other: DataFrame | Scalar) -> DataFrame: """ + Compare for non-equality. + Parameters ---------- other : DataFrame or Scalar @@ -188,8 +195,10 @@ def __ne__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __ge__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __ge__(self, other: DataFrame | Scalar) -> DataFrame: """ + Compare for "greater than or equal to" `other`. + Parameters ---------- other : DataFrame or Scalar @@ -203,8 +212,10 @@ def __ge__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __gt__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __gt__(self, other: DataFrame | Scalar) -> DataFrame: """ + Compare for "greater than" `other`. + Parameters ---------- other : DataFrame or Scalar @@ -218,8 +229,10 @@ def __gt__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __le__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __le__(self, other: DataFrame | Scalar) -> DataFrame: """ + Compare for "less than or equal to" `other`. + Parameters ---------- other : DataFrame or Scalar @@ -233,8 +246,10 @@ def __le__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __lt__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __lt__(self, other: DataFrame | Scalar) -> DataFrame: """ + Compare for "less than" `other`. + Parameters ---------- other : DataFrame or Scalar @@ -248,8 +263,10 @@ def __lt__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __add__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __add__(self, other: DataFrame | Scalar) -> DataFrame: """ + Add `other` dataframe or scalar to this dataframe. 
+ Parameters ---------- other : DataFrame or Scalar @@ -263,8 +280,10 @@ def __add__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __sub__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __sub__(self, other: DataFrame | Scalar) -> DataFrame: """ + Subtract `other` dataframe or scalar from this dataframe. + Parameters ---------- other : DataFrame or Scalar @@ -278,8 +297,10 @@ def __sub__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __mul__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __mul__(self, other: DataFrame | Scalar) -> DataFrame: """ + Multiply `other` dataframe or scalar with this dataframe. + Parameters ---------- other : DataFrame or Scalar @@ -293,8 +314,10 @@ def __mul__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __truediv__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __truediv__(self, other: DataFrame | Scalar) -> DataFrame: """ + Divide this dataframe by `other` dataframe or scalar. True division, returns floats. + Parameters ---------- other : DataFrame or Scalar @@ -308,8 +331,10 @@ def __truediv__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __floordiv__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __floordiv__(self, other: DataFrame | Scalar) -> DataFrame: """ + Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. + Parameters ---------- other : DataFrame or Scalar @@ -323,8 +348,10 @@ def __floordiv__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __pow__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __pow__(self, other: DataFrame | Scalar) -> DataFrame: """ + Raise this dataframe to the power of `other`. + Parameters ---------- other : DataFrame or Scalar @@ -338,8 +365,10 @@ def __pow__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... 
- def __mod__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __mod__(self, other: DataFrame | Scalar) -> DataFrame: """ + Return modulus of this dataframe by `other` (`%` operator). + Parameters ---------- other : DataFrame or Scalar @@ -353,8 +382,10 @@ def __mod__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __divmod__(self, other: DataFrame | "Scalar") -> tuple["DataFrame", "DataFrame"]: + def __divmod__(self, other: DataFrame | Scalar) -> tuple[DataFrame, DataFrame]: """ + Return quotient and remainder of integer division. See `divmod` builtin function. + Parameters ---------- other : DataFrame or Scalar @@ -364,8 +395,7 @@ def __divmod__(self, other: DataFrame | "Scalar") -> tuple["DataFrame", "DataFra Returns ------- - DataFrame - DataFrame + A tuple of two DataFrames """ ... diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index a597e677..a00cc9ec 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -5,32 +5,32 @@ class GroupBy: - def any(self, skipna: bool = True) -> DataFrame: + def any(self, skipna: bool = True) -> "DataFrame": ... - def all(self, skipna: bool = True) -> DataFrame: + def all(self, skipna: bool = True) -> "DataFrame": ... - def min(self, skipna: bool = True) -> DataFrame: + def min(self, skipna: bool = True) -> "DataFrame": ... - def max(self, skipna: bool = True) -> DataFrame: + def max(self, skipna: bool = True) -> "DataFrame": ... - def sum(self, skipna: bool = True) -> DataFrame: + def sum(self, skipna: bool = True) -> "DataFrame": ... - def prod(self, skipna: bool = True) -> DataFrame: + def prod(self, skipna: bool = True) -> "DataFrame": ... - def median(self, skipna: bool = True) -> DataFrame: + def median(self, skipna: bool = True) -> "DataFrame": ... 
+ def mean(self, skipna: bool = True) -> "DataFrame": ... - def std(self, skipna: bool = True) -> DataFrame: + def std(self, skipna: bool = True) -> "DataFrame": ... - def var(self, skipna: bool = True) -> DataFrame: + def var(self, skipna: bool = True) -> "DataFrame": ... diff --git a/spec/API_specification/dataframe_basics.md b/spec/API_specification/dataframe_basics.md deleted file mode 100644 index 668f564c..00000000 --- a/spec/API_specification/dataframe_basics.md +++ /dev/null @@ -1,9 +0,0 @@ -# Dataframe basics - -## Class name - - -## Dataframe size - - -## Columns names diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst new file mode 100644 index 00000000..e7f502f4 --- /dev/null +++ b/spec/API_specification/dataframe_object.rst @@ -0,0 +1,172 @@ +.. _dataframe-object: + +Dataframe object +================ + +A conforming implementation of the dataframe API standard must provide and +support a dataframe object having the following attributes and methods. + +------------------------------------------------- + +.. _operators: + +Operators +--------- + +A conforming implementation of the dataframe API standard must provide and +support a dataframe object supporting the following Python operators. + +Arithmetic Operators +~~~~~~~~~~~~~~~~~~~~ + +A conforming implementation of the dataframe API standard must provide and support +a dataframe object supporting the following Python arithmetic operators. 
+ +- `x1 + x2`: :meth:`.DataFrame.__add__` + + - `operator.add(x1, x2) `_ + - `operator.__add__(x1, x2) `_ + +- `x1 - x2`: :meth:`.DataFrame.__sub__` + + - `operator.sub(x1, x2) `_ + - `operator.__sub__(x1, x2) `_ + +- `x1 * x2`: :meth:`.DataFrame.__mul__` + + - `operator.mul(x1, x2) `_ + - `operator.__mul__(x1, x2) `_ + +- `x1 / x2`: :meth:`.DataFrame.__truediv__` + + - `operator.truediv(x1,x2) `_ + - `operator.__truediv__(x1, x2) `_ + +- `x1 // x2`: :meth:`.DataFrame.__floordiv__` + + - `operator.floordiv(x1, x2) `_ + - `operator.__floordiv__(x1, x2) `_ + +- `x1 % x2`: :meth:`.DataFrame.__mod__` + + - `operator.mod(x1, x2) `_ + - `operator.__mod__(x1, x2) `_ + +- `x1 ** x2`: :meth:`.DataFrame.__pow__` + + - `operator.pow(x1, x2) `_ + - `operator.__pow__(x1, x2) `_ + +Arithmetic operators should be defined for a dataframe having real-valued data types. + +.. note:: + + TODO: figure out whether we want to add ``__neg__`` and ``__pos__``, those + are the two missing arithmetic operators. + + +Comparison Operators +~~~~~~~~~~~~~~~~~~~~ + +A conforming implementation of the dataframe API standard must provide and +support a dataframe object supporting the following Python comparison +operators. + +- `x1 < x2`: :meth:`.DataFrame.__lt__` + + - `operator.lt(x1, x2) `_ + - `operator.__lt__(x1, x2) `_ + +- `x1 <= x2`: :meth:`.DataFrame.__le__` + + - `operator.le(x1, x2) `_ + - `operator.__le__(x1, x2) `_ + +- `x1 > x2`: :meth:`.DataFrame.__gt__` + + - `operator.gt(x1, x2) `_ + - `operator.__gt__(x1, x2) `_ + +- `x1 >= x2`: :meth:`.DataFrame.__ge__` + + - `operator.ge(x1, x2) `_ + - `operator.__ge__(x1, x2) `_ + +- `x1 == x2`: :meth:`.DataFrame.__eq__` + + - `operator.eq(x1, x2) `_ + - `operator.__eq__(x1, x2) `_ + +- `x1 != x2`: :meth:`.DataFrame.__ne__` + + - `operator.ne(x1, x2) `_ + - `operator.__ne__(x1, x2) `_ + +Comparison operators should be defined for dataframes having any data type. 
+ +In-place Operators +~~~~~~~~~~~~~~~~~~ + +TODO + +Reflected Operators +~~~~~~~~~~~~~~~~~~~ + +TODO + +Arithmetic Operators +"""""""""""""""""""" + +- ``__radd__`` +- ``__rsub__`` +- ``__rmul__`` +- ``__rtruediv__`` +- ``__rfloordiv__`` +- ``__rpow__`` +- ``__rmod__`` + +------------------------------------------------- + +.. currentmodule:: dataframe_api + +Attributes +---------- + +TODO + +.. + NOTE: please keep the attributes in alphabetical order + + +.. + autosummary:: + :toctree: generated + :template: property.rst + + DataFrame.shape + +------------------------------------------------- + +Methods +------- +.. + NOTE: please keep the methods in alphabetical order + + +.. autosummary:: + :toctree: generated + :template: property.rst + + DataFrame.__add__ + DataFrame.__eq__ + DataFrame.__floordiv__ + DataFrame.__ge__ + DataFrame.__gt__ + DataFrame.__le__ + DataFrame.__lt__ + DataFrame.__ne__ + DataFrame.__mod__ + DataFrame.__mul__ + DataFrame.__pow__ + DataFrame.__sub__ + DataFrame.__truediv__ diff --git a/spec/API_specification/filter_rows.md b/spec/API_specification/filter_rows.md deleted file mode 100644 index 4cd7cd01..00000000 --- a/spec/API_specification/filter_rows.md +++ /dev/null @@ -1 +0,0 @@ -# Filter rows diff --git a/spec/API_specification/groupby_object.rst b/spec/API_specification/groupby_object.rst new file mode 100644 index 00000000..60b9b2bb --- /dev/null +++ b/spec/API_specification/groupby_object.rst @@ -0,0 +1,31 @@ +.. _groupby-object: + +Groupby object +============== + +A conforming implementation of the dataframe API standard must provide and +support a groupby object having the following attributes and methods. + +------------------------------------------------- + +Methods +------- +.. + NOTE: please keep the methods in alphabetical order + +.. currentmodule:: dataframe_api + +.. 
autosummary:: + :toctree: generated + :template: property.rst + + GroupBy.all + GroupBy.any + GroupBy.max + GroupBy.min + GroupBy.mean + GroupBy.median + GroupBy.prod + GroupBy.std + GroupBy.sum + GroupBy.var diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index d8a9dddb..de195405 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -1,10 +1,12 @@ API specification ================= +.. currentmodule:: dataframe_api + .. toctree:: :caption: API specification - :maxdepth: 1 + :maxdepth: 3 - dataframe_basics - column_selection - filter_rows + dataframe_object + column_object + groupby_object diff --git a/spec/_templates/attribute.rst b/spec/_templates/attribute.rst new file mode 100644 index 00000000..30d21295 --- /dev/null +++ b/spec/_templates/attribute.rst @@ -0,0 +1,5 @@ +.. currentmodule:: {{ module }} + +{{ name.split('.')[-1] | underline }} + +.. autodata:: {{ name }} diff --git a/spec/_templates/method.rst b/spec/_templates/method.rst new file mode 100644 index 00000000..3a85f287 --- /dev/null +++ b/spec/_templates/method.rst @@ -0,0 +1,5 @@ +.. currentmodule:: {{ module }} + +{{ name.split('.')[-1] | underline }} + +.. autofunction:: {{ name }} diff --git a/spec/_templates/property.rst b/spec/_templates/property.rst new file mode 100644 index 00000000..baf31cea --- /dev/null +++ b/spec/_templates/property.rst @@ -0,0 +1,5 @@ +.. currentmodule:: {{ module }} + +{{ name.split('.')[-1] | underline }} + +.. auto{{ objtype }}:: {{ objname }} \ No newline at end of file diff --git a/spec/conf.py b/spec/conf.py index a45bb3b3..dfce68a6 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -65,7 +65,7 @@ # them don't actually refer to anything that we have a document for. 
nitpick_ignore = [ ('py:class', 'array'), - ('py:class', 'dataframe'), + ('py:class', 'DataFrame'), ('py:class', 'device'), ('py:class', 'dtype'), ('py:class', 'NestedSequence'), @@ -73,11 +73,12 @@ ('py:class', 'PyCapsule'), ('py:class', 'enum.Enum'), ('py:class', 'ellipsis'), + ('py:class', 'Scalar'), ] # NOTE: this alias handling isn't used yet - added in anticipation of future -# need based on array API aliases. +# need based on dataframe API aliases. # In dataframe_object.py we have to use aliased names for some types because they -# would otherwise refer back to method objects of array +# would otherwise refer back to method objects of `dataframe` autodoc_type_aliases = { 'dataframe': 'dataframe', 'Device': 'device',